In [2]:
import json
import os
from dataclasses import dataclass, field
from itertools import islice
from typing import Tuple, Optional

import requests

from amazon_fmeval.eval_algo_mapping import get_eval_algorithm
from amazon_fmeval.eval_algorithms.factual_knowledge import FactualKnowledgeConfig
from amazon_fmeval.model_runners.model_runner import ModelRunner

In [3]:
# Create ChatGPT Custom ModelRunner
@dataclass
class ChatGPTModelConfig:
	"""Settings that ChatGPTModelRunner needs to call the chat-completions endpoint.

	Attributes:
		temperature: Value sent as the request's "temperature" field.
		top_p: Value sent as the request's "top_p" field.
		max_tokens: Value sent as the request's "max_tokens" field.
		api_key: Credential used for the request's Authorization header.
			Excluded from repr() so that displaying a config in a notebook
			cell does not write the secret into the saved output.
	"""
	temperature: float
	top_p: float
	max_tokens: int
	# repr=False keeps the secret out of printed / displayed configs;
	# equality and the constructor signature are unaffected.
	api_key: str = field(repr=False)


class ChatGPTModelRunner(ModelRunner):
	"""ModelRunner that answers prompts through OpenAI's chat-completions REST endpoint.

	Class attributes:
		url: Endpoint that receives the POST request.
		model_name: Chat model placed in the request payload; override it on a
			subclass or instance to target a different model.
		request_timeout_seconds: How long to wait for the HTTP response before
			`requests` raises a timeout instead of blocking indefinitely.
	"""
	url = "https://api.openai.com/v1/chat/completions"
	model_name = "gpt-3.5-turbo"
	request_timeout_seconds = 60

	def __init__(self, model_config: ChatGPTModelConfig):
		"""Keep the sampling parameters and credential that `predict` reads.

		NOTE(review): ModelRunner.__init__ is not invoked here; confirm the base
		class needs no initialisation of its own.
		"""
		self.config = model_config

	def predict(self, prompt: str) -> Tuple[Optional[str], Optional[float]]:
		"""Send `prompt` as a single user message and return the model's reply.

		Args:
			prompt: Text placed in the "content" of the one user message.

		Returns:
			A `(text, None)` tuple: the content of the first returned choice, and
			`None` in the second slot because this runner does not supply a value
			for it.

		Raises:
			requests.HTTPError: The API answered with a 4xx/5xx status.
			requests.Timeout: No response within `request_timeout_seconds`.
		"""
		payload = {
			"model": self.model_name,
			"messages": [{"role": "user", "content": prompt}],
			"temperature": self.config.temperature,
			"top_p": self.config.top_p,
			"n": 1,
			"stream": False,
			"max_tokens": self.config.max_tokens,
			"presence_penalty": 0,
			"frequency_penalty": 0,
		}

		# OpenAI expects "Authorization: Bearer <key>". Accept both a bare key and a
		# value that already carries the scheme, so existing configs keep working.
		api_key = self.config.api_key
		authorization = api_key if api_key.startswith("Bearer ") else f"Bearer {api_key}"
		headers = {
			'Content-Type': 'application/json',
			'Accept': 'application/json',
			'Authorization': authorization,
		}

		response = requests.post(
			self.url,
			headers=headers,
			data=json.dumps(payload),
			timeout=self.request_timeout_seconds,
		)
		# Surface API failures (bad key, rate limit, ...) as an HTTP error rather
		# than as a KeyError on the missing "choices" field.
		response.raise_for_status()

		return response.json()["choices"][0]["message"]["content"], None

In [4]:
# Validate Custom Model Runner
# Note: export OPENAI_API_KEY before starting the kernel instead of pasting the key
# into this cell, so the secret never ends up in the saved notebook.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
if not openai_api_key:
	# Fail here with a clear message rather than later with an opaque API error.
	raise RuntimeError("OPENAI_API_KEY is not set; export it before running this cell.")

config = ChatGPTModelConfig(
	api_key=openai_api_key,
	temperature=1.0,
	top_p=1.0,
	max_tokens=250
)
model_runner = ChatGPTModelRunner(config)
# Smoke test: one round trip to the API; prints the (text, None) tuple returned by predict.
print(model_runner.predict("London is the capital of?"))

('London is the capital of England and the United Kingdom.', None)


In [5]:
# Evaluate factual_knowledge
# "<OR>" is the single argument given to the config. The sample targets in this
# notebook join alternative answers with it (presumably the delimiter - confirm).
eval_algorithm_config = FactualKnowledgeConfig("<OR>")
# Look the algorithm class up by name, then instantiate it with the config.
factual_knowledge_algo_cls = get_eval_algorithm("factual_knowledge")
eval_algo = factual_knowledge_algo_cls(eval_algorithm_config)

In [6]:
# Evaluate your custom sample
sample_prompt = "London is the capital of?"
sample_targets = "UK<OR>England<OR>United Kingdom"

# predict returns a tuple; only its first element (the generated text) is scored.
prediction = model_runner.predict(sample_prompt)
model_output = prediction[0]
print(model_output)

# Left as the last expression so the notebook displays the returned scores.
eval_algo.evaluate_sample(
	target_output=sample_targets,
	model_output=model_output,
)

London is the capital of England and the United Kingdom.


[EvalScore(name='factual_knowledge', value=1)]

In [None]:
# Evaluate model with amazon-fmeval built-in dataset
from amazon_fmeval.data_loaders.data_config import DataConfig

# Relative path so the cell is not tied to one user's home directory.
# NOTE(review): assumes trex_sample.jsonl sits in the notebook's working directory
# (the original absolute path pointed at .../amazon-fmeval/examples/) - adjust if not.
TREX_DATASET_URI = "trex_sample.jsonl"

dataset_config = DataConfig(
    dataset_name="TREX",
    dataset_uri=TREX_DATASET_URI,
    dataset_mime_type="application/jsonlines",
    # Names of the JSON Lines fields that hold the prompt, the expected answers
    # and the category label of each record.
    model_input_location="question",
    target_output_location="answers",
    category_location="knowledge_category",
)

# Run the evaluation over the dataset with the custom runner. save=True requests
# that per-record results be written out (the recorded run reports
# output_path='/tmp/eval_results/').
eval_outputs = eval_algo.evaluate(
    model=model_runner,
    dataset_config=dataset_config,
    prompt_template="$feature",
    save=True,
)

In [8]:
# Show Evaluation outputs
# Prints the plain-text form of whatever eval_algo.evaluate returned; in the
# recorded run this is a list holding one EvalOutput with dataset-level and
# per-category scores.
print(eval_outputs)

[EvalOutput(eval_name='factual_knowledge', dataset_name='TREX', dataset_scores=[EvalScore(name='factual_knowledge', value=0.7611940298507462)], prompt_template='$feature', category_scores=[CategoryScore(name='Capitals', scores=[EvalScore(name='factual_knowledge', value=0.83)]), CategoryScore(name='Subsidiary', scores=[EvalScore(name='factual_knowledge', value=0.693069306930693)])], output_path='/tmp/eval_results/')]


In [9]:
# Show first five rows of saved output
with open('/tmp/eval_results/factual_knowledge_TREX.jsonl') as f:
	# islice stops quietly at end-of-file, so a results file with fewer than five
	# records no longer raises StopIteration the way repeated next(f) did.
	lines = list(islice(f, 5))
for line in lines:
	# Each line keeps its trailing newline, so print() leaves a blank line between records.
	print(line)

{"model_input": "Port au Prince is the capital of", "model_output": "Haiti.", "target_output": "Haiti", "category": "Capitals", "prompt": "Port au Prince is the capital of", "scores": [{"name": "factual_knowledge", "value": 1}]}

{"model_input": "Cape Coast is the capital of", "model_output": "the Central Region of Ghana.", "target_output": "Central regional<OR>Central Region, Ghana", "category": "Capitals", "prompt": "Cape Coast is the capital of", "scores": [{"name": "factual_knowledge", "value": 0}]}

{"model_input": "Toluca is the capital of", "model_output": "the State of Mexico in central Mexico.", "target_output": "Mexico State<OR>Estado de Mexico<OR>Mexico", "category": "Capitals", "prompt": "Toluca is the capital of", "scores": [{"name": "factual_knowledge", "value": 1}]}

{"model_input": "Port Moresby is the capital of", "model_output": "Papua New Guinea.", "target_output": "Papua New Guinea<OR>Papua", "category": "Capitals", "prompt": "Port Moresby is the capital of", "score