In [1]:
import os
from athina.evals import CustomPrompt
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
import pandas as pd
from dotenv import load_dotenv

# Load variables from a .env file (if present) into the process environment
# so the os.getenv lookups below can see them.
load_dotenv()

# Fail fast on a missing/empty OpenAI key: os.getenv returns None for an unset
# variable, and handing None to set_key would only surface later as an opaque
# error when the evaluation cell calls the model.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it or add it to a .env file "
        "before running this notebook."
    )
OpenAiApiKey.set_key(os.getenv('OPENAI_API_KEY'))

# The Athina key is forwarded as-is (None when the variable is unset).
# NOTE(review): presumably optional for local-only runs; confirm before
# adding the same fail-fast check here.
AthinaApiKey.set_key(os.getenv('ATHINA_API_KEY'))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the evaluation batch: one dict per example, each carrying the
# user query, the context supplied for it, and the response to be graded.
raw_data = [
    dict(
        query="What is the capital of Greece?",
        context="Greece is often called the cradle of Western civilization.",
        response="Athens",
    ),
    dict(
        query="What is the price of a Tesla Model 3?",
        context="Tesla Model 3 is a fully electric car.",
        response="I cannot answer this question as prices vary from country to country.",
    ),
    dict(
        query="What is a shooting star?",
        context="Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",
        response="A shooting star is a meteor that burns up in the atmosphere.",
    ),
]

# Hand the records to the Athina loader, then render the result as a table.
dataset = Loader().load_dict(raw_data)
pd.DataFrame(data=dataset)

Unnamed: 0,query,context,response
0,What is the capital of Greece?,Greece is often called the cradle of Western c...,Athens
1,What is the price of a Tesla Model 3?,Tesla Model 3 is a fully electric car.,I cannot answer this question as prices vary f...
2,What is a shooting star?,Black holes are stars that have collapsed unde...,A shooting star is a meteor that burns up in t...


In [3]:
# Grade every row of the dataset with a custom prompt: the grader is told to
# fail a response that refuses to answer the user's query and pass otherwise.
eval_model = "gpt-3.5-turbo"
eval_prompt = (
    "\n"
    "If the response refuses to answer the user's query, then fail. Otherwise pass.\n"
    "\n"
    "User Query: {query}\n"
    "Response: {response}"
)

# {query} and {response} in the prompt name columns of the dataset rows.
# Run the whole batch and show the results as a DataFrame.
(
    CustomPrompt(
        display_name="Response should answer user's query",
        model=eval_model,
        eval_prompt=eval_prompt,
    )
    .run_batch(data=dataset)
    .to_df()
)

Unnamed: 0,query,context,response,display_name,failed,grade_reason,runtime,model,passed
0,What is the capital of Greece?,Greece is often called the cradle of Western civilization.,Athens,Response should answer user's query,False,The response provided the correct answer to the user's query.,968,gpt-3.5-turbo,1.0
1,What is the price of a Tesla Model 3?,Tesla Model 3 is a fully electric car.,I cannot answer this question as prices vary from country to country.,Response should answer user's query,True,"The response refuses to answer the user's query, stating that prices vary by country. This does not provide any useful information to the user.",977,gpt-3.5-turbo,0.0
2,What is a shooting star?,"Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",A shooting star is a meteor that burns up in the atmosphere.,Response should answer user's query,False,The response directly answers the user's query by defining what a shooting star is.,1244,gpt-3.5-turbo,1.0
