In [1]:
import os
from athina.llms.openai_service import OpenAiService
from athina.evals import DoesResponseAnswerQuery, ContextContainsEnoughInformation, Faithfulness, CustomGrader
from athina.loaders import RagLoader
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.interfaces.athina import AthinaFilters
import pandas as pd

# Register the OpenAI and Athina API keys, each read from its environment variable.
# os.getenv yields None for an unset variable, and that None is passed straight to set_key.
for key_holder, env_var in (
    (OpenAiApiKey, 'OPENAI_API_KEY'),
    (AthinaApiKey, 'ATHINA_API_KEY'),
):
    key_holder.set_key(os.getenv(env_var))

In [2]:
# Build the batch dataset: one dict per example, keyed by query / context / response.
example_fields = ("query", "context", "response")
example_rows = [
    (
        "What is the capital of Greece?",
        "Greece is often called the cradle of Western civilization.",
        "Athens",
    ),
    (
        "What is the price of a Tesla Model 3?",
        "Tesla Model 3 is a fully electric car.",
        "I cannot answer this question as prices vary from country to country.",
    ),
    (
        "What is a shooting star?",
        "Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",
        "A shooting star is a meteor that burns up in the atmosphere.",
    ),
]
raw_data = [dict(zip(example_fields, row)) for row in example_rows]

# Load the list of dicts through RagLoader and display the result as a DataFrame.
dataset = RagLoader().load_dict(raw_data)
pd.DataFrame(dataset)

Unnamed: 0,query,context,response
0,What is the capital of Greece?,Greece is often called the cradle of Western c...,Athens
1,What is the price of a Tesla Model 3?,Tesla Model 3 is a fully electric car.,I cannot answer this question as prices vary f...
2,What is a shooting star?,Black holes are stars that have collapsed unde...,A shooting star is a meteor that burns up in t...


In [3]:
# Grade whether each LLM response sufficiently answers its user query.
eval_model = "gpt-3.5-turbo"  # grader model name; the later eval cells pass this same variable
answers_query_eval = DoesResponseAnswerQuery(model=eval_model)
answers_query_eval.run_batch(data=dataset).to_df()

Unnamed: 0,query,context,response,display_name,failed,grade_reason,runtime,model,metric_id,metric_value
0,What is the capital of Greece?,Greece is often called the cradle of Western civilization.,Athens,Does Response Answer Query,False,"The response 'Athens' answers specifically what the user is asking about, which is the capital of Greece. It covers all aspects of the user's query and provides the correct answer.",1790,gpt-3.5-turbo,failed,0.0
1,What is the price of a Tesla Model 3?,Tesla Model 3 is a fully electric car.,I cannot answer this question as prices vary from country to country.,Does Response Answer Query,False,"The response sufficiently answers the user's query. It acknowledges that the prices of Tesla Model 3 vary from country to country, which directly addresses the user's question about the price. Therefore, the response covers all aspects of the user's query and provides a satisfactory answer.",2157,gpt-3.5-turbo,failed,0.0
2,What is a shooting star?,"Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",A shooting star is a meteor that burns up in the atmosphere.,Does Response Answer Query,False,"The response answers the user's query sufficiently. It provides a clear and concise definition of a shooting star, explaining that it is a meteor that burns up in the atmosphere.",2188,gpt-3.5-turbo,failed,0.0


In [4]:
# Grade whether each LLM response is faithful to the information provided to it.
faithfulness_eval = Faithfulness(model=eval_model)
faithfulness_batch = faithfulness_eval.run_batch(data=dataset)
faithfulness_batch.to_df()

Unnamed: 0,query,context,response,display_name,failed,grade_reason,runtime,model,metric_id,metric_value
0,What is the capital of Greece?,Greece is often called the cradle of Western civilization.,Athens,Faithfulness,True,"The response 'Athens' cannot be inferred purely from the context. The context mentions that Greece is often called the cradle of Western civilization, but it does not directly state that Athens is the cradle of Western civilization.",2010,gpt-3.5-turbo,failed,1.0
1,What is the price of a Tesla Model 3?,Tesla Model 3 is a fully electric car.,I cannot answer this question as prices vary from country to country.,Faithfulness,False,"The response can be inferred from the context. The context states that the prices of Tesla Model 3 vary from country to country, which implies that the chatbot cannot provide a specific answer to the question about prices.",2151,gpt-3.5-turbo,failed,0.0
2,What is a shooting star?,"Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",A shooting star is a meteor that burns up in the atmosphere.,Faithfulness,True,"The response cannot be inferred from the provided context. The context talks about black holes and their gravitational pull, while the response talks about shooting stars and their burning up in the atmosphere. There is no connection or mention of shooting stars in the context, so the response cannot be inferred purely from the context.",2613,gpt-3.5-turbo,failed,1.0


In [5]:
# Grade whether the supplied context holds enough information to answer the user query.
context_sufficiency_eval = ContextContainsEnoughInformation(model=eval_model)
context_sufficiency_batch = context_sufficiency_eval.run_batch(data=dataset)
context_sufficiency_batch.to_df()

Unnamed: 0,query,context,response,display_name,failed,grade_reason,runtime,model,metric_id,metric_value
0,What is the capital of Greece?,Greece is often called the cradle of Western civilization.,Athens,Context Contains Enough Information,True,"The context provided does not contain sufficient information to answer the user's query about the capital of Greece. The context only mentions that Greece is often called the cradle of Western civilization, but it does not provide any specific information about the capital city.",2183,gpt-3.5-turbo,failed,1.0
1,What is the price of a Tesla Model 3?,Tesla Model 3 is a fully electric car.,I cannot answer this question as prices vary from country to country.,Context Contains Enough Information,True,"The context provided does not contain sufficient information to answer the user's query about the price of a Tesla Model 3. The context only mentions that Tesla Model 3 is a fully electric car, but it does not provide any specific information about its price.",2078,gpt-3.5-turbo,failed,1.0
2,What is a shooting star?,"Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",A shooting star is a meteor that burns up in the atmosphere.,Context Contains Enough Information,True,"The context provided does not contain any information related to shooting stars. Therefore, the chatbot cannot answer the user's query with the given context.",1861,gpt-3.5-turbo,failed,1.0


In [6]:
# Custom evaluator: the grading criteria fail any response that mentions black holes
# and pass every other response.
grading_criteria = "If the response mentions black holes, then fail. Otherwise pass."
black_hole_grader = CustomGrader(model=eval_model, grading_criteria=grading_criteria)
black_hole_grader.run_batch(data=dataset).to_df()

Unnamed: 0,query,context,response,display_name,failed,grade_reason,runtime,model,metric_id,metric_value
0,What is the capital of Greece?,Greece is often called the cradle of Western civilization.,Athens,Custom,False,The response does not mention black holes.,1380,gpt-3.5-turbo,failed,0.0
1,What is the price of a Tesla Model 3?,Tesla Model 3 is a fully electric car.,I cannot answer this question as prices vary from country to country.,Custom,False,"The response does not mention black holes, so it passes the grading criteria.",1717,gpt-3.5-turbo,failed,0.0
2,What is a shooting star?,"Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",A shooting star is a meteor that burns up in the atmosphere.,Custom,False,The response does not mention black holes.,1562,gpt-3.5-turbo,failed,0.0
