In [1]:
import os
from athina.evals import DoesResponseAnswerQuery, ContextContainsEnoughInformation, Faithfulness, LlmEvaluator
from athina.loaders import RagLoader
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.runner.run import EvalRunner

# Hand each API key, read from its environment variable, to the matching
# athina key holder. os.environ.get yields None when the variable is unset.
for key_holder, env_var in (
    (OpenAiApiKey, 'OPENAI_API_KEY'),
    (AthinaApiKey, 'ATHINA_API_KEY'),
):
    key_holder.set_key(os.environ.get(env_var))

In [2]:
# Build the batch dataset as a list of dicts, one per sample.
# Every sample carries the same three fields, in this order.
sample_fields = ("query", "context", "response")

# One (query, context, response) tuple per sample.
sample_rows = [
    (
        "What is the capital of Greece?",
        "Greece is often called the cradle of Western civilization.",
        "Athens",
    ),
    (
        "What is the price of a Tesla Model 3?",
        "Tesla Model 3 is a fully electric car.",
        "I cannot answer this question as prices vary from country to country.",
    ),
    (
        "What is a shooting star?",
        "Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.",
        "A shooting star is a meteor that burns up in the atmosphere.",
    ),
]

# Pair each row with the field names to get the dict form.
raw_data = [dict(zip(sample_fields, row)) for row in sample_rows]

# Pass the raw dicts through RagLoader.load_dict and print what it returns.
rag_loader = RagLoader()
dataset = rag_loader.load_dict(raw_data)
print(dataset)

In [4]:
# Assemble the eval suite: every evaluator is given the same model name.
eval_model = "gpt-4-1106-preview"
profanity_criteria = "If response contains profanity, then fail. Otherwise, pass."

# The first three evaluators take only the model; the tuple order sets the suite order.
eval_suite = [
    evaluator_cls(model=eval_model)
    for evaluator_cls in (
        DoesResponseAnswerQuery,
        Faithfulness,
        ContextContainsEnoughInformation,
    )
]
# The custom LLM evaluator goes last and also takes grading criteria.
eval_suite.append(
    LlmEvaluator(model=eval_model, grading_criteria=profanity_criteria)
)

# Run the whole suite against the loaded dataset and print the batch result.
batch_eval_result = EvalRunner.run_batch(evals=eval_suite, dataset=dataset)
print(batch_eval_result)

{'results': [{'data_point': {'query': 'What is the capital of Greece?', 'context': 'Greece is often called the cradle of Western civilization.', 'response': 'Athens'}, 'eval_results': [{'failure': False, 'reason': "The response directly answers the user's query by stating 'Athens,' which is the capital of Greece. It specifically addresses the question without providing unnecessary information.", 'runtime': 2520.3652381896973, 'model': 'gpt-4-1106-preview'}, {'failure': True, 'reason': "The response 'Athens' cannot be inferred from the provided context. The context only states that Greece is often called the cradle of Western civilization, but it does not specifically mention Athens or imply that Athens is the reason for this designation.", 'runtime': 2289.492130279541, 'model': 'gpt-4-1106-preview'}, {'failure': True, 'reason': "The context provided only mentions Greece as 'the cradle of Western civilization' and does not include any information about the capital of Greece. Therefore, 