In [None]:
import os
from athina.llms.openai_service import OpenAiService
from athina.evals import DoesResponseAnswerQuery, ContextContainsEnoughInformation, Faithfulness, CustomGrader
from athina.keys import AthinaApiKey, OpenAiApiKey

# Read both API keys from the environment and register them with athina.
# os.getenv returns None for a variable that is not set, and that value is
# passed through to set_key unchanged.
openai_api_key = os.getenv('OPENAI_API_KEY')
athina_api_key = os.getenv('ATHINA_API_KEY')

OpenAiApiKey.set_key(openai_api_key)
AthinaApiKey.set_key(athina_api_key)

In [None]:
# Sample inputs for one OpenAI inference call.
# Note that the context talks about black holes while the query asks about
# shooting stars; the evaluator cells further down reuse these same values.
model = "gpt-3.5-turbo"
query = "What is a shooting star?"
context = "Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light."

# The system message carries the instructions plus the context;
# the user message carries the raw query.
system_message = {
    "role": "system",
    "content": f"Use the information provided to you to answer the user's question. Information: {context}",
}
user_message = {"role": "user", "content": query}
prompt = [system_message, user_message]

# Send the messages to the chat model and keep the reply for the evaluators.
openai_service = OpenAiService()
response = openai_service.chat_completion(prompt, model=model)

print(response)

In [None]:
# Answer relevance evaluator:
# does the LLM response sufficiently answer the user's query?
answer_relevance_evaluator = DoesResponseAnswerQuery()
answer_relevance_evaluator.run(query=query, response=response)

In [None]:
# Faithfulness evaluator:
# is the LLM response faithful to the information (context) it was given?
faithfulness_evaluator = Faithfulness()
faithfulness_evaluator.run(context=context, response=response)

In [None]:
# ContextContainsEnoughInformation evaluator:
# does the context hold enough information to answer the user's query?
# This evaluator is constructed with model="gpt-4".
context_sufficiency_evaluator = ContextContainsEnoughInformation(model="gpt-4")
context_sufficiency_evaluator.run(context=context, query=query)

In [None]:
# Custom evaluator driven by a free-form grading criterion.
# With the criterion below, a response that mentions black holes fails
# and any other response passes.
grading_criteria = "If the response mentions black holes, then fail. Otherwise pass."
custom_grader = CustomGrader(grading_criteria=grading_criteria)
custom_grader.run(context=context, query=query, response=response)