In [1]:
import os
from athina.llms.openai_service import OpenAiService
from athina.evals import DoesResponseAnswerQuery, ContextContainsEnoughInformation, Faithfulness, LlmEvaluator
from athina.keys import AthinaApiKey, OpenAiApiKey

# Hand both API keys to athina, reading them from the environment so that
# no secret is written into the notebook itself.
# os.environ.get returns None for an unset variable, the same as os.getenv.
OpenAiApiKey.set_key(os.environ.get('OPENAI_API_KEY'))
AthinaApiKey.set_key(os.environ.get('ATHINA_API_KEY'))

In [2]:
# Generate the LLM response that the evaluator cells below will grade.
# Note that the supplied context is about black holes, while the user
# query asks about shooting stars.
model = "gpt-3.5-turbo"
user_query = "What is a shooting star?"
context = "Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light."

# Chat messages: the system message carries the context, the user message
# carries the question.
prompt = [
    dict(
        role="system",
        content=f"Use the information provided to you to answer the user's question. Information: {context}",
    ),
    dict(role="user", content=user_query),
]

# Send the messages to the chosen model and show what came back.
openai_service = OpenAiService()
response = openai_service.chat_completion(prompt, model=model)
print(response)

A shooting star, also known as a meteor, is not related to black holes. It is a small celestial object, such as a rock or dust particle, that enters the Earth's atmosphere and burns up due to friction with the air. This creates a streak of light in the sky, which is commonly referred to as a shooting star.


In [3]:
# Answer relevance evaluation:
# does the LLM response sufficiently answer the user's query?
DoesResponseAnswerQuery().run(
    user_query=user_query,
    response=response,
)

{'failure': False,
 'reason': "The response answers the user's query by explaining what a shooting star is, also clarifying that it is known as a meteor and is not related to black holes. It covers the aspects of the user's query by describing the nature of a shooting star as a small celestial object that burns up in the Earth's atmosphere, creating a visible streak of light."}

In [4]:
# Faithfulness evaluation:
# is the LLM response faithful to the information (context) it was given?
Faithfulness().run(
    context=context,
    response=response,
)

{'failure': True,
 'reason': 'The response cannot be inferred from the provided context. The context describes black holes and their properties, but it does not provide any information about shooting stars or meteors, nor does it make any comparison between black holes and other celestial phenomena.'}

In [5]:
# ContextContainsEnoughInformation evaluation (run with gpt-4):
# does the context hold enough information to answer the user's query?
ContextContainsEnoughInformation(model="gpt-4").run(
    context=context,
    user_query=user_query,
)

{'failure': True,
 'reason': "The context provided talks about black holes, not shooting stars. Therefore, it does not contain sufficient information to answer the user's query about what a shooting star is."}

In [7]:
# Run a custom LLM evaluator
# Grades the inputs against the free-form grading criteria defined below;
# here the criteria fail any response that mentions black holes.
grading_criteria = "If the response mentions black holes, then fail. Otherwise pass."
LlmEvaluator(grading_criteria=grading_criteria).run(context=context, user_query=user_query, response=response)

{'failure': True,
 'reason': 'The response mentions black holes, which according to the grading criteria, results in a fail.'}