In [1]:
import os
from athina.llms.openai_service import OpenAiService
from athina.evals import DoesResponseAnswerQuery, ContextContainsEnoughInformation, Faithfulness, CustomGrader
from athina.keys import AthinaApiKey, OpenAiApiKey

# Hand the API credentials to the athina SDK. Both values are read from
# environment variables rather than being written into the notebook.
# NOTE: a variable that is not set yields None, which is passed to set_key as-is.
OpenAiApiKey.set_key(os.environ.get('OPENAI_API_KEY'))
AthinaApiKey.set_key(os.environ.get('ATHINA_API_KEY'))

In [2]:
# Your inference call to OpenAI
# This cell defines the three values the evaluator cells below consume:
# `query`, `context` and `response`.
model = "gpt-3.5-turbo"

# user query
query = "What is a shooting star?"

# retrieved context - BAD (it describes black holes, not shooting stars)
context = "Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light."

# Chat-style prompt: the retrieved context is injected into the system message,
# and the user's question is sent as the user message.
prompt = [
    {
        "role": "system",
        "content": f"Use the information provided to you to answer the user's question. Information: {context}"
    },
    {
        "role": "user",
        "content": query
    }
]

openai_service = OpenAiService()
# Live call, left disabled; the response is pinned below instead.
# Uncomment to query the model with `prompt`:
# response = openai_service.chat_completion(prompt, model=model)

# Pinned response. It must stay identical to the text echoed in the recorded
# evaluator outputs (their 'response' fields); in particular it mentions black
# holes, which is what the custom grader's criterion keys on.
response = "A shooting star, also known as a meteor, is not related to black holes. It is a small piece of space debris, such as a rock or dust particle, that enters Earth's atmosphere and burns up due to friction with the air. This creates a streak of light in the sky, which is commonly referred to as a shooting star."

print(response)

A shooting star, also known as a meteor, is not related to black holes. It is a small piece of space debris, such as a rock or dust particle, that enters Earth's atmosphere and burns up due to friction with the air. This creates a streak of light in the sky, which is commonly referred to as a shooting star.


In [3]:
# Answer relevance evaluator:
# checks whether the LLM response sufficiently answers the user query.
# Only the query and the response are passed in; the retrieved context is not.
answer_relevance_eval = DoesResponseAnswerQuery(model="gpt-3.5-turbo")
answer_relevance_eval.run(query=query, response=response)

{'name': 'does_response_answer_query',
 'data': {'query': 'What is a shooting star?',
  'response': "A shooting star, also known as a meteor, is not related to black holes. It is a small piece of space debris, such as a rock or dust particle, that enters Earth's atmosphere and burns up due to friction with the air. This creates a streak of light in the sky, which is commonly referred to as a shooting star."},
 'failure': False,
 'reason': "The response answers specifically what the user is asking about and covers all aspects of the user's query. It explains that a shooting star is a small piece of space debris that enters Earth's atmosphere and burns up, creating a streak of light in the sky.",
 'runtime': 2464,
 'model': 'gpt-3.5-turbo'}

In [4]:
# Faithfulness evaluator:
# checks whether the LLM response is faithful to the information (context)
# that was provided to it.
# No model argument is given, so the evaluator falls back to its own default.
faithfulness_eval = Faithfulness()
faithfulness_eval.run(context=context, response=response)

{'name': 'faithfulness',
 'data': {'context': 'Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.',
  'response': "A shooting star, also known as a meteor, is not related to black holes. It is a small piece of space debris, such as a rock or dust particle, that enters Earth's atmosphere and burns up due to friction with the air. This creates a streak of light in the sky, which is commonly referred to as a shooting star."},
 'failure': True,
 'reason': "The response cannot be inferred from the provided context. The context only describes black holes and their properties. It does not provide any information about shooting stars or meteors, their composition, or their behavior when entering Earth's atmosphere.",
 'runtime': 4225,
 'model': 'gpt-4-1106-preview'}

In [5]:
# ContextContainsEnoughInformation evaluator:
# checks whether the retrieved context holds enough information to answer
# the user query. The LLM response is not passed to this check.
context_sufficiency_eval = ContextContainsEnoughInformation(model="gpt-4")
context_sufficiency_eval.run(context=context, query=query)

{'name': 'context_contains_enough_information',
 'data': {'context': 'Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.',
  'query': 'What is a shooting star?'},
 'failure': True,
 'reason': "The context provided talks about black holes, not shooting stars. Therefore, it does not contain sufficient information to answer the user's query about what a shooting star is.",
 'runtime': 5282,
 'model': 'gpt-4'}

In [6]:
# Custom evaluator:
# grades the response against a free-text criterion. Here the criterion is
# that the response must not mention black holes.
grading_criteria = "If the response mentions black holes, then fail. Otherwise pass."
custom_grader = CustomGrader(grading_criteria=grading_criteria)
custom_grader.run(context=context, query=query, response=response)

{'name': 'custom_grader',
 'data': {'context': 'Black holes are stars that have collapsed under their own gravity. They are so dense that nothing can escape their gravitational pull, not even light.',
  'query': 'What is a shooting star?',
  'response': "A shooting star, also known as a meteor, is not related to black holes. It is a small piece of space debris, such as a rock or dust particle, that enters Earth's atmosphere and burns up due to friction with the air. This creates a streak of light in the sky, which is commonly referred to as a shooting star."},
 'failure': True,
 'reason': 'The response mentions black holes, which according to the grading criteria, results in a fail.',
 'runtime': 2367,
 'model': 'gpt-4-1106-preview'}