In [1]:
import os
from athina.evals import DoesResponseAnswerQuery, ContextContainsEnoughInformation, Faithfulness
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.runner.run import EvalRunner
from athina.datasets import yc_query_mini
from athina.datasets import dataset_1, dataset_2
from athina.interfaces.athina import AthinaExperiment
import pandas as pd

from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment
load_dotenv()

# Hand the API keys to the Athina key holders. A variable that is not set is
# passed through as None, so the keys themselves never appear in the notebook.
OpenAiApiKey.set_key(os.environ.get('OPENAI_API_KEY'))
AthinaApiKey.set_key(os.environ.get('ATHINA_API_KEY'))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Create batch dataset from list of dict objects.
# Each entry's single `context` value is wrapped in a one-element list before loading.
# New dicts are built instead of assigning into dataset_1.data: the in-place version
# mutates the imported module's data, so re-running this cell would wrap the context
# again ([[ctx]], [[[ctx]]], ...) and silently change the result.
raw_data = [
    {**entry, 'context': [entry['context']]}  # existing key, so column order is unchanged
    for entry in dataset_1.data
]
dataset = Loader().load_dict(raw_data)
pd.DataFrame(dataset)

Unnamed: 0,query,context,response,expected_response
0,Which is better the Free Software Movement or ...,"[""Both the modern free software movement and t...",The Free Software Movement is the better organ...,
1,Given this paragraph describing the conclusion...,"[""During one iteration of the loop, Phil repor...",This paragraph summarizing the conclusion of G...,
2,"Given this reference text about the Koala, tel...","[""The koala or, inaccurately, koala bear (Phas...",Eucalyptus makes up most of their diet. Joeys ...,
3,Where is the Porce III Dam,"[""The Porce III Dam is an embankment dam on th...",The Porce III Dam is an embankment dam on the ...,
4,"According to this reference text about UVA, wh...","[""In the 21st century alone, UVA teams have wo...","Additionally, the UVA women's rowing team has ...",
5,Which team drafted Marques Pope?,"[""Marquez Phillips Pope (born October 29, 1970...",The team that drafted Marquez Pope was the San...,
6,Given the text about the 2019 Masters Tourname...,"[""The 2019 Masters Tournament was the 83rd edi...",Tiger Woods won the golf tournament at Augusta...,
7,Who wrote the music and lyrics for Replay?,"[""\""Tomorrow\"" is a show tune from the musical...",Charles Strouse,
8,What is Sinking Sand?,"[""Quicksand, also known as sinking sand, is a ...",Sinking Sand is most commonly known as Quicksand.,
9,What book was the musical Hamilton based on?,"[""Hamilton is a sung-and-rapped-through musica...","Hamilton, the musical was based on the 2004 bo...",


In [3]:
# Create the second batch dataset from dataset_2's list of dict objects.
# Each entry's single `context` value is wrapped in a one-element list before loading.
# New dicts are built instead of assigning into dataset_2.data: mutating the imported
# data in place makes the cell non-idempotent (every re-run nests the context one
# level deeper).
raw_data = [
    {**entry, 'context': [entry['context']]}  # existing key, so column order is unchanged
    for entry in dataset_2.data
]
dataset_new = Loader().load_dict(raw_data)
pd.DataFrame(dataset_new)

Unnamed: 0,query,context,response,expected_response
0,Which is better the Free Software Movement or ...,"[Both movements, the Free Software Movement an...",I lean towards the Free Software Movement due ...,
1,Given this paragraph describing the conclusion...,"[At the end of Groundhog Day, Phil captivates ...",The text details Phil's activities on the day ...,
2,"Given this reference text about the Koala, tel...","[Koalas, native Australian marsupials, primari...",Their diet consists mostly of eucalyptus leave...,
3,Where is the Porce III Dam,"[Situated on the Porce River, approximately 90...",The Porce III Dam is located on the Porce Rive...,
4,"According to this reference text about UVA, wh...","[Throughout the 21st century, UVA's women's ro...",UVA women have clinched NCAA rowing titles in ...,
5,Which team drafted Marques Pope?,"[In the 1992 NFL Draft, Marques Pope was selec...",Marques Pope was drafted by the San Diego Char...,
6,Given the text about the 2019 Masters Tourname...,"[The 83rd Masters Tournament in 2019, held at ...",Tiger Woods secured the victory at the Augusta...,
7,Who wrote the music and lyrics for Replay?,[Charles Strouse originally composed the song ...,Charles Strouse was the creator of both music ...,
8,What is Sinking Sand?,"[Quicksand, or sinking sand, is a hydro-mechan...","Quicksand, known as sinking sand, is a mixture...",
9,What book was the musical Hamilton based on?,"[Lin-Manuel Miranda's groundbreaking musical, ...",The musical Hamilton was inspired by Ron Chern...,


In [5]:
# Model name passed as `model=` to every evaluator in the eval-suite cell
eval_model = "gpt-4"

# Describe the experiment being recorded (nothing is evaluated in this cell).
# NOTE(review): `experiment` is not referenced by any other cell visible here, and
# its dataset_name ("Dataset 1") differs from the "Dataset 2" passed to
# run_full_suite in the eval-suite cell -- confirm which is intended.
experiment_fields = {
    "experiment_name": "yc-question-chatbot",
    "experiment_description": "YC Q&A with RAG model and few-shot examples",
    "language_model_provider": "openai",
    "language_model_id": "gpt-3.5-turbo",
    "prompt_template": [],
    "dataset_name": "Dataset 1",
}
experiment = AthinaExperiment(**experiment_fields)

In [6]:
# Evaluation suite: one instance of each evaluator, all built with the same model
evaluator_classes = (
    DoesResponseAnswerQuery,
    Faithfulness,
    ContextContainsEnoughInformation,
)
eval_suite = [evaluator_cls(model=eval_model) for evaluator_cls in evaluator_classes]

# Run the whole suite over dataset_new, labelled "Dataset 2".
# max_parallel_evals presumably caps how many evals run concurrently -- confirm
# against the Athina docs.
batch_eval_result = EvalRunner.run_full_suite(
    evals=eval_suite,
    data=dataset_new,
    dataset_name="Dataset 2",
    max_parallel_evals=3,
)

# Display the results table as the cell output
batch_eval_result

You can view the evaluation results at Athina: Dataset 2


Unnamed: 0,query,context,response,expected_response,Does Response Answer Query passed,Faithfulness passed,Context Contains Enough Information passed
0,Which is better the Free Software Movement or ...,"[Both movements, the Free Software Movement an...",I lean towards the Free Software Movement due ...,,0.0,0.0,0.0
1,Given this paragraph describing the conclusion...,"[At the end of Groundhog Day, Phil captivates ...",The text details Phil's activities on the day ...,,0.0,1.0,1.0
2,"Given this reference text about the Koala, tel...","[Koalas, native Australian marsupials, primari...",Their diet consists mostly of eucalyptus leave...,,1.0,1.0,0.0
3,Where is the Porce III Dam,"[Situated on the Porce River, approximately 90...",The Porce III Dam is located on the Porce Rive...,,1.0,1.0,1.0
4,"According to this reference text about UVA, wh...","[Throughout the 21st century, UVA's women's ro...",UVA women have clinched NCAA rowing titles in ...,,1.0,1.0,0.0
5,Which team drafted Marques Pope?,"[In the 1992 NFL Draft, Marques Pope was selec...",Marques Pope was drafted by the San Diego Char...,,1.0,1.0,1.0
6,Given the text about the 2019 Masters Tourname...,"[The 83rd Masters Tournament in 2019, held at ...",Tiger Woods secured the victory at the Augusta...,,1.0,0.0,0.0
7,Who wrote the music and lyrics for Replay?,[Charles Strouse originally composed the song ...,Charles Strouse was the creator of both music ...,,0.0,0.0,0.0
8,What is Sinking Sand?,"[Quicksand, or sinking sand, is a hydro-mechan...","Quicksand, known as sinking sand, is a mixture...",,1.0,1.0,1.0
9,What book was the musical Hamilton based on?,"[Lin-Manuel Miranda's groundbreaking musical, ...",The musical Hamilton was inspired by Ron Chern...,,1.0,1.0,1.0
