In [1]:
import os
from athina.evals import RagasAnswerCorrectness, RagasAnswerRelevancy, RagasContextRelevancy, RagasFaithfulness, RagasConciseness
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.runner.run import EvalRunner
from athina.datasets import yc_query_mini
from athina.interfaces.athina import AthinaExperiment
import pandas as pd

# Register both API keys from environment variables rather than hard-coding them.
# os.getenv yields None for an unset variable, and that value is passed to set_key as-is.
for key_holder, env_var in (
    (OpenAiApiKey, 'OPENAI_API_KEY'),
    (AthinaApiKey, 'ATHINA_API_KEY'),
):
    key_holder.set_key(os.getenv(env_var))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# One hand-written sample: a query, the context passages, and the response to evaluate.
raw_data_ragas = [
    dict(
        query="What is the capital of India?",
        context=[
            "India is a south asian country",
            "Mumbai is the financial capital of India",
            "New Delhi is the capital of India",
        ],
        response="New Delhi is the capital",
    ),
]

# Convert the raw records into the dataset passed to the evaluator below.
ragas_loader = Loader()
ragas_dataset = ragas_loader.load_dict(raw_data_ragas)

# Show the loaded rows as a table.
pd.DataFrame(ragas_dataset)

Unnamed: 0,query,context,response,expected_response
0,What is the capital of India?,"[India is a south asian country, Mumbai is the...",New Delhi is the capital,


In [3]:
# Model used to grade the single-sample dataset.
eval_model = "gpt-3.5-turbo"

# Build the conciseness evaluator, run it over every row, and display the result as a table.
conciseness_eval = RagasConciseness(model=eval_model)
conciseness_result = conciseness_eval.run_batch(data=ragas_dataset)
conciseness_result.to_df()


evaluating with [conciseness]


100%|██████████| 1/1 [00:00<00:00,  1.30it/s]


Unnamed: 0,query,context,response,expected_response,display_name,failed,grade_reason,runtime,model,ragas_conciseness
0,What is the capital of India?,"[India is a south asian country, Mumbai is the financial capital of India, New Delhi is the capital of India]",New Delhi is the capital,,Ragas Conciseness,,"This is calculated by how efficiently generated llm response conveys information or ideas clearly and efficiently, without unnecessary or redundant details",1008,gpt-3.5-turbo,1.0


In [4]:
# Build the batch dataset from the bundled yc_query_mini sample records
# (a list of dict objects, one per query).
raw_data = yc_query_mini.data
yc_loader = Loader()
dataset = yc_loader.load_dict(raw_data)

# Show the loaded rows as a table.
pd.DataFrame(dataset)

Unnamed: 0,query,context,response,expected_response
0,What are some successful companies that went through YC?,"[Y Combinator has invested in companies in various fields like FinTech, Healthcare, AI, etc.]","Airbnb, Dropbox, Stripe, Reddit, Coinbase, Instacart.",Airbnb and Stripe are 2 of the successful companies that went through YC.
1,In which city is YC located?,"[Y Combinator is located in Mountain View, California.]",Y Combinator is located in San Francisco,"YC is located in Mountain View, California."
2,How much equity does YC take?,[Y Combinator invests $500k in 200 startups twice a year.],YC invests $150k for 7%.,I cannot answer this question as I do not have enough information.
3,How much equity does YC take?,[Y Combinator invests $500k in 200 startups twice a year.],I cannot answer this question as I do not have enough information.,I cannot answer this question as I do not have enough information.
4,Who founded YC and when was it founded?,"[Y Combinator was founded in March 2005 by Paul Graham, Jessica Livingston, Trevor Blackwell, and Robert Tappan Morris.]",Y Combinator was founded in 2005,"Y Combinator was founded in March 2005 by Paul Graham, Jessica Livingston, Trevor Blackwell, and Robert Tappan Morris."
5,Does Y Combinator invest in startups outside the US?,[Y Combinator invests in startups from all over the world.],"Yes, Y Combinator invests in international startups as well as US startups.","Yes, Y Combinator invests in startups from all over the world."
6,How much does YC invest in startups?,[YC invests $150k for 7%.],$150k,YC invests $150k for 7%.
7,What is YC's motto?,[Y Combinator's motto is 'Make something people want'.],Make something people want,Make something people want


In [5]:
# Run the eval suite
# Model that every evaluator in the suite should grade with.
eval_model = "gpt-4-1106-preview"

# Each evaluator is constructed with model=eval_model so the model named above
# is the one actually used; an evaluator built without the argument would not
# see this setting (presumably falling back to a library default - confirm).
eval_suite = [
    RagasAnswerCorrectness(model=eval_model),
    RagasFaithfulness(model=eval_model),
    RagasContextRelevancy(model=eval_model),
    RagasAnswerRelevancy(model=eval_model),
]

# Run the evaluation suite
batch_eval_result = EvalRunner.run_suite(
    evals=eval_suite,
    data=dataset,
    max_parallel_evals=1,   # If you increase this, you may run into rate limits
)

# Show the per-row scores from all evaluators as one table.
pd.DataFrame(batch_eval_result)

evaluating with [answer_correctness]


100%|██████████| 1/1 [00:03<00:00,  3.50s/it]


evaluating with [answer_correctness]


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]


evaluating with [answer_correctness]


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


evaluating with [answer_correctness]


100%|██████████| 1/1 [00:01<00:00,  1.23s/it]
  value = np.nanmean(self.scores[cn])
WARN: Invalid metric value: nan


evaluating with [answer_correctness]


100%|██████████| 1/1 [00:02<00:00,  2.37s/it]


evaluating with [answer_correctness]


100%|██████████| 1/1 [00:01<00:00,  1.77s/it]


evaluating with [answer_correctness]


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]


evaluating with [answer_correctness]


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:04<00:00,  4.65s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:01<00:00,  1.87s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:01<00:00,  1.12s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:01<00:00,  1.71s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:03<00:00,  3.78s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:01<00:00,  1.82s/it]


evaluating with [faithfulness]


100%|██████████| 1/1 [00:01<00:00,  1.69s/it]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:00<00:00,  2.59it/s]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:00<00:00,  2.20it/s]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.44it/s]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:00<00:00,  2.09it/s]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:03<00:00,  3.07s/it]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:00<00:00,  2.19it/s]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.56it/s]


evaluating with [context_relevancy]


100%|██████████| 1/1 [00:00<00:00,  2.28it/s]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.10it/s]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.23it/s]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:01<00:00,  1.23s/it]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.16it/s]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.13it/s]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:01<00:00,  1.72s/it]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:01<00:00,  1.03s/it]


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:00<00:00,  1.18it/s]


Unnamed: 0,query,context,response,expected_response,Ragas Answer Correctness ragas_answer_correctness,Ragas Faithfulness ragas_faithfulness,Ragas Context Relevancy ragas_context_relevancy,Ragas Answer Relevancy ragas_answer_relevancy
0,What are some successful companies that went through YC?,"[Y Combinator has invested in companies in various fields like FinTech, Healthcare, AI, etc.]","Airbnb, Dropbox, Stripe, Reddit, Coinbase, Instacart.",Airbnb and Stripe are 2 of the successful companies that went through YC.,0.718469,0.833333,1.0,0.916001
1,In which city is YC located?,"[Y Combinator is located in Mountain View, California.]",Y Combinator is located in San Francisco,"YC is located in Mountain View, California.",0.213528,0.0,1.0,0.887728
2,How much equity does YC take?,[Y Combinator invests $500k in 200 startups twice a year.],YC invests $150k for 7%.,I cannot answer this question as I do not have enough information.,0.183196,0.0,1.0,0.881647
3,How much equity does YC take?,[Y Combinator invests $500k in 200 startups twice a year.],I cannot answer this question as I do not have enough information.,I cannot answer this question as I do not have enough information.,,0.0,1.0,0.0
4,Who founded YC and when was it founded?,"[Y Combinator was founded in March 2005 by Paul Graham, Jessica Livingston, Trevor Blackwell, and Robert Tappan Morris.]",Y Combinator was founded in 2005,"Y Combinator was founded in March 2005 by Paul Graham, Jessica Livingston, Trevor Blackwell, and Robert Tappan Morris.",0.609783,1.0,1.0,0.897087
5,Does Y Combinator invest in startups outside the US?,[Y Combinator invests in startups from all over the world.],"Yes, Y Combinator invests in international startups as well as US startups.","Yes, Y Combinator invests in startups from all over the world.",0.743316,0.5,1.0,0.981736
6,How much does YC invest in startups?,[YC invests $150k for 7%.],$150k,YC invests $150k for 7%.,0.713585,0.0,1.0,0.92122
7,What is YC's motto?,[Y Combinator's motto is 'Make something people want'.],Make something people want,Make something people want,1.0,1.0,1.0,0.921079
