In [None]:
import os
from athina.evals import RagasAnswerCorrectness, RagasAnswerRelevancy, RagasFaithfulness, RagasConciseness
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.runner.run import EvalRunner
from athina.datasets import yc_query_mini
import pandas as pd

# Hand the OpenAI key to Athina. The value is looked up in the
# OPENAI_API_KEY environment variable so the secret is never written
# into the notebook itself.
OpenAiApiKey.set_key(os.environ.get('OPENAI_API_KEY'))

# Optional: uncomment to also set the Athina key from ATHINA_API_KEY.
# AthinaApiKey.set_key(os.getenv('ATHINA_API_KEY'))

In [None]:
# A one-record sample: each record carries a `query`, a `context`
# (list of text passages) and the `response` to be scored.
raw_data_ragas = [
    {
        "query": "What is the capital of India?",
        "context": [
            "India is a south asian country",
            "Mumbai is the financial capital of India",
            "New Delhi is the capital of India",
        ],
        "response": "New Delhi is the capital",
    },
]

# Convert the raw dicts with Athina's Loader; `ragas_dataset` is reused
# by the evaluator cell further down.
ragas_dataset = Loader().load_dict(raw_data_ragas)

# Show the loaded records as a table.
pd.DataFrame(ragas_dataset)

In [None]:
# Model used for this single-evaluator run.
eval_model = "gpt-3.5-turbo"

# Run the RagasConciseness evaluator over `ragas_dataset` and display
# whatever `to_df()` returns. The expression is left as the final
# statement so the notebook renders it.
(
    RagasConciseness(
        model=eval_model,
        api_key=os.getenv('OPENAI_API_KEY'),
        provider='openai',
    )
    .run_batch(data=ragas_dataset)
    .to_df()
)


In [None]:
# Build the batch dataset for the eval suite from the list of dict
# records that ships as `yc_query_mini.data`.
raw_data = yc_query_mini.data
dataset = Loader().load_dict(raw_data)

# Show the loaded records as a table.
pd.DataFrame(dataset)

In [None]:
# Model shared by every evaluator in the suite below.
eval_model = "gpt-4-1106-preview"

# All three evaluators take the same constructor arguments, so the suite
# is built by instantiating each evaluator class in turn.
eval_suite = [
    evaluator(model=eval_model, api_key=os.getenv('OPENAI_API_KEY'), provider='openai')
    for evaluator in (
        RagasAnswerCorrectness,
        RagasFaithfulness,
        RagasAnswerRelevancy,
    )
]

# Run every evaluator in the suite over `dataset`.
batch_eval_result = EvalRunner.run_suite(
    evals=eval_suite,
    data=dataset,
    max_parallel_evals=5,  # If you increase this, you may run into rate limits
)

# Show the combined results as a table.
pd.DataFrame(batch_eval_result)