In [None]:
import os
from athina.evals import (
    RagasContextRelevancy,
    RagasAnswerRelevancy,
    RagasContextPrecision,
    RagasFaithfulness,
    RagasContextRecall,
    RagasAnswerSemanticSimilarity,
    RagasAnswerCorrectness,
    RagasHarmfulness,
    RagasMaliciousness,
    RagasCoherence,
    RagasConciseness
)
from athina.runner.run import EvalRunner
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
import pandas as pd

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index import download_loader


from dotenv import load_dotenv
# Read variables from a local .env file (if any) into the process environment
# so that no credential has to be written into the notebook itself.
load_dotenv()

# Hand the keys to the Athina key holders. A variable that is not set in the
# environment is forwarded as None.
OpenAiApiKey.set_key(os.environ.get('OPENAI_API_KEY'))
AthinaApiKey.set_key(os.environ.get('ATHINA_API_KEY'))

In [None]:
# Build a LlamaIndex query engine over the Wikipedia page for Berlin.
CHUNK_SIZE = 512  # chunk_size handed to ServiceContext.from_defaults

# Obtain the reader class by name, then load the requested page(s).
WikipediaReader = download_loader("WikipediaReader")
loader = WikipediaReader()
documents = loader.load_data(pages=['Berlin'])

# Index the loaded documents using the chunk size configured above.
service_context = ServiceContext.from_defaults(chunk_size=CHUNK_SIZE)
vector_index = VectorStoreIndex.from_documents(documents, service_context=service_context)

# The query engine is what the later cells use to answer the dataset queries.
query_engine = vector_index.as_query_engine()

In [None]:
# Evaluation inputs as (query, expected_response) pairs.
# NOTE(review): the second question is about Rome while the index built in
# this notebook only holds the Berlin page -- presumably a deliberate
# out-of-context example; confirm.
_QUERY_AND_EXPECTED = (
    ("Where is Berlin?", "Berlin is the capital city of Germany"),
    ("What is the main cuisine of Rome?", "Pasta dish with a sauce made with egg yolks"),
)

# One dict per pair, with the keys in the order "query", "expected_response".
raw_data_llama_index = [
    {"query": question, "expected_response": reference}
    for question, reference in _QUERY_AND_EXPECTED
]

In [None]:
# Pass the raw query records together with the query engine to the Athina
# loader; the result is the dataset consumed by the evaluation cell.
dataset_loader = Loader()
llama_index_dataset = dataset_loader.load_from_llama_index(raw_data_llama_index, query_engine)

# Last expression of the cell: render the loaded dataset as a table.
pd.DataFrame(llama_index_dataset)

In [None]:
# Model used by the Ragas evaluators. It is passed explicitly to every
# evaluator below so that changing this one value switches the whole suite;
# previously the variable was assigned but never used.
eval_model = "gpt-3.5-turbo"
eval_suite = [
    RagasAnswerCorrectness(model=eval_model),
    RagasFaithfulness(model=eval_model),
    RagasContextRelevancy(model=eval_model),
    RagasAnswerRelevancy(model=eval_model),
]

# Run the evaluation suite
batch_eval_result = EvalRunner.run_suite(
    evals=eval_suite,
    data=llama_index_dataset,
    max_parallel_evals=1,   # If you increase this, you may run into rate limits
)

# Last expression of the cell: render the evaluation results as a table.
pd.DataFrame(batch_eval_result)