In [None]:
import pandas as pd


# Load the answers table from the working directory and preview its first
# five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='answers.csv')
df.head(n=5)

In [None]:
import json

import os

from datasets import Dataset 

from dotenv import load_dotenv

load_dotenv()

import ast


def _parse_contexts(raw):
    """Deserialize one serialized `context` cell into a Python object.

    The cell is tried as JSON first and, failing that, as a Python literal
    (the form produced by `str(list_of_strings)`). Unlike a blanket
    "replace every ' with \"" rewrite, this leaves apostrophes and double
    quotes inside the passage text untouched.

    Args:
        raw: The serialized value read from the `context` column.

    Returns:
        The decoded object (presumably a list of passage strings — verify
        against the code that writes `answers.csv`).

    Raises:
        TypeError: If `raw` is not a string (e.g. NaN from an empty cell).
        ValueError: If `raw` is neither valid JSON nor a Python literal.
    """
    if not isinstance(raw, str):
        raise TypeError(
            f"context must be a string, got {type(raw).__name__}: {raw!r}"
        )
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Not JSON as-is; fall through and try the Python-literal form.
        pass
    try:
        return ast.literal_eval(raw)
    except (ValueError, SyntaxError) as exc:
        raise ValueError(f"cannot parse context cell: {raw[:80]!r}") from exc


# Column-oriented samples for the evaluation dataset. `df` itself is left
# unmodified so that re-running this cell always starts from the loaded data.
data_samples = {
    'question': df['query'].tolist(),
    'answer': df['answer'].tolist(),
    'contexts': [_parse_contexts(x) for x in df['context'].tolist()],
    'ground_truth': df['gold_answers'].tolist(),
}

dataset = Dataset.from_dict(data_samples)


In [None]:
from langchain.chat_models import GigaChat
from langchain_community.embeddings import GigaChatEmbeddings

from ragas import evaluate, RunConfig
# If ragas is not being used via gigaragas, remove the `_ru` suffix from the metric names below.
from ragas.metrics import faithfulness_ru,  answer_relevancy_ru, context_precision_ru, context_utilization_ru, context_recall_ru

# Chat model handed to `evaluate` as `llm`. The credential is read from the
# GIGACHAT_AUTH_KEY environment variable (None when the variable is unset).
# NOTE(review): verify_ssl_certs=False presumably turns off TLS certificate
# verification — confirm this is intentional for the target environment.
llm = GigaChat(
    model="GigaChat-2",
    temperature=0.0,
    scope="GIGACHAT_API_PERS",
    credentials=os.environ.get("GIGACHAT_AUTH_KEY"),
    verify_ssl_certs=False,
)

# The embedding model has to be configured separately from the chat model.
# HuggingFace embeddings can be used as an alternative to GigaChat embeddings.
# The credential comes from the same GIGACHAT_AUTH_KEY environment variable.
embeddings = GigaChatEmbeddings(
    scope="GIGACHAT_API_PERS",
    credentials=os.environ.get("GIGACHAT_AUTH_KEY"),
    verify_ssl_certs=False,
)


def on_error(e, job):
    """Print a three-line report for a failed job to stdout.

    Registered under the "on_error" key of the callbacks passed to `RunConfig`.

    Args:
        e: The error to report; printed via its string form.
        job: The job associated with the error; printed via its string form.

    Returns:
        None.
    """
    report_lines = (
        ("ERROR",),
        ("JOB:", job),
        ("ERROR:", e),
    )
    for parts in report_lines:
        print(*parts)

# Run settings for `evaluate`: a single worker, plus the `on_error` hook.
# NOTE(review): confirm that the installed ragas build's RunConfig accepts a
# `callbacks` argument — it may be specific to the gigaragas distribution.
run_config = RunConfig(
    callbacks=dict(on_error=on_error),
    max_workers=1,
)

# Run the five `_ru` metrics over the dataset with the GigaChat chat model
# and embeddings, then show the result as a DataFrame in the cell output.
score = evaluate(
    dataset=dataset,
    llm=llm,
    embeddings=embeddings,
    run_config=run_config,
    metrics=[
        faithfulness_ru,
        answer_relevancy_ru,
        context_precision_ru,
        context_utilization_ru,
        context_recall_ru,
    ],
)
score.to_pandas()

In [None]:
# Write the evaluation results to eval_metrics.csv without the index column.
score.to_pandas().to_csv(
    path_or_buf='eval_metrics.csv',
    index=False,
)