## Loading Dataset

In [None]:
import os
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file located by find_dotenv().
load_dotenv(dotenv_path=find_dotenv())

# Experiment identifier; it is also the stem of the generated dataset's file name.
EXPERIMENT_NAME = "hotpot_qa_orchestration"

# The project root is taken to be the parent of the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Parquet file holding the generated data for this experiment.
generated_data_path = os.path.join(
    project_root,
    'data',
    'generated',
    f'{EXPERIMENT_NAME}.parquet',
)

## Evaluating Dataset

In [None]:
# import metrics
from ragas.metrics import (
    ContextPrecision,
    ContextRecall,
    Faithfulness,
    AnswerRelevancy,
    AnswerCorrectness
)

# init metrics with evaluator LLM
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_ollama import ChatOllama
from langchain_huggingface import HuggingFaceEmbeddings

# Evaluator LLM: an Ollama chat model wrapped for use by RAGAS.
# The name is bound once, directly to the wrapped object.
evaluator_llm = LangchainLLMWrapper(
    ChatOllama(model="mistral:7b", temperature=0.1)
)

# Evaluator embeddings: a HuggingFace sentence-transformers model, wrapped for RAGAS.
evaluator_embeddings = LangchainEmbeddingsWrapper(
    HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )
)

# Metrics to compute; each one is constructed with the evaluator LLM.
metrics = [
    metric_cls(llm=evaluator_llm)
    for metric_cls in (Faithfulness, AnswerCorrectness)
]

In [None]:
import pandas as pd
from ragas import evaluate
from ragas import EvaluationDataset

# Load the generated dataset. It is stored as Parquet; the "_from_csv" suffix
# on the variable below is a legacy name kept so later cells keep working.
df = pd.read_parquet(generated_data_path)

# Set the workflow_plan column aside: it is dropped before the frame is handed
# to RAGAS and re-attached to the results afterwards.
workflow_plans = df[['workflow_plan']].copy()

# Convert to RAGAS dataset format (without the workflow_plan column).
ragas_dataset_from_csv = EvaluationDataset.from_pandas(df.drop(columns="workflow_plan"))

# Resolve the output location and create its directory *before* evaluating,
# so a missing or unwritable directory fails immediately rather than after
# the (potentially long) evaluation has finished.
evaluated_dir = os.path.join(project_root, 'data', 'evaluated')
evaluated_path = os.path.join(evaluated_dir, f'{EXPERIMENT_NAME}_eval.csv')
os.makedirs(evaluated_dir, exist_ok=True)

# Run the evaluation with the configured metrics, LLM and embeddings.
result = evaluate(
    metrics=metrics,
    dataset=ragas_dataset_from_csv,
    llm=evaluator_llm,
    embeddings=evaluator_embeddings
)

# Per-sample scores as a DataFrame.
df_results = result.to_pandas()

# Re-attach workflow_plan positionally. This relies on the results having one
# row per input row, in the same order; the row count is checked explicitly,
# the ordering is an assumption about evaluate()/to_pandas().
assert len(df_results) == len(workflow_plans), (
    f"Evaluation returned {len(df_results)} rows for {len(workflow_plans)} "
    "input rows; cannot re-attach workflow_plan by position."
)
df_results['workflow_plan'] = workflow_plans['workflow_plan'].values

# Save the results, workflow_plan column included.
df_results.to_csv(evaluated_path, index=False)

In [None]:
# Preview the first five rows of the evaluation results.
df_results.head(n=5)