In [13]:
from datasets import load_dataset
from rich import print as pprint
# Load the "english_v3" configuration of the explodinggradients/amnesty_qa dataset.
# NOTE(review): trust_remote_code=True allows the dataset repository's loading
# script to execute locally -- confirm the source is trusted before re-running.
dataset = load_dataset("explodinggradients/amnesty_qa", "english_v3", trust_remote_code=True)

Repo card metadata block was not found. Setting CardData to empty.


In [2]:
# Columns of the "eval" split:
# user_input: the question
# reference: the ground-truth answer
# response: the answer produced by the RAG application
# retrieved_contexts: the documents retrieved by the RAG pipeline
dataset

DatasetDict({
    eval: Dataset({
        features: ['user_input', 'reference', 'response', 'retrieved_contexts'],
        num_rows: 20
    })
})

In [4]:
import pandas as pd

# Materialise the "eval" split as a pandas DataFrame so it can be inspected as a table.
# pd.DataFrame(...) is used deliberately: the next cell shows that the
# retrieved_contexts cells stay plain Python lists with this construction.
dataset_df = pd.DataFrame(data=dataset["eval"])
dataset_df.head(5)

Unnamed: 0,user_input,reference,response,retrieved_contexts
0,What are the global implications of the USA Su...,The global implications of the USA Supreme Cou...,The global implications of the USA Supreme Cou...,"[- In 2022, the USA Supreme Court handed down ..."
1,Which companies are the main contributors to G...,"According to the Carbon Majors database, the m...","According to the Carbon Majors database, the m...","[In recent years, there has been increasing pr..."
2,Which private companies in the Americas are th...,The largest private companies in the Americas ...,"According to the Carbon Majors database, the l...",[The issue of greenhouse gas emissions has bec...
3,What action did Amnesty International urge its...,Amnesty International urged its supporters to ...,Amnesty International urged its supporters to ...,"[In the case of the Ogoni 9, Amnesty Internati..."
4,What are the recommendations made by Amnesty I...,The recommendations made by Amnesty Internatio...,Amnesty International made several recommendat...,"[In recent years, Amnesty International has fo..."


In [18]:
# Inspect a single retrieved_contexts cell (row 0): first its Python type,
# then its contents pretty-printed through rich.
first_row_contexts = dataset_df.loc[0, "retrieved_contexts"]
print(type(first_row_contexts))
pprint(first_row_contexts)

<class 'list'>


In [19]:
from ragas import EvaluationDataset

# Convert the Hugging Face "eval" split into a ragas EvaluationDataset,
# which is what evaluate() receives as its `dataset` argument later on.
eval_dataset = EvaluationDataset.from_hf_dataset(dataset["eval"])

In [20]:
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, SemanticSimilarity
from ragas import evaluate

In [21]:
import os
# Mirror the Azure OpenAI key into OPENAI_API_KEY for this process.
# Raises KeyError if AZURE_OPENAI_API_KEY is not set in the environment.
os.environ["OPENAI_API_KEY"] = os.environ["AZURE_OPENAI_API_KEY"]

# Azure OpenAI settings shared by the evaluator chat model and the embedding model.
# Replace every value with the ones from your own Azure OpenAI resource.
azure_config = dict(
    base_url="https://llmresource.openai.azure.com/",  # resource endpoint
    model_deployment="gpt-4o-mini",  # chat model deployment name
    model_name="gpt-4o-mini",  # chat model name
    embedding_deployment="text-embedding-3-small",  # embedding deployment name
    embedding_name="text-embedding-3-small",  # embedding model name
)

In [22]:
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# Chat model that acts as the judge for the LLM-based metrics.
# Endpoint, deployment and model name all come from azure_config.
azure_chat_client = AzureChatOpenAI(
    openai_api_version="2024-08-01-preview",
    azure_endpoint=azure_config["base_url"],
    azure_deployment=azure_config["model_deployment"],
    model=azure_config["model_name"],
    validate_base_url=False,
)
evaluator_llm = LangchainLLMWrapper(azure_chat_client)

# Embedding model used by the embedding-based metric (SemanticSimilarity in this notebook).
# Note that it is pinned to a different API version than the chat model above.
azure_embedding_client = AzureOpenAIEmbeddings(
    openai_api_version="2023-05-15",
    azure_endpoint=azure_config["base_url"],
    azure_deployment=azure_config["embedding_deployment"],
    model=azure_config["embedding_name"],
)
evaluator_embeddings = LangchainEmbeddingsWrapper(azure_embedding_client)

In [23]:
# The three LLM-judged metrics all take the same evaluator LLM; build them in
# the order they should appear in the results (context recall, factual
# correctness, faithfulness).
llm_judged_metrics = [
    metric_cls(llm=evaluator_llm)
    for metric_cls in (LLMContextRecall, FactualCorrectness, Faithfulness)
]

# SemanticSimilarity is the only metric here that relies on embeddings instead of the LLM.
metrics = [*llm_judged_metrics, SemanticSimilarity(embeddings=evaluator_embeddings)]

# Score every row of the evaluation dataset with every metric. The recorded run
# shows 80 evaluation jobs (20 rows x 4 metrics) taking roughly one minute.
results = evaluate(dataset=eval_dataset, metrics=metrics)

Evaluating: 100%|██████████| 80/80 [01:07<00:00,  1.19it/s]


In [24]:
# Convert the evaluation results to a pandas DataFrame and preview the first rows.
# Each row keeps the original sample fields and gains one score column per metric
# (context_recall, factual_correctness, faithfulness, semantic_similarity).
df = results.to_pandas()
df.head()

Unnamed: 0,user_input,retrieved_contexts,response,reference,context_recall,factual_correctness,faithfulness,semantic_similarity
0,What are the global implications of the USA Su...,"[- In 2022, the USA Supreme Court handed down ...",The global implications of the USA Supreme Cou...,The global implications of the USA Supreme Cou...,1.0,0.62,0.866667,0.876302
1,Which companies are the main contributors to G...,"[In recent years, there has been increasing pr...","According to the Carbon Majors database, the m...","According to the Carbon Majors database, the m...",1.0,0.33,0.12,0.803631
2,Which private companies in the Americas are th...,[The issue of greenhouse gas emissions has bec...,"According to the Carbon Majors database, the l...",The largest private companies in the Americas ...,1.0,0.43,0.0,0.890809
3,What action did Amnesty International urge its...,"[In the case of the Ogoni 9, Amnesty Internati...",Amnesty International urged its supporters to ...,Amnesty International urged its supporters to ...,1.0,0.25,0.6,0.733717
4,What are the recommendations made by Amnesty I...,"[In recent years, Amnesty International has fo...",Amnesty International made several recommendat...,The recommendations made by Amnesty Internatio...,1.0,0.06,0.047619,0.759165
