In [9]:
import os
import json
import pandas as pd

from rich import print as pprint
from ragas import evaluate, EvaluationDataset
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, SemanticSimilarity

In [11]:
# Columns expected in the evaluation CSV:
#   user_input         - the question
#   reference          - the ground-truth (standard) answer
#   response           - the answer produced by the RAG application
#   retrieved_contexts - the documents retrieved by the RAG pipeline

# Load the CSV, decode the JSON-encoded `retrieved_contexts` text of every row,
# and keep only the four columns listed above, all in one chain so no
# half-processed frame is left bound to `dataset_df`.
dataset_df = (
    pd.read_csv("./evalData_Asia_Cement_Corporation.csv")
    .assign(retrieved_contexts=lambda frame: frame["retrieved_contexts"].apply(json.loads))
    .loc[:, ['user_input', 'reference', 'response', 'retrieved_contexts']]
)

# Wrap the frame in the ragas dataset type consumed by `evaluate`.
eval_dataset = EvaluationDataset.from_pandas(dataset_df)

In [12]:
# Display the dataset's repr as a sanity check on its features and sample count.
eval_dataset

EvaluationDataset(features=['user_input', 'retrieved_contexts', 'response', 'reference'], len=4)

In [13]:
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# Mirror the Azure key under the generic OPENAI_API_KEY variable.
# NOTE(review): presumably needed by a component that only reads OPENAI_API_KEY;
# confirm it is still required.
os.environ["OPENAI_API_KEY"] = os.environ["AZURE_OPENAI_API_KEY"]

# Azure-hosted chat model that acts as the judge; every setting is read from
# the environment, so a missing variable raises KeyError here.
azure_chat_model = AzureChatOpenAI(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    model=os.environ["AZURE_OPENAI_CHAT_MODEL_NAME"],
    validate_base_url=False,
)
# Wrap the LangChain chat model so it can be passed to ragas metrics as `llm=`.
evaluator_llm = LangchainLLMWrapper(azure_chat_model)

# Azure-hosted embedding model; note it uses its own API-version variable.
azure_embedding_model = AzureOpenAIEmbeddings(
    openai_api_version=os.environ["AZURE_OPENAI_API_EMBED_VERSION"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_EMBED_DEPLOYMENT_NAME"],
    model=os.environ["AZURE_OPENAI_EMBED_MODEL_NAME"],
)
# Wrap the LangChain embeddings for metrics that take `embeddings=`
# (SemanticSimilarity in this notebook).
evaluator_embeddings = LangchainEmbeddingsWrapper(azure_embedding_model)

In [14]:
# The first three metrics are all constructed with the evaluator LLM, so build
# them in one pass (order is preserved: recall, factual correctness, faithfulness).
metrics = [
    metric_cls(llm=evaluator_llm)
    for metric_cls in (LLMContextRecall, FactualCorrectness, Faithfulness)
]
# SemanticSimilarity is the one metric here that takes embeddings rather than an LLM.
metrics.append(SemanticSimilarity(embeddings=evaluator_embeddings))

# Score every sample of the dataset with every metric.
results = evaluate(dataset=eval_dataset, metrics=metrics)

Evaluating: 100%|██████████| 16/16 [00:46<00:00,  2.91s/it]


In [15]:
# Convert the evaluation results into a pandas DataFrame for inspection.
eval_result = results.to_pandas()
# Preview the first rows (head() returns at most 5 by default).
eval_result.head()

Unnamed: 0,user_input,retrieved_contexts,response,reference,context_recall,factual_correctness,faithfulness,semantic_similarity
0,"What frameworks does the ESG report follow, an...",[-----Entities-----\nid|entity|description|num...,## Overview of ESG Reporting Frameworks\n\nThe...,## Frameworks Followed in the ESG Report\n\nTh...,0.722222,0.29,1.0,0.899229
1,Is the organizational scope and time frame of ...,[-----Entities-----\nid|entity|description|num...,### Organizational Scope and Time Frame of the...,## Organizational Scope and Time Frame of the ...,0.636364,0.31,0.25,0.757469
2,How does the report link the company’s ESG ini...,[-----Entities-----\nid|entity|description|num...,## Linking ESG Initiatives to Business Strateg...,## Linking ESG Initiatives to Business Strateg...,0.882353,0.37,1.0,0.815731
3,Does the report compare current ESG performanc...,[-----Entities-----\nid|entity|description|num...,### ESG Performance Data Comparisons\n\nThe cu...,## Comparison of ESG Performance with Historic...,1.0,0.52,0.217391,0.872454
