| Metric                | Dimension    | What it tells you                                     |
| --------------------- | ------------ | ----------------------------------------------------- |
| **Faithfulness**      | Groundedness | Answer is based on retrieved docs (not hallucinated). |
| **ResponseRelevancy** | Efficacy     | Answer actually addresses the question.               |
| **AnswerCorrectness** | Coherence    | Answer matches the expected ground truth.             |
| **ContextRecall**     | Completeness | Retrieved context covers all needed info.             |
| **ContextPrecision**  | Precision    | Retrieved context avoids irrelevant info.             |


In [34]:
from datasets import Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas import evaluate
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import (
    AnswerCorrectness,
    ContextPrecision,
    ContextRecall,
    Faithfulness,
    ResponseRelevancy,
)

# Minimal one-row evaluation set: the question, the generated answer, the
# retrieved context passages for that question, and the reference answer.
data = {
    "question": ["Who wrote 1984?"],
    "answer": ["1984 was written by George Orwell."],
    "contexts": [["1984 is a dystopian novel written by George Orwell in 1949."]],
    "ground_truth": ["George Orwell wrote the novel 1984."],
}
dataset = Dataset.from_dict(data)

# NOTE(review): `config` is not defined in this cell; it is presumably loaded
# earlier in the notebook and must contain "OPEN_AI_API_KEY" -- confirm.
openai_api_key = config["OPEN_AI_API_KEY"]

# Judge LLM and embedding model handed to evaluate(); both are wrapped so that
# ragas can drive the LangChain clients.
llm = LangchainLLMWrapper(
    ChatOpenAI(api_key=openai_api_key, model="gpt-4o-mini", temperature=0.1)
)
embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings(api_key=openai_api_key, model="text-embedding-3-small")
)

# One metric per dimension listed in the table above this cell.
metrics = [
    Faithfulness(),        # Groundedness
    ResponseRelevancy(),   # Efficacy
    AnswerCorrectness(),   # Coherence
    ContextRecall(),       # Completeness
    ContextPrecision(),    # Precision
]

results = evaluate(
    dataset,
    metrics=metrics,
    llm=llm,
    embeddings=embeddings,
    show_progress=False,
)
print(results)

{'faithfulness': 1.0000, 'answer_relevancy': 0.9564, 'answer_correctness': 0.9503, 'context_recall': 1.0000, 'context_precision': 1.0000}
