In [None]:
# !pip install --upgrade pip
# !pip install -q sentence-transformers
# !pip install ragas datasets

In [21]:
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    context_recall,
    context_precision,
    faithfulness,
    answer_correctness,
    answer_similarity
)


In [22]:

import numpy as np
from sentence_transformers import SentenceTransformer, util
from dotenv import load_dotenv
from openai import OpenAI
import os

# --- Load API Key ---
load_dotenv(override=True)
my_api_key = os.getenv("OPENAI_API_KEY")

client = OpenAI(api_key=my_api_key)


In [23]:
# --- Retriever: Get top-k docs ---
def get_top_k_similar(query, k=3):
    documents = [
        {"section": "Pay Policies", "content": "Employees are paid bi-weekly via direct deposit."},
        {"section": "Leave of Absence", "content": "Employees must submit a leave request for approval."},
        {"section": "Internet Use", "content": "Company internet must be used for work-related tasks only."}
    ]

    texts = [doc["content"] for doc in documents]
    model = SentenceTransformer("all-MiniLM-L6-v2")
    doc_vectors = model.encode(texts, convert_to_tensor=True)
    query_vec = model.encode(query, convert_to_tensor=True)

    similarities = util.cos_sim(query_vec, doc_vectors)[0].cpu().numpy()
    top_k_idx = np.argsort(similarities)[::-1][:k]

    return [documents[int(idx)] for idx in top_k_idx]



In [24]:



# --- Generator: Use OpenAI with retrieved docs ---
def generate_answer(query, contexts):
    context_text = " ".join(contexts)
    prompt = f"Answer the question based only on the following context:\n{context_text}\n\nQuestion: {query}\nAnswer:"
    
    completion = client.chat.completions.create(
        model="gpt-4o-mini",  # or gpt-3.5-turbo
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200,
    )
    return completion.choices[0].message.content.strip()



In [25]:

# --- Build dataset for Ragas ---
def build_dataset():
    query = "How often do employees get paid?"
    retrieved_docs = get_top_k_similar(query, 3)
    contexts = [d["content"] for d in retrieved_docs]

    # Gold reference
    gold_answer = "Employees are paid bi-weekly via direct deposit."

    # Generate answer using LLM
    model_answer = generate_answer(query, contexts)

    examples = [
        {
            "question": query,
            "answer": model_answer,        # LLM-generated answer
            "contexts": contexts,         
            "reference": gold_answer,     
            "ground_truths": [gold_answer]
        }
    ]
    return Dataset.from_list(examples)



In [29]:


dataset = build_dataset()

# --- All metrics across retriever, generator, and end-to-end ---
all_metrics = [
    context_recall, context_precision,  # Retriever
    
    faithfulness, answer_correctness, answer_similarity  # Generator

]

results = evaluate(dataset, metrics=all_metrics)

print("\n🔹 Full RAG Evaluation Results")
print(results)


Evaluating: 100%|██████████| 5/5 [00:05<00:00,  1.16s/it]



🔹 Full RAG Evaluation Results
{'context_recall': 1.0000, 'context_precision': 1.0000, 'faithfulness': 1.0000, 'answer_correctness': 0.7408, 'answer_similarity': 0.9632}


In [None]:

# --- All metrics across retriever, generator, and end-to-end ---
all_metrics = [
    context_recall, context_precision,      # Retriever
    faithfulness, answer_correctness, answer_similarity  # Generator
]

results = evaluate(dataset, metrics=all_metrics)

print("\n🔹 Full RAG Evaluation Results (Aggregated)")
print(results)

# --- Show detailed per-example breakdown ---
print("\n🔹 Detailed Scores (per question & metric)")
for i, row in enumerate(dataset):
    print(f"\nQ{i+1}: {row['question']}")
    print(f"  Answer: {row['answer']}")
    print(f"  Ground Truth: {row['reference']}")
    print(f"  Contexts: {row['contexts']}")
    
    # Each metric is a column in `results` matching the dataset rows
    for metric in all_metrics:
        metric_name = metric.__name__ if hasattr(metric, "__name__") else str(metric)
        score = results[metric_name][i]
        print(f"    {metric_name}: {score:.3f}")


🔹 Detailed Scores (per question & metric)

Q1: How often do employees get paid?
  Answer: Employees get paid bi-weekly.
  Ground Truth: Employees are paid bi-weekly via direct deposit.
  Contexts: ['Employees are paid bi-weekly via direct deposit.', 'Employees must submit a leave request for approval.', 'Company internet must be used for work-related tasks only.']


KeyError: 'ContextRecall(_required_columns={<MetricType.SINGLE_TURN: \'single_turn\'>: {\'retrieved_contexts\', \'reference\', \'user_input\'}}, name=\'context_recall\', llm=None, output_type=<MetricOutputType.CONTINUOUS: \'continuous\'>, context_recall_prompt=ContextRecallClassificationPrompt(instruction=Given a context, and an answer, analyze each sentence in the answer and classify if the sentence can be attributed to the given context or not. Use only \'Yes\' (1) or \'No\' (0) as a binary classification. Output json with reason., examples=[(QCA(question=\'What can you tell me about albert Albert Einstein?\', context="Albert Einstein (14 March 1879 - 18 April 1955) was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time. Best known for developing the theory of relativity, he also made important contributions to quantum mechanics, and was thus a central figure in the revolutionary reshaping of the scientific understanding of nature that modern physics accomplished in the first decades of the twentieth century. His mass-energy equivalence formula E = mc2, which arises from relativity theory, has been called \'the world\'s most famous equation\'. He received the 1921 Nobel Prize in Physics \'for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect\', a pivotal step in the development of quantum theory. His work is also known for its influence on the philosophy of science. In a 1999 poll of 130 leading physicists worldwide by the British journal Physics World, Einstein was ranked the greatest physicist of all time. His intellectual achievements and originality have made Einstein synonymous with genius.", answer=\'Albert Einstein, born on 14 March 1879, was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time. He received the 1921 Nobel Prize in Physics for his services to theoretical physics. He published 4 papers in 1905. Einstein moved to Switzerland in 1895.\'), ContextRecallClassifications(classifications=[ContextRecallClassification(statement=\'Albert Einstein, born on 14 March 1879, was a German-born theoretical physicist, widely held to be one of the greatest and most influential scientists of all time.\', reason=\'The date of birth of Einstein is mentioned clearly in the context.\', attributed=1), ContextRecallClassification(statement=\'He received the 1921 Nobel Prize in Physics for his services to theoretical physics.\', reason=\'The exact sentence is present in the given context.\', attributed=1), ContextRecallClassification(statement=\'He published 4 papers in 1905.\', reason=\'There is no mention about papers he wrote in the given context.\', attributed=0), ContextRecallClassification(statement=\'Einstein moved to Switzerland in 1895.\', reason=\'There is no supporting evidence for this in the given context.\', attributed=0)]))], language=english), max_retries=1)'