# RAGAS based Evaluation (For the Vanilla Chain)

In [None]:
from dotenv import load_dotenv
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq

load_dotenv()

os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

model = ChatGoogleGenerativeAI(model='gemini-2.0-flash-lite', google_api_key=os.environ["GOOGLE_API_KEY"])

from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader('sample.pdf')
docs = loader.load()

text = ''

for i in range(len(docs)):
    text += docs[i].page_content

from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_text(text)

from langchain_core.documents import Document

docs = [Document(page_content=item) for item in chunks]

In [3]:
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001', google_api_key = os.environ['GOOGLE_API_KEY'])
vector_store = FAISS.from_documents(docs, embeddings)
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k':4})
from langchain_core.runnables import RunnableParallel, RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
prompt = PromptTemplate(template="""
You are a helpful assistant.
Answer ONLY from the provided transcript context.
If the context is insufficient, just say you don't know.
Context: {context}
Question: {question}
                        """, input_variables=['context', 'question'])

In [4]:
def format_docs(retrieved_docs):
    # Handle both list[Document] and single Document
    if not isinstance(retrieved_docs, list):
        retrieved_docs = [retrieved_docs]

    return "\n\n".join(doc.page_content for doc in retrieved_docs)

parallel_chain = RunnableParallel(
    {
    "context": retriever | RunnableLambda(format_docs),
    "question": RunnablePassthrough()
    }
)
parser = StrOutputParser()
normal_chain = parallel_chain | prompt | model | parser
main_chain = parallel_chain | normal_chain

In [5]:
import json
from datasets import Dataset

# Load your json
with open("test_data.json", encoding="utf-8") as f:
    data = json.load(f)

# Flatten
rows = []
for entry in data:
    for q, g in zip(entry["question"], entry["ground_truth"]):
        rows.append({
            "question": q,
            "ground_truth": [g],   # ragas expects a list of answers
            "contexts": [],        # to be filled after retrieval
            "answer": ""           # to be filled after running RAG
        })

# Convert to HuggingFace Dataset
dataset = Dataset.from_list(rows)
print(dataset)


Dataset({
    features: ['question', 'ground_truth', 'contexts', 'answer'],
    num_rows: 78
})


In [6]:
from tqdm import tqdm

def run_rag(example):
    # 1. Retrieve docs separately (to save both docs & answer)
    retrieved_docs = retriever.invoke(example["question"])
    contexts = [doc.page_content for doc in retrieved_docs]

    # 2. Generate answer using your chain
    answer = normal_chain.invoke(example["question"])

    return {
        "contexts": contexts,
        "answer": answer
    }

# Apply to dataset
dataset_with_outputs = dataset.map(run_rag, num_proc=1)

print(dataset_with_outputs[0])




Map:   0%|          | 0/78 [00:00<?, ? examples/s]

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 8
}
].


{'question': "Who are the authors of the paper 'Speculative Thinking: Enhancing Small-Model Reasoning with Large Model Guidance at Inference Time'?", 'ground_truth': ['Wang Yang, Xiang Yue, Vipin Chaudhary, and Xiaotian Han'], 'contexts': ['efficiency while leveraging the large model’s strength when most needed.\na more accurate thinking step. The larger model then generates the next thought segment\nin place of the smaller model, effectively acting as a reasoning supervisor or corrector. This\nlarge-model-aided intervention may enhance the robustness and accuracy of smaller models\nby injecting stronger reasoning capabilities, thus balancing efficiency and performance.\n3 Method: Speculative Thinking\nWe propose a collaborative inference framework termed Speculative Thinking, where a\nsmall model acts as speculative model and a large model serves as target model. Speculative\nmodel performs primary reasoning, while target model intervenes selectively to provide\nauxiliary thoughts whe

In [7]:
from datasets import Dataset

# Convert ground_truth from list[str] -> str
def flatten_ground_truth(example):
    example["ground_truth"] = example["ground_truth"][0]  # take the first (and only) element
    return example

dataset_ready = dataset_with_outputs.map(flatten_ground_truth)

Map:   0%|          | 0/78 [00:00<?, ? examples/s]

In [8]:
os.environ["RAGAS_TRACKING_DISABLED"] = "true"
os.environ["OPENAI_API_KEY"] = "dummy"

In [9]:
# from langchain_groq import ChatGroq

In [None]:
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
    context_precision,
    faithfulness,
    context_recall
)

# gemini_llm = ChatGroq(model="openai/gpt-oss-20b", temperature=0, groq_api_key = "...)
gemini_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-lite", google_api_key="...")

result = evaluate(
    dataset_ready,
    metrics=[answer_correctness, context_precision, faithfulness, context_recall],
    llm=gemini_llm
)
# print(result)

Evaluating:   0%|          | 0/312 [00:00<?, ?it/s]

Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 36
}
].
Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 36
}
].
Retrying langchain_google_

In [14]:
print(result)

{'answer_correctness': nan, 'context_precision': 0.8093, 'faithfulness': 0.8497, 'context_recall': 0.8333}


# Evaluation Results

- **context_precision**: 0.8093
- **faithfulness**: 0.8497
- **context_recall**: 0.8333