# RAG System Demo

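This notebook demonstrates the retrieval-augmented generation (RAG) system for Enhanced Groundedness. It covers sparse (BM25) and dense retrieval over a small sample corpus, the full RAG pipeline with an OpenAI or a local HuggingFace generator, and a short QA evaluation.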


In [None]:
import sys

import torch

# Put the parent directory on the import path so the `src` package resolves
sys.path.insert(0, '..')

from src.retrievers.sparse import BM25Retriever
from src.retrievers.dense import DenseRetriever
from src.generators.llm_generator import LLMGenerator
from src.pipelines.base_rag import BaseRAGPipeline


## 1. Quick Start with Sample Documents


In [None]:
# Toy corpus: one short passage per landmark, indexed by both retrievers below.
# Each entry is a single string; adjacent literals are joined at compile time.
documents = [
    (
        "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. "
        "It is named after the engineer Gustave Eiffel, whose company designed and built the tower."
    ),
    (
        "The Great Wall of China is a series of fortifications made of stone, brick, tamped earth, "
        "wood, and other materials. "
        "It was built along the historical northern borders of China."
    ),
    (
        "The Colosseum is an oval amphitheatre in the centre of the city of Rome, Italy. "
        "It is the largest ancient amphitheatre ever built."
    ),
    (
        "Machu Picchu is a 15th-century Inca citadel situated on a mountain ridge in Peru. "
        "It is the most familiar icon of Inca civilization."
    ),
    (
        "The Taj Mahal is an ivory-white marble mausoleum on the right bank of the river Yamuna "
        "in Agra, India. "
        "It was commissioned in 1632 by the Mughal emperor Shah Jahan."
    ),
]


In [None]:
# Build a BM25 (sparse, keyword-based) retriever over the sample corpus;
# it is configured to return the 3 best matches per query.
sparse_retriever = BM25Retriever(top_k=3)
sparse_retriever.index(documents)

# Run a single query through it; `query` is reused by the dense retriever later
query = "Where is the Eiffel Tower located?"
results = sparse_retriever.retrieve(query)

# Show the query, then the score and the first 100 characters of every hit
print(f"Query: {query}", "\nRetrieved documents:", sep="\n")
for hit in results:
    print(
        f"  Score: {hit.score:.4f}",
        f"  Content: {hit.content[:100]}...\n",
        sep="\n",
    )


## 2. Dense Retrieval


In [None]:
# Build the dense retriever on the BGE embedding model.
# Use the GPU only when PyTorch can see one and fall back to the CPU otherwise,
# so this cell also runs on machines without CUDA instead of failing.
# NOTE(review): assumes DenseRetriever is PyTorch-backed, so torch's view of
# CUDA availability matches what the retriever can actually use.
retriever_device = "cuda" if torch.cuda.is_available() else "cpu"
dense_retriever = DenseRetriever(
    model_name="BAAI/bge-base-en-v1.5",
    top_k=3,
    device=retriever_device,
)
dense_retriever.index(documents)

# Re-run the query defined in the BM25 cell so both rankings can be compared
results = dense_retriever.retrieve(query)

# Show the query, then the score and the first 100 characters of every hit
print(f"Query: {query}")
print("\nRetrieved documents (Dense):")
for doc in results:
    print(f"  Score: {doc.score:.4f}")
    print(f"  Content: {doc.content[:100]}...\n")


## 3. Full RAG Pipeline


In [None]:
# Answer generator that uses the OpenAI backend.
# The OPENAI_API_KEY environment variable must be set before running this cell.
generator = LLMGenerator(model_name="gpt-4o-mini", backend="openai")

# Combine the dense retriever and the generator into one RAG pipeline
# that passes the top 3 retrieved documents along with each question.
rag_pipeline = BaseRAGPipeline(retriever=dense_retriever, generator=generator, top_k=3)


In [None]:
# Send one question through the full pipeline (retrieve, then generate)
question = "Who designed the Eiffel Tower?"
result = rag_pipeline.query(question)

# Report the answer, the latency in milliseconds, and how many documents
# were retrieved; each field is separated from the next by a blank line.
print(
    f"Question: {question}",
    f"\nAnswer: {result.answer}",
    f"\nLatency: {result.latency_ms:.2f} ms",
    f"\nRetrieved {len(result.retrieved_documents)} documents",
    sep="\n",
)


## 4. Local Model (HuggingFace)


In [None]:
# Replace the OpenAI backend with a local HuggingFace model.
# Use the GPU only when PyTorch can see one and fall back to the CPU otherwise,
# so this cell does not fail on machines without CUDA.
# NOTE(review): assumes LLMGenerator accepts device="cpu" for the huggingface
# backend; confirm against the generator implementation.
generator_device = "cuda" if torch.cuda.is_available() else "cpu"
local_generator = LLMGenerator(
    model_name="google/flan-t5-large",
    backend="huggingface",
    device=generator_device,
)

# Same pipeline shape as the OpenAI one: dense retriever + top 3 documents,
# only the generator differs.
local_rag = BaseRAGPipeline(
    retriever=dense_retriever,
    generator=local_generator,
    top_k=3,
)

# Ask the question defined in the previous section so the answers are comparable
result = local_rag.query(question)
print(f"Question: {question}")
print(f"Answer: {result.answer}")


## 5. Evaluation


In [None]:
from src.evaluation.retrieval_metrics import RetrievalEvaluator, QAEvaluator

# Tiny evaluation set: each question is kept next to its reference answer
qa_pairs = [
    ("Where is the Eiffel Tower?", "Paris, France"),
    ("What is the Colosseum?", "An oval amphitheatre in Rome, Italy"),
    ("Where is Machu Picchu?", "Peru"),
]
test_questions = [asked for asked, _ in qa_pairs]
ground_truths = [expected for _, expected in qa_pairs]

# Answer every question with the OpenAI-backed pipeline and keep only the text
results = rag_pipeline.batch_query(test_questions)
predictions = [outcome.answer for outcome in results]

# Score the predicted answers against the references
qa_evaluator = QAEvaluator()
metrics = qa_evaluator.evaluate(predictions, ground_truths)

# Report the two metrics read from the returned mapping
print(
    "QA Evaluation Results:",
    f"  Exact Match: {metrics['exact_match']:.4f}",
    f"  F1 Score: {metrics['f1']:.4f}",
    sep="\n",
)
