# RAG Experiment

This notebook runs two experiments:
1. Query the LLM directly for a set of questions and store the answers.
2. Load PDF documents into Qdrant and re-run the questions using retrieval-augmented generation (RAG).


In [1]:
import json
from pathlib import Path

from qdrant_utils import (
    answer_with_context,
    get_qdrant_client,
    load_pdf_and_chunk,
    embed_chunks,
    store_embeddings_in_qdrant,
    retrieve_similar_chunks,
)

from experiment_utils import (
    load_processed_ids,
    append_result_to_csv,
    precision_n,
    recall_n,
    f1_n
)

# --- Inputs -----------------------------------------------------------------
# JSON file holding the evaluation questions and their gold answers.
DATA_PATH = "question.json"
# Directory that is scanned for the PDF papers to index.
PDF_DIR = Path("papers")

# --- Vector store -----------------------------------------------------------
# Name of the Qdrant collection the PDF chunks are stored in / retrieved from.
COLLECTION_NAME = "rag_papers_test2"

# --- Outputs ----------------------------------------------------------------
# Experiment 1 results (LLM queried with an empty context list).
GROUND_TRUTH_CSV = "results_ground_truth.csv"
# Experiment 2 results (LLM queried with retrieved chunks as context).
RAG_CSV = "results_rag.csv"




## Load evaluation questions
The JSON file contains questions grouped by paper, plus additional metadata questions.
Only the per-paper content questions are flattened here into a single list with their gold answers.

In [2]:
# Read the evaluation dataset; everything of interest lives under the
# top-level 'evaluation_dataset' key.
with open(DATA_PATH, 'r', encoding='utf-8') as f:
    data = json.load(f)['evaluation_dataset']

# Flatten the per-paper content questions into one list, keeping only the
# id, the question text and the gold answer. Papers without a 'questions'
# entry contribute nothing.
questions = [
    {
        'question_id': entry['question_id'],
        'question': entry['question'],
        'answer': entry['answer'],
    }
    for paper in data['papers']
    for entry in paper.get('questions', [])
]

# Displayed as the cell output: number of questions loaded.
len(questions)


35

## Experiment 1: direct LLM answers
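Each question is sent to the LLM without any additional context. Each answer is scored against the gold answer (precision, recall and F1 for n = 1 and n = 2) and the results are saved to *results_ground_truth.csv*. Question IDs returned by `load_processed_ids` are skipped, so an interrupted run can be resumed.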

In [3]:
# Experiment 1: ask the LLM every question with an empty context list and
# append one scored row per question to GROUND_TRUTH_CSV.
# Ids reported by load_processed_ids are skipped so the cell can be re-run.
processed_ids = load_processed_ids(GROUND_TRUTH_CSV)

for entry in questions:
    qid = entry["question_id"]
    if qid not in processed_ids:
        prediction = answer_with_context(entry["question"], [])
        reference = entry["answer"]

        # n = 1 scores (stored under the "*-1" / "ROUGE-1" columns)
        precision_1 = precision_n(prediction, reference, 1)
        recall_1 = recall_n(prediction, reference, 1)
        rouge_1 = f1_n(precision_1, recall_1)

        # n = 2 scores (stored under the "*-2" / "ROUGE-2" columns)
        precision_2 = precision_n(prediction, reference, 2)
        recall_2 = recall_n(prediction, reference, 2)
        rouge_2 = f1_n(precision_2, recall_2)

        append_result_to_csv(
            {
                "question_id": qid,
                "question_string": entry["question"],
                "answer_llm": prediction,
                "answer_gold": reference,
                "precision-1": precision_1,
                "recall-1": recall_1,
                "ROUGE-1": rouge_1,
                "precision-2": precision_2,
                "recall-2": recall_2,
                "ROUGE-2": rouge_2,
            },
            GROUND_TRUTH_CSV,
        )
        # Remember the id so a duplicate later in `questions` is not re-asked.
        processed_ids.add(qid)


## Load PDF documents into Qdrant
All PDF files are chunked, embedded and stored in the collection defined above.

In [4]:
# Connect to Qdrant and index every PDF found directly inside PDF_DIR:
# each file is chunked, the chunks are embedded, and both are stored in
# COLLECTION_NAME.
client = get_qdrant_client()

# Path.glob yields entries in filesystem-dependent order; sorting makes the
# ingestion order reproducible across machines and re-runs.
pdf_paths = sorted(PDF_DIR.glob('*.pdf'))

# Fail fast instead of silently indexing nothing: otherwise the RAG experiment
# would run against a collection that received no documents, and its results
# would be recorded as processed.
if not pdf_paths:
    raise FileNotFoundError(f"No PDF files found in {PDF_DIR.resolve()}")

# NOTE(review): re-running this cell may store the same chunks again, depending
# on how store_embeddings_in_qdrant assigns point ids — TODO confirm.
for pdf in pdf_paths:
    chunks = load_pdf_and_chunk(str(pdf))
    embeddings = embed_chunks(chunks)
    store_embeddings_in_qdrant(client, COLLECTION_NAME, chunks, embeddings)


## Experiment 2: RAG answers
For each question we retrieve the 5 most similar chunks from Qdrant (`top_k=5`) and pass them to the LLM as context. Results are written to *results_rag.csv*.

In [None]:
# Experiment 2: for every question, fetch the top-5 similar chunks from the
# Qdrant collection, hand them to the LLM as context, and append one scored
# row per question to RAG_CSV.
# Ids reported by load_processed_ids are skipped so the cell can be re-run.
processed_ids = load_processed_ids(RAG_CSV)

for item in questions:
    qid = item["question_id"]
    if qid not in processed_ids:
        retrieved = retrieve_similar_chunks(item["question"], client, COLLECTION_NAME, top_k=5)
        rag_answer = answer_with_context(item["question"], retrieved)
        expected = item["answer"]

        # Identification columns first; the score columns are appended below
        # in the same order as the CSV layout used by experiment 1.
        row = {
            "question_id": qid,
            "question_string": item["question"],
            "answer_llm": rag_answer,
            "answer_gold": expected,
        }

        # n = 1 scores
        row["precision-1"] = precision_n(rag_answer, expected, 1)
        row["recall-1"] = recall_n(rag_answer, expected, 1)
        row["ROUGE-1"] = f1_n(row["precision-1"], row["recall-1"])

        # n = 2 scores
        row["precision-2"] = precision_n(rag_answer, expected, 2)
        row["recall-2"] = recall_n(rag_answer, expected, 2)
        row["ROUGE-2"] = f1_n(row["precision-2"], row["recall-2"])

        append_result_to_csv(row, RAG_CSV)
        # Remember the id so a duplicate later in `questions` is not re-asked.
        processed_ids.add(qid)
