# Compare PDF RAG strategies
This notebook demonstrates how to evaluate retrieval-augmented generation (RAG) over text extracted from a PDF, either by direct metadata extraction or with the help of Mistral. Azure OpenAI provides the language models used for answering the questions and scoring the results.


In [None]:
from pathlib import Path
from compare_pdf_rag import (
    extract_metadata_text, extract_mistral_text,
    build_vector_store, answer_questions, evaluate_rag
)


In [None]:
import os

# Run configuration: edit these values to point at your own files.
pdf_path = Path('example.pdf')  # path to your PDF
questions_path = Path('questions.txt')  # read line by line; blank lines are skipped
truths_path = Path('answers.txt')  # read line by line; blank lines are skipped
method = 'metadata'  # or 'mistral'
# Only needed for the Mistral method. The key is taken from the
# MISTRAL_API_KEY environment variable when it is set, so a real secret does
# not have to be saved inside the notebook; the placeholder is the fallback.
mistral_api_key = os.environ.get('MISTRAL_API_KEY', 'YOUR_MISTRAL_API_KEY')


In [None]:
# Select the extraction backend explicitly. An unrecognised `method` (for
# example a typo) raises instead of silently falling through to the Mistral
# call.
if method == 'metadata':
    text = extract_metadata_text(pdf_path)
elif method == 'mistral':
    text = extract_mistral_text(pdf_path, api_key=mistral_api_key)
else:
    raise ValueError(
        f"Unknown method {method!r}; expected 'metadata' or 'mistral'"
    )


In [None]:
# Build the vector store from the extracted text; the store is what
# answer_questions receives in a later cell.
store = build_vector_store(text)


In [None]:
def _read_nonblank_lines(path):
    """Return the stripped, non-empty lines of the UTF-8 text file at *path*."""
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    stripped = (line.strip() for line in path.read_text(encoding='utf-8').splitlines())
    return [line for line in stripped if line]


questions = _read_nonblank_lines(questions_path)
truths = _read_nonblank_lines(truths_path)

# Blank lines are dropped from each file independently, so a stray or missing
# line in one file would leave the two lists with different lengths.
# NOTE(review): this assumes question i is paired with answer i downstream;
# confirm against evaluate_rag.
if len(questions) != len(truths):
    raise ValueError(
        f"{questions_path} has {len(questions)} non-blank lines but "
        f"{truths_path} has {len(truths)}; expected one answer per question"
    )


In [None]:
# Hand the vector store and the question list to answer_questions; the result
# is kept so it can be scored by evaluate_rag in the next cell.
outputs = answer_questions(store, questions)


In [None]:
# Score the generated outputs against the reference answers loaded from
# truths_path.
scores = evaluate_rag(outputs, truths)
# A bare expression as the last line of a notebook cell displays its value.
scores
