# RAG Evaluation


In [1]:
!uv pip install -q \
    requests==2.32.5 \
    python-dotenv==1.2.1 \
    tqdm==4.67.1 \
    litellm==1.78.5

In [None]:
import hashlib
import json
import random
import time
import uuid
from collections import defaultdict
from pathlib import Path

import litellm
import requests
from dotenv import load_dotenv
from tqdm import tqdm

load_dotenv()

True

In [None]:
docs_url = "https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json"
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course["course"]
    for doc in course["documents"]:
        doc["course"] = course_name
        documents.append(doc)

documents[0]

{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
 'section': 'General course-related questions',
 'question': 'Course - When will the course start?',
 'course': 'data-engineering-zoomcamp'}

In [None]:
def generate_document_id(doc):
    combined = f"{doc['course']}-{doc['question']}-{doc['text'][:10]}"
    hash_object = hashlib.md5(combined.encode())
    hash_hex = hash_object.hexdigest()
    document_id = hash_hex[:8]

    return document_id

In [None]:
for doc in documents:
    doc["id"] = generate_document_id(doc)

In [None]:
documents[3]

{'text': "You don't need it. You're accepted. You can also just start learning and submitting homework without registering. It is not checked against any registered list. Registration is just to gauge interest before the start date.",
 'section': 'General course-related questions',
 'question': 'Course - I have registered for the Data Engineering Bootcamp. When can I expect to receive the confirmation email?',
 'course': 'data-engineering-zoomcamp',
 'id': '0bbf41ec'}

In [None]:
hashes = defaultdict(list)

for doc in documents:
    doc_id = doc["id"]
    hashes[doc_id].append(doc)

len(hashes), len(documents)

(947, 948)

In [None]:
for k, v in hashes.items():
    if len(v) > 1:
        print(k, len(v))

593f7569 2


In [None]:
hashes["593f7569"]

[{'text': "They both do the same, it's just less typing from the script.\nAsked by Andrew Katoch, Added by Edidiong Esu",
  'section': '6. Decision Trees and Ensemble Learning',
  'question': 'Does it matter if we let the Python file create the server or if we run gunicorn directly?',
  'course': 'machine-learning-zoomcamp',
  'id': '593f7569'},
 {'text': "They both do the same, it's just less typing from the script.",
  'section': '6. Decision Trees and Ensemble Learning',
  'question': 'Does it matter if we let the Python file create the server or if we run gunicorn directly?',
  'course': 'machine-learning-zoomcamp',
  'id': '593f7569'}]

In [None]:
with open("documents-with-ids.json", "wt") as f_out:
    json.dump(documents, f_out, indent=2)

In [11]:
!head documents-with-ids.json

[
  {
    "text": "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  \u201cOffice Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon\u2019t forget to register in DataTalks.Club's Slack and join the channel.",
    "section": "General course-related questions",
    "question": "Course - When will the course start?",
    "course": "data-engineering-zoomcamp",
    "id": "c02e79ef"
  },
  {
    "text": "GitHub - DataTalksClub data-engineering-zoomcamp#prerequisites",


In [None]:
prompt_template = """
You are a student who's taking a course.
Formulate 5 questions you might ask based on a FAQ record.
If possible, use a fewer words as possible from the record.

The record:

section: {section}
question: {question}
answer: {text}

Provide the output in parsable JSON without using code blocks:

["question1", "question2", "question3"]
""".strip()

In [113]:
!docker exec -it ollama ollama pull gemma3:1b

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 7cd4618c1faf: 100% ▕██████████████████▏ 815 MB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K
pulling dd084c7d92a3: 100% ▕██████████████████▏ 8.4 KB                         [K
pulling 3116c5225075: 100% ▕██████████████████▏   77 B                         [K
pulling 120007c81bf8: 100% ▕██████████████████▏  492 B                         [K
verifying sha256 digest [K
writing manifest [K
success [K

In [112]:
!docker exec -it ollama ollama list

NAME         ID              SIZE      MODIFIED       
gemma3:1b    8648f39daa8f    815 MB    19 minutes ago    


In [None]:
response = litellm.completion(
    model="ollama/gemma3:1b",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain transformers in one paragraph."},
    ],
    api_base="http://localhost:11434",
    api_key="ollama",
    # format="json",
    custom_llm_provider="ollama",
)

print(response.choices[0].message["content"])

Okay, here's an explanation of Transformers in one paragraph:

Transformers are a revolutionary type of neural network architecture that have become incredibly popular in recent years, especially for natural language processing (NLP). Unlike previous models that relied on sequential processing, Transformers process entire sequences of data simultaneously, allowing them to understand relationships between words in a sentence or text much more effectively. They achieve this through a mechanism called "self-attention," which allows the model to weigh the importance of different words in a sequence when generating output, leading to significant improvements in accuracy and the ability to handle long-range dependencies in text.  Essentially, Transformers have shifted the focus from sequential processing to a more parallel and contextual understanding of data, making them incredibly powerful for tasks like translation, text generation, and even code completion.


In [None]:
def generate_questions(doc):
    prompt = prompt_template.format(**doc)

    response = litellm.completion(
        model="ollama/gemma3:1b",
        messages=[
            {"role": "user", "content": prompt},
        ],
        api_base="http://localhost:11434",
        api_key="ollama",
        format="json",
        temperature=0.7,
        top_p=0.9,
        max_tokens=300,
        repeat_penalty=1.1,
        custom_llm_provider="ollama",
    )

    json_response = response.choices[0].message.content
    return json_response

In [None]:
OUTPUT_PATH = Path("generated_questions.json")

if OUTPUT_PATH.exists():
    with OUTPUT_PATH.open("r", encoding="utf-8") as f:
        results = json.load(f)
else:
    results = {}

In [None]:
def save_results(data, path=OUTPUT_PATH):
    tmp_path = path.with_suffix(".tmp")
    with tmp_path.open("w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    tmp_path.replace(path)

In [119]:
for doc in tqdm(documents):
    doc_id = str(doc["id"])

    if doc_id in results:
        continue

    questions = generate_questions(doc)
    results[doc_id] = questions

    save_results(results)

  4%|▍         | 39/948 [59:41<23:11:17, 91.83s/it]


[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.






APIConnectionError: litellm.APIConnectionError: OllamaException - litellm.Timeout: Connection timed out after 600.0 seconds.

In [None]:
len(results)

13