In [1]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into a single list of documents, tagging
# each one with the name of the course it came from.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        # Build a copy rather than mutating the entry inside `documents_raw`,
        # so the raw payload stays as downloaded and the cell is safe to re-run.
        documents.append({**doc, 'course': course_name})

In [2]:
from minsearch import AppendableIndex

# Build the search index over the flattened FAQ documents.
# The `search` helper defined later in this notebook filters on "course" and
# boosts "question" / "section", which are the fields declared here.
index = AppendableIndex(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

# Kept as the cell's last expression so the notebook displays what fit() returns.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x11eddabd0>

In [3]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up FAQ entries matching `query` in the notebook-level `index`.

    Args:
        query: Free-text query passed to `index.search`.
        course: Value used to filter results on the "course" field.
            Defaults to the course the notebook originally hard-coded.
        num_results: Maximum number of results requested from the index.
            Defaults to the originally hard-coded 5.

    Returns:
        Whatever `index.search` returns; `build_prompt` in this notebook
        iterates it and reads the "section", "question" and "text" keys
        of each item.
    """
    # Matches in "question" are weighted 3.0 and matches in "section" 0.5.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
        # NOTE(review): presumably attaches document ids to each result --
        # confirm against the minsearch documentation.
        output_ids=True,
    )


In [4]:
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()


def build_prompt(query, search_results):
    """Fill `prompt_template` with the question and the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for `{question}`.
        search_results: Iterable of mappings, each providing the keys
            "section", "question" and "text".

    Returns:
        The formatted prompt with leading and trailing whitespace removed.
        Each entry is rendered as three labelled lines followed by a blank
        line; an empty `search_results` yields an empty context.
    """
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]
    filled = prompt_template.format(question=query, context="".join(entries))
    return filled.strip()

In [5]:
import ollama

def llm(prompt, model="gemma3:12b"):
    """Send `prompt` as a single user message to an Ollama chat model.

    Args:
        prompt: Full prompt text, sent as the content of one "user" message.
        model: Name of the Ollama model to query. Defaults to the model the
            notebook originally hard-coded.

    Returns:
        The value returned by `ollama.chat` with `stream=True`. Callers in
        this notebook iterate over it and read `chunk.message.content` from
        each item.
    """
    return ollama.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        # Streaming lets callers print the answer as it is generated.
        stream=True,
    )

def rag(query, debug=False):
    """Answer `query` with retrieval-augmented generation.

    Runs `search` for the query, builds a prompt from the hits with
    `build_prompt`, and passes that prompt to `llm`.

    Args:
        query: The user's question.
        debug: When true, print the prompt that is sent to the model.

    Returns:
        The value returned by `llm` -- a stream the caller iterates over to
        read the answer piece by piece.
    """
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    if debug:
        # Printed before the model call so the prompt is visible even if
        # that call fails.
        print(f"Prompt: {prompt}")
    answer = llm(prompt)
    if debug:
        # `answer` is a lazy stream: formatting it into a string would only
        # show the stream object's repr, and consuming it here would leave
        # nothing for the caller to iterate.
        print("Answer: <streamed -- iterate over the returned value to read it>")
    return answer


In [6]:
# Ask a question unrelated to the course, with debug output enabled so the
# prompt built from the retrieved FAQ entries is printed.
response = rag("What is the capital of France?", debug=True)

# Print the streamed answer piece by piece as it arrives, without newlines
# between the pieces.
for chunk in response:
    print(chunk.message.content, end='', flush=True)

Prompt: You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
What is the capital of France?
</QUESTION>

<CONTEXT>
section: Module 1: Docker and Terraform
question: PGCLI - case sensitive use “Quotations” around columns with capital letters
answer: PULocationID will not be recognized but “PULocationID” will be. This is because unquoted "Localidentifiers are case insensitive. See docs.

section: Module 1: Docker and Terraform
question: PGCLI - no pq wrapper available.
answer: ImportError: no pq wrapper available.
Attempts made:
- couldn't import \dt
opg 'c' implementation: No module named 'psycopg_c'
- couldn't import psycopg 'binary' implementation: No module named 'psycopg_binary'
- couldn't import psycopg 'python' implementation: libpq library not found
Solution:
First, make sure your Python is set to 3.9, at least.
And the reason for that is we have had cases o

In [7]:
# Ask a general question; the query string is passed to rag() exactly as typed.
response = rag("is is possbile to learn data engineering in 6 months?")

# Print the streamed answer piece by piece as it arrives.
for chunk in response:
    print(chunk.message.content, end='', flush=True)

This question cannot be answered from the provided context. The context does not contain information about how long it takes to learn data engineering.

In [8]:
# Use a raw error message as the query.
response = rag("No module named 'psycopg2'")

# Print the streamed answer piece by piece as it arrives.
for chunk in response:
    print(chunk.message.content, end='', flush=True)

Issue: You're encountering a `ModuleNotFoundError: No module named 'psycopg2'` error.

Solution: You can try installing `psycopg2-binary` using `pip install psycopg2-binary`. If that doesn's work, try updating it with `pip install psycopg2-binary --upgrade`. If the issue persists, try updating conda or pip before installing `psycopg2` again. If you're still facing errors related to `pg_config`, you may need to install PostgreSQL (e.g., `brew install postgresql` on macOS).