In [8]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# Bound the wait so a stalled connection cannot hang the cell indefinitely.
docs_response = requests.get(docs_url, timeout=30)
# Fail here on an HTTP error rather than with a confusing JSON decode error below.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course groups into a single list, tagging every document
# with the name of the course it came from.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [9]:
# Peek at one flattened record to confirm it now carries a 'course' key.
documents[2]

{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
 'section': 'General course-related questions',
 'question': 'Course - Can I still join the course after the start date?',
 'course': 'data-engineering-zoomcamp'}

In [10]:
import sys

# Go up one level, then into folder 01-intro, so modules stored there can be imported.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if "../01-intro" not in sys.path:
    sys.path.append("../01-intro")

In [11]:
import minsearch

# Build the FAQ index over the flattened documents. 'course' is the field
# that search() filters on; the remaining fields are registered as text fields.
index = minsearch.Index(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

# Kept as the last expression so the cell displays fit()'s return value.
index.fit(documents)

<minsearch.Index at 0x76d7a5456840>

In [12]:
import pandas as pd
import os

In [13]:
from mistralai import Mistral

# Model name shared by every chat call in this notebook.
model = "mistral-large-latest"
# Read the key from the environment so it never appears in the notebook;
# a missing variable raises KeyError right here.
api_key = os.environ["MISTRAL_API_KEY"]

client = Mistral(api_key=api_key)

In [14]:
# Smoke test: send one unrelated question to confirm the client and model work
# before wiring them into the retrieval pipeline.
chat_response = client.chat.complete(
    model=model,
    messages=[{"role": "user", "content": "What is the best French cheese?"}],
)
print(chat_response.choices[0].message.content)

The "best" French cheese is highly subjective—it depends on personal taste, texture preferences, and how you plan to enjoy it (on a cheese board, melted, in cooking, etc.). However, here are some of the most **celebrated and iconic French cheeses**, each with its own unique character:

### **Top Contenders for "Best" French Cheese**
1. **Comté** (AOP)
   - **Type**: Hard, cooked pressed cheese (like Gruyère but nuttier).
   - **Why it’s great**: Aged 4 months to 3+ years, with flavors ranging from buttery and mild to crystalline, caramelized, and complex. Perfect for melting (fondue, croque-monsieur) or eating on its own.
   - **Best for**: Everyday eating, cooking, wine pairings (especially Jura wines).

2. **Roquefort** (AOP)
   - **Type**: Blue cheese (sheep’s milk).
   - **Why it’s great**: Creamy, tangy, and salty with a sharp blue mold. One of the oldest and most famous blue cheeses in the world.
   - **Best for**: Bold flavor lovers, pairing with Sauternes (sweet wine) or walnut

In [15]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up FAQ entries matching ``query`` in the module-level ``index``.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` field must match. Defaults to the
            data engineering course, which was previously hard-coded.
        num_results: Maximum number of results requested from the index.
            Defaults to 5, the previously hard-coded value.

    Returns:
        Whatever ``index.search`` returns; ``build_prompt`` iterates over it
        and reads the ``section``, ``question`` and ``text`` keys of each item.
    """
    # Per-field weights handed to the index: 'question' gets 3.0 and
    # 'section' gets 0.5 (presumably relative to an unboosted field at 1.0).
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )


In [16]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user question and the retrieved FAQ entries.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of dicts providing the ``section``,
            ``question`` and ``text`` keys.

    Returns:
        The formatted prompt with leading and trailing whitespace stripped.
    """
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One block per retrieved entry, each followed by a blank line.
    entries = (
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    )
    context = "".join(entries)

    # The final strip() removes the blank line left after the last entry.
    return template.format(question=query, context=context).strip()

In [17]:
def llm(prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    Uses the module-level ``client`` and ``model``; returns the message
    content of the first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.complete(model=model, messages=[user_message])

    first_choice = completion.choices[0]
    return first_choice.message.content

In [18]:
def rag(query):
    """Answer ``query`` by chaining the three steps: search, build_prompt, llm."""
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [19]:
# End-to-end run of the pipeline: search, build the prompt, ask the model.
rag('how do I run kafka?')

"To run Kafka, follow these instructions based on your use case:\n\n### **For Java Kafka (Producer/Consumer/KStreams):**\nRun the following command in the project directory:\n```bash\njava -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n```\n(Replace `<jar_name>` with the actual JAR filename.)\n\n---\n\n### **For Python Kafka (Producer/Consumer):**\n1. **Set up a virtual environment** (if not already done):\n   ```bash\n   python -m venv env\n   source env/bin/activate  # Linux/MacOS\n   # OR\n   env\\Scripts\\activate     # Windows\n   pip install -r ../requirements.txt\n   ```\n   *(Activate the environment every time before running Python Kafka scripts.)*\n\n2. **Ensure Docker containers are running** (if applicable).\n\n3. **Fix permission issues** (if encountering `./build.sh: Permission denied`):\n   ```bash\n   chmod +x build.sh\n   ```\n\n4. **If you get `ModuleNotFoundError: No module named 'kafka.vendor.six.moves'`:**\n   Use the al

In [20]:
# Second end-to-end run with a differently worded enrollment question.
rag('the course has already started, can I still enroll?')

'Yes, you can still enroll in the course even though it has already started. You will be eligible to submit homeworks, but be mindful of the deadlines for final projects. Avoid leaving everything until the last minute.'