In [17]:
import minsearch

In [18]:
import json

In [19]:
# Load the raw FAQ dump. The encoding is pinned to UTF-8 so decoding does not
# depend on the platform's locale default (e.g. cp1252 on Windows).
with open('document.json', 'rt', encoding='utf-8') as file_in:
    raw_docs = json.load(file_in)

In [20]:
# Flatten the per-course structure into one list of FAQ entries, tagging each
# entry with the name of the course it came from. New dicts are built instead
# of writing into the entries of raw_docs, so raw_docs keeps mirroring the file
# contents and this cell can be re-run safely.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in raw_docs
    for doc in course_dict['documents']
]

In [21]:
# Fields handed to minsearch: the question, answer text and section title are
# registered as text fields; 'course' is registered as a keyword field
# (search_documents filters on it).
faq_text_fields = ['question', 'text', 'section']
faq_keyword_fields = ['course']

index = minsearch.Index(text_fields=faq_text_fields, keyword_fields=faq_keyword_fields)

In [23]:
# Index the flattened FAQ entries. The repr shown as this cell's output
# indicates that fit() hands back the Index object.
index.fit(documents)

<minsearch.minsearch.Index at 0x75275e67f560>

In [24]:
def search_documents(query, course='data-engineering-zoomcamp', num_results=5):
    """Search the module-level ``index`` for FAQ entries matching ``query``.

    Args:
        query: Free-text search string.
        course: Value the ``course`` keyword field must match. Defaults to
            the course that was previously hard-coded.
        num_results: Maximum number of hits requested from the index.
            Defaults to the previously hard-coded 5.

    Returns:
        Whatever ``index.search`` returns; ``rag`` passes it on to
        ``build_prompt``, which iterates over it and reads the ``section``,
        ``question`` and ``text`` keys of each hit.
    """
    # Boost weights: 'question' is weighted 2.0 and 'section' 0.5. 'text' is
    # not listed, so it gets whatever default the index applies.
    boost = {'question': 2.0, 'section': 0.5}
    # Named course_filter rather than `filter` to avoid shadowing the builtin.
    course_filter = {'course': course}

    return index.search(
        query=query,
        boost_dict=boost,
        filter_dict=course_filter,
        num_results=num_results,
    )

In [25]:
from openai import OpenAI

In [26]:
# OpenAI-compatible client pointed at a server on localhost:11434.
# NOTE(review): the "ollama" key looks like a placeholder rather than a real
# secret -- confirm before reusing this cell against a hosted endpoint.
LOCAL_LLM_URL = "http://localhost:11434/v1/"
LOCAL_LLM_KEY = "ollama"

client = OpenAI(base_url=LOCAL_LLM_URL, api_key=LOCAL_LLM_KEY)

In [33]:
def llm(prompt, model='deepseek-r1:1.5b'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the only message with role "user".
        model: Model name passed to the chat-completions call. Defaults to
            'deepseek-r1:1.5b'; 'gemma3:1b' was the alternative noted in the
            original cell and can now be selected without editing the code.

    Returns:
        The ``content`` of the first choice's message.
        NOTE(review): with deepseek-r1 the recorded notebook output starts
        with a '<think>' block, so callers receive the reasoning text as
        part of the answer.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [28]:
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted after ``QUESTION:``.
        search_results: Iterable of dicts, each providing the ``section``,
            ``question`` and ``text`` keys.

    Returns:
        The prompt string with surrounding whitespace stripped. Every line
        starts at column 0: the template is written flush-left so that the
        function's source indentation no longer leaks into the prompt (it
        used to indent the instruction lines and the first context entry
        by four spaces while leaving the other entries unindented).

    Raises:
        KeyError: If a search result lacks one of the three expected keys.
    """
    prompt_template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "If the CONTEXT does not have the Answer then output as you could not find the answer\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One blank line between entries; join avoids repeated string
    # concatenation and leaves no trailing separator to strip.
    context = "\n\n".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}"
        for doc in search_results
    )

    return prompt_template.format(question=query, context=context).strip()

In [29]:
def rag(query):
    """Answer ``query`` by retrieving FAQ entries and asking the LLM.

    The query is passed to ``search_documents``; the hits and the query are
    rendered by ``build_prompt``; the resulting prompt is sent through
    ``llm`` and its return value is returned unchanged.
    """
    retrieved = search_documents(query)
    return llm(build_prompt(query, retrieved))

In [34]:
# Sample question used to exercise the pipeline via rag() in the next cell.
query = "How can install kafka ?"

In [35]:
# Run search, prompt building and the LLM call for the sample question; the
# returned answer string is displayed as this cell's output.
rag(query)

'<think>\nOkay, I need to figure out how to install kafka using Confluent Kafka. Let me look through the context provided.\n\nThe section about Python Kafka mentions installing dependencies with pip and conda. The answers are:\n\npip install confluent-kafka or conda install conda-forge::python-confluent-kafka\nfastavro: pip install fastavro\n\nWait, but I think Confluent Kafka is a Conecter to Conex client library (CL). So for Python users, they\'d just use those pip commands. For Java users? There\'s an alternative provided which says to use the kafka-python-ng CLI because older releases have issues.\n\nSo if someone wants to run this in their terminal, they need to have the KNAPP pausable CLI installed as an additional dependency. The answer for that is to install it with:\n\nnps install k Nap\n\nIt also notes that after installing, you can set up the kafka jar correctly and import it into a Java application.\n\nIn summary, Confluent Kafka packages are available only in Python using 