In [1]:
import minsearch
import json
from openai import OpenAI
# Shared OpenAI client used by the chat-completion call later in this notebook.
# No credentials are passed here, so the client presumably picks them up from
# its environment (e.g. OPENAI_API_KEY) — TODO confirm.
client = OpenAI()

In [2]:
# Load the raw FAQ dump. The encoding is pinned to UTF-8 (the standard JSON
# encoding) instead of relying on the platform's locale default, which can
# fail or garble non-ASCII text on systems whose default is not UTF-8.
with open('documents.json', encoding='utf-8') as f:
    docs_raw = json.load(f)


# Flatten the per-course structure into one list of FAQ entries, tagging each
# entry with the course it belongs to.
documents = []

for course_dict in docs_raw:
    for doc in course_dict['documents']:
        # The entry is annotated in place, so the dicts inside docs_raw gain
        # the 'course' key as well.
        doc['course'] = course_dict['course']
        documents.append(doc)

In [3]:
# Build the search index over the flattened FAQ entries: "question", "text"
# and "section" are registered as text fields, "course" as a keyword field
# (search() filters on "course").
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"],
)

# Bare last expression of the cell: whatever fit() returns is what the
# notebook displays as this cell's output.
index.fit(documents)

<minsearch.Index at 0x7966fe2dac20>

In [18]:
def search(query, num_results=5, course="data-engineering-zoomcamp"):
    """Query the FAQ index for the entries most relevant to ``query``.

    Args:
        query: Free-text search string.
        num_results: Maximum number of results requested from the index.
        course: Value the "course" keyword field must match. Defaults to
            the course that used to be hard-coded here, so existing callers
            are unaffected. Pass ``None`` to search without a course filter.

    Returns:
        The result of ``index.search`` for these arguments.
    """
    # Per-field boost weights handed to the index.
    boosts = {'question': 3.0, 'section': 0.5}

    # An empty filter dict means "do not restrict by course".
    filters = {} if course is None else {"course": course}

    return index.search(
        query=query,
        filter_dict=filters,
        boost_dict=boosts,
        num_results=num_results,
    )

In [12]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user question and retrieved FAQ entries.

    Args:
        query: The student's question; substituted for QUESTION in the prompt.
        search_results: Iterable of dicts, each providing the keys
            'section', 'question' and 'text'.

    Returns:
        The prompt string with surrounding whitespace stripped. With no
        search results the prompt ends right after the "CONTEXT:" header.
    """
    # The template is built from explicit lines rather than an indented
    # triple-quoted string: the latter leaked the source indentation into the
    # prompt (instruction lines started with four spaces while the context
    # lines started at column 0).
    prompt_template = "\n".join([
        "You're a course assistant helping a student with a question about the course. "
        "Answer the QUESTION based on the CONTEXT from the FAQ database.",
        "Use only the information provided in the CONTEXT.",
        "If the CONTEXT does not contain answer, output NONE.",
        "",
        "QUESTION: {question}",
        "",
        "CONTEXT:",
        "{context}",
    ])

    # One block per retrieved entry, separated by a blank line.
    context = "\n\n".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}"
        for doc in search_results
    )

    # format() substitutes the values verbatim, so braces inside the question
    # or the FAQ text are safe.
    return prompt_template.format(question=query, context=context).strip()

In [13]:
def llm(prompt, model="gpt-4o"):
    """Send ``prompt`` as a single user message and return the model's reply.

    Args:
        prompt: Full prompt text, sent as the content of one "user" message.
        model: Name of the chat model to call. Defaults to "gpt-4o", the
            value that used to be hard-coded here.

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt},
        ],
    )

    # Only the first choice is read.
    return response.choices[0].message.content

In [20]:
def rag(query, num_results=10):
    """Answer ``query`` by retrieval-augmented generation.

    Runs the three pipeline stages in order: retrieve FAQ entries with
    ``search``, turn them into a prompt with ``build_prompt``, and ask the
    model via ``llm``.

    Args:
        query: The user's question.
        num_results: How many search results to retrieve as context.
            Defaults to 10, the value that used to be hard-coded here.

    Returns:
        The value returned by ``llm`` for the assembled prompt.
    """
    results = search(query, num_results)
    prompt = build_prompt(query, results)
    return llm(prompt)

In [21]:
# Try the full pipeline on a sample question.
query = "how do I run kafka?"
# Bare last expression, so the returned answer string is shown as the cell output.
# NOTE(review): the recorded answer ends with the prompt's own
# "If the CONTEXT does not contain answer, output NONE." sentence echoed
# verbatim — the prompt may need a look.
rag(query)

'To run Kafka producer/consumer/kstreams/etc in the terminal, you can use the following command in the project directory:\n\n```sh\njava -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n```\n\nMake sure to replace `<jar_name>` with the actual name of your jar file.\n\nIf the CONTEXT does not contain answer, output NONE.'