In [None]:
import requests 

# Download the FAQ dataset: a JSON list of courses, each with its own list of documents.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course nesting into one list, tagging every document with the
# name of the course it came from so it can be filtered on later.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [None]:
from minsearch import AppendableIndex

# Build the search index over the flattened FAQ documents.
# "question", "text" and "section" are registered as text fields, while "course"
# is registered as a keyword field (search() filters on it via filter_dict).
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Index every document collected in `documents`.
index.fit(documents)

In [None]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up FAQ entries matching ``query`` in the module-level ``index``.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` keyword field must match. Defaults to
            'data-engineering-zoomcamp', the value that used to be hard-coded.
        num_results: Maximum number of entries to request. Defaults to 5.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Weight matches in "question" more heavily and matches in "section" less;
    # "text" has no explicit boost (presumably the library default applies -- confirm).
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
        output_ids=True,
    )

In [None]:
# Prompt for the plain RAG flow: the model is told to answer only from the supplied CONTEXT.
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()


def build_prompt(query, search_results):
    """Render ``prompt_template`` for ``query`` using the retrieved FAQ entries.

    Each item of ``search_results`` must provide the keys 'section', 'question'
    and 'text'; every entry is rendered as three labelled lines followed by a
    blank line, and the entries are concatenated to form the context.
    """
    entries = (
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )
    filled = prompt_template.format(question=query, context="".join(entries))
    return filled.strip()

In [None]:
from openai import OpenAI
# Shared client used by llm(). It is constructed with no explicit arguments --
# presumably credentials are picked up from the environment; confirm before running.
client = OpenAI()

def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the model's reply text.

    Args:
        prompt: Full prompt text; sent as the only message, with role "user".
        model: Chat model name passed to the API. Defaults to 'gpt-4o-mini',
            the value that used to be hard-coded.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

def rag(query):
    """Answer ``query`` in one pass: search the FAQ, build the prompt, ask the LLM.

    Returns the text produced by ``llm`` for the prompt built from the search hits.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [None]:
# Try the basic RAG pipeline end to end (search -> prompt -> LLM) on a sample question.
rag("When does the course start?")

In [None]:
# Agentic prompt: the model is asked to reply with a JSON object whose "action"
# is either SEARCH (request an FAQ lookup) or ANSWER (with "source" set to
# CONTEXT or OWN_KNOWLEDGE).
# The doubled braces ({{ and }}) are str.format escapes, so the JSON examples
# come out with single braces after .format(question=..., context=...).
# NOTE: this rebinds the module-level `prompt_template` that build_prompt()
# reads at call time, so rag() invoked after this cell uses this template too.
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
{question}
</QUESTION>

<CONTEXT> 
{context}
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}
""".strip()

In [None]:
# First turn of the agentic prompt with a sample question: the context is the
# literal sentinel "EMPTY", which the template tells the model to treat as
# "nothing retrieved yet".
question = "how do I run docker on gentoo?"
context = "EMPTY"

# Show the fully rendered prompt before sending it.
prompt = prompt_template.format(question=question, context=context)
print(prompt)

# Raw model reply (expected to be one of the JSON templates from the prompt).
answer = llm(prompt)
print(answer)

In [None]:
# Same first-turn experiment with a different sample question.
# `question` stays bound at module level and is reused by the search cell further down.
question = "how do I join the course?"
context = "EMPTY"

prompt = prompt_template.format(question=question, context=context)
answer = llm(prompt)
print(answer)

In [None]:
def build_context(search_results):
    """Format FAQ search hits as one context string for the prompt.

    Each item of ``search_results`` must provide the keys 'section', 'question'
    and 'text'. Entries are rendered as three labelled lines, separated by a
    blank line; leading and trailing whitespace of the combined string is
    stripped, so an empty result list yields "".
    """
    rendered = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]
    return "".join(rendered).strip()

In [None]:
# Second turn, done by hand: run the search for the `question` left over from the
# previous cell, render the hits as context, and rebuild the prompt with it.
search_results = search(question)
context = build_context(search_results)
prompt = prompt_template.format(question=question, context=context)
print(prompt)

In [None]:
# Send the prompt built in the previous cell (now carrying retrieved context) to the model.
answer = llm(prompt)
print(answer)

In [26]:
import json

def _parse_llm_json(raw):
    """Decode the model's reply as JSON, tolerating a surrounding Markdown code fence."""
    text = raw.strip()
    if text.startswith("```"):
        # Drop the opening fence line (``` or ```json) and, if present, the
        # closing fence, so only the JSON payload reaches json.loads.
        _, _, text = text.partition("\n")
        text = text.rstrip()
        if text.endswith("```"):
            text = text[:-3]
    return json.loads(text)


def _ask(question, context):
    """Render the agentic prompt, query the LLM, print and return the decoded reply."""
    prompt = prompt_template.format(question=question, context=context)
    answer = _parse_llm_json(llm(prompt))
    print(answer)
    return answer


def agentic_rag_v1(question):
    """Answer ``question`` with at most one model-requested FAQ search.

    The model is first asked with the sentinel context "EMPTY". If its reply
    has ``"action": "SEARCH"``, the FAQ index is searched with the question,
    the hits are rendered with ``build_context`` and the model is asked again
    with that context. Each decoded reply is printed as it arrives.

    Args:
        question: The student's question.

    Returns:
        The decoded JSON reply of the last model call.

    Raises:
        json.JSONDecodeError: If a reply is not valid JSON, even after a
            surrounding code fence has been removed.
    """
    answer = _ask(question, context="EMPTY")

    # .get() so a reply without an "action" key is returned as-is instead of
    # raising KeyError.
    if answer.get('action') == 'SEARCH':
        print('need to perform search...')
        search_results = search(question)
        answer = _ask(question, build_context(search_results))

    return answer

In [27]:
# Course-related question. In the recorded run the model first replied with
# SEARCH and then answered from the retrieved CONTEXT.
agentic_rag_v1('how do I join the course?')

{'action': 'SEARCH', 'reasoning': 'The context is empty, and I need to refer to the FAQ database to find information on how to join the course.'}
need to perform search...
{'action': 'ANSWER', 'answer': "To join the course, you need to register before the course starts using the provided registration link. You can also subscribe to the course's public Google Calendar for updates and join the Telegram channel for announcements. The course begins on 15th January 2024 at 17h00, at which point the first 'Office Hours' session will take place.", 'source': 'CONTEXT'}


{'action': 'ANSWER',
 'answer': "To join the course, you need to register before the course starts using the provided registration link. You can also subscribe to the course's public Google Calendar for updates and join the Telegram channel for announcements. The course begins on 15th January 2024 at 17h00, at which point the first 'Office Hours' session will take place.",
 'source': 'CONTEXT'}

In [28]:
# Another sample question. In the recorded run the model replied with ANSWER
# straight away, so no search step was performed.
agentic_rag_v1('how patch KDE under FreeBSD?')

{'action': 'ANSWER', 'answer': "To patch KDE under FreeBSD, you typically need to follow these steps: \n1. **Install the Development Environment**: Make sure you have the necessary development tools and libraries installed, such as 'devel/git' and 'devel/gmake'.\n2. **Obtain the Source Code**: Fetch the KDE source code from the KDE repositories or FreeBSD's ports collection. You can use `git` or `portsnap` to get the latest ports.\n3. **Create a Patch**: Modify the source code as required and create a patch file using `diff` or similar tools. For instance, use `diff -u original_file modified_file > mypatch.patch` to create your patch.\n4. **Apply the Patch**: Navigate to the directory containing the source code and apply your patch using the `patch` command: `patch < mypatch.patch`.\n5. **Build and Install**: Once the patch is applied, you can build the source using 'make' and install it using 'make install'. Make sure to resolve any dependencies that arise during the compile process.\

{'action': 'ANSWER',
 'answer': "To patch KDE under FreeBSD, you typically need to follow these steps: \n1. **Install the Development Environment**: Make sure you have the necessary development tools and libraries installed, such as 'devel/git' and 'devel/gmake'.\n2. **Obtain the Source Code**: Fetch the KDE source code from the KDE repositories or FreeBSD's ports collection. You can use `git` or `portsnap` to get the latest ports.\n3. **Create a Patch**: Modify the source code as required and create a patch file using `diff` or similar tools. For instance, use `diff -u original_file modified_file > mypatch.patch` to create your patch.\n4. **Apply the Patch**: Navigate to the directory containing the source code and apply your patch using the `patch` command: `patch < mypatch.patch`.\n5. **Build and Install**: Once the patch is applied, you can build the source using 'make' and install it using 'make install'. Make sure to resolve any dependencies that arise during the compile process.