# RAG Search

In [1]:
!pip install minsearch



In [2]:
import requests 

# Download the FAQ dump: a list of courses, each carrying its own list of documents.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten into a single list, tagging every document with the course it came from.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [3]:
from minsearch import AppendableIndex

# Build the FAQ search index. "question", "text" and "section" are registered
# as text fields; "course" is a keyword field, which `search` filters on.
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Index the flattened documents. `fit` returns the index itself, which is why
# this cell displays its repr.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x724d28402e70>

In [4]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up FAQ entries matching ``query`` in the module-level ``index``.

    Args:
        query: Free-text search string.
        course: Value the ``course`` keyword field must equal. Defaults to
            the course this notebook works with.
        num_results: Maximum number of hits requested from the index.

    Returns:
        The result of ``index.search``: a list of document dicts. Because
        ``output_ids=True`` is passed, each hit also carries an ``_id`` key,
        which ``dedup`` relies on.
    """
    # Question matches are boosted (3.0), section matches are damped (0.5).
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
        output_ids=True,
    )

In [5]:
question = 'Can I still join the course?'

In [6]:
# Prompt skeleton for the plain RAG flow; build_prompt fills in {question} and {context}.
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()


def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The student's question, inserted into the QUESTION section.
        search_results: Iterable of dicts providing ``section``, ``question``
            and ``text`` keys; they are rendered in order into the CONTEXT
            section.

    Returns:
        The filled-in ``prompt_template`` with surrounding whitespace stripped.
    """
    entries = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]
    filled = prompt_template.format(question=query, context="".join(entries))
    return filled.strip()

In [7]:
search_results = search(question)

In [8]:
prompt = build_prompt(question, search_results)

In [9]:
from openai import OpenAI
client = OpenAI()

def llm(prompt):
    """Send ``prompt`` as a single user message to gpt-4o-mini and return the reply text."""
    user_turn = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[user_turn],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [10]:
answer = llm(prompt)

In [11]:
print(answer)

Yes, you can still join the course even after the start date. You are eligible to submit homework without registering, but be mindful of deadlines for turning in the final projects.


In [12]:
def rag(query):
    """Answer ``query`` end to end: retrieve FAQ entries, build the prompt, ask the LLM."""
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [13]:
rag("How do I patch KDE under FreeBSD?")

"I'm sorry, but there is no information available in the context about patching KDE under FreeBSD. Please refer to additional resources or documentation for guidance on this topic."

# Agentic RAG

In [14]:
# Prompt for the single-step agent: the model replies with a JSON object that
# either asks for a FAQ search ("SEARCH") or gives an answer ("ANSWER").
# The template is filled with str.format, so only {question} and {context}
# are placeholders; the doubled braces {{ }} are literal JSON braces.
# NOTE: this rebinds `prompt_template`, replacing the plain-RAG template above.
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
{question}
</QUESTION>

<CONTEXT> 
{context}
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}
""".strip()

In [15]:
question = 'How can I run Docker on Windows 10?'
context= 'EMPTY'

In [16]:
prompt = prompt_template.format(question=question, context=context)

In [17]:
answer = llm(prompt)

In [18]:
print(answer)

{
"action": "ANSWER",
"answer": "To run Docker on Windows 10, you'll need to follow these steps:\n\n1. **Check Windows Version**: Ensure you have Windows 10 Pro, Enterprise, or Education (64-bit) installed, as Docker Desktop requires Hyper-V to be enabled.\n\n2. **Enable Hyper-V**: Go to 'Control Panel' > 'Programs' > 'Turn Windows features on or off'. Check 'Hyper-V', then click OK. You may need to restart your computer.\n\n3. **Download Docker Desktop**: Visit the Docker website and download Docker Desktop for Windows.\n\n4. **Install Docker Desktop**: Run the installer and follow the prompts. Make sure to enable the option to use WSL 2 instead of Hyper-V if you prefer using WSL.\n\n5. **Start Docker**: After installation, launch Docker Desktop from your Start menu. It may take a few minutes for it to initialize.\n\n6. **Verify Installation**: Open a command prompt or PowerShell and run `docker --version`. You should see the installed version of Docker.\n\n7. **Run Your First Contain

In [19]:
question = 'Can I still join the course?'
context = 'EMPTY'

In [20]:
prompt = prompt_template.format(question=question, context=context)

In [21]:
answer_json = llm(prompt)

In [22]:
import json

In [23]:
answer = json.loads(answer_json)

In [24]:
print(answer)

{'action': 'SEARCH', 'reasoning': 'The student is inquiring about the possibility of joining the course, and as there is no specific context provided regarding enrollment timelines or criteria, I will refer to the FAQ database for this information.'}


In [25]:
answer['action']

'SEARCH'

In [26]:
# Unlike build_prompt, this only renders the context block; filling the
# prompt template is left to the caller.
def build_context(search_results):
    """Render FAQ entries as the text that fills a template's ``{context}`` slot.

    Args:
        search_results: Iterable of dicts providing ``section``, ``question``
            and ``text`` keys.

    Returns:
        The entries rendered in order and separated by blank lines, with
        leading/trailing whitespace stripped; ``""`` when there are no entries.
    """
    rendered = "".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )
    return rendered.strip()

In [27]:
search_results = search(question)
context = build_context(search_results)
prompt = prompt_template.format(question=question, context=context)

In [28]:
answer_json = llm(prompt)

In [29]:
print(answer_json)

{
"action": "ANSWER",
"answer": "Yes, you can still join the course even after the start date. You're eligible to submit homework assignments, but keep in mind that there are deadlines for final projects, so it's best not to procrastinate.",
"source": "CONTEXT"
}


# Agentic Search

In [30]:
def dedup(seq):
    """Drop repeated search hits, keeping the first one seen for every ``_id``.

    Args:
        seq: Iterable of dicts, each with an ``_id`` key.

    Returns:
        A new list with the surviving elements in their original order.
    """
    first_by_id = {}
    for item in seq:
        # setdefault leaves an already-stored entry untouched, so the earliest hit wins
        first_by_id.setdefault(item['_id'], item)
    return list(first_by_id.values())

search_results = dedup(search_results)

In [31]:
# Prompt for the multi-step agent loop. It is filled with str.format using the
# placeholders {question}, {context}, {search_queries}, {previous_actions},
# {max_iterations} and {iteration_number}; the doubled braces {{ }} are literal
# JSON braces. The model is asked to reply with one of three JSON actions:
# "SEARCH" (with a "keywords" list), "ANSWER_CONTEXT" or "ANSWER".
# NOTE: this rebinds `prompt_template`, replacing the two-placeholder template
# defined earlier in the notebook.
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than {max_iterations} iterations for a given student question.
The current iteration number: {iteration_number}. If we exceed the allowed number 
of iterations, give the best possible answer with the provided information.

Output templates:

If you want to perform search, use this template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>",
"keywords": ["search query 1", "search query 2", ...]
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER_CONTEXT",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}

<QUESTION>
{question}
</QUESTION>

<SEARCH_QUERIES>
{search_queries}
</SEARCH_QUERIES>

<CONTEXT> 
{context}
</CONTEXT>

<PREVIOUS_ACTIONS>
{previous_actions}
</PREVIOUS_ACTIONS>
""".strip()

In [32]:
question = "how do I do well on Module 1"
max_iterations = 3
iteration_number = 0
search_queries= []
search_results = []
previous_actions = []

In [33]:
context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number= iteration_number
)

In [34]:
answer_json = llm(prompt)

In [35]:
answer = json.loads(answer_json)

In [36]:
print(answer)

{'action': 'SEARCH', 'reasoning': 'To provide specific tips and resources regarding doing well in Module 1, I will search for information related to study strategies, assessment methods, and resources available for this module.', 'keywords': ['tips for doing well in Module 1', 'study strategies Module 1', 'Module 1 resources', 'Module 1 assessments']}


In [37]:
previous_actions.append(answer)

In [38]:
previous_actions

[{'action': 'SEARCH',
  'reasoning': 'To provide specific tips and resources regarding doing well in Module 1, I will search for information related to study strategies, assessment methods, and resources available for this module.',
  'keywords': ['tips for doing well in Module 1',
   'study strategies Module 1',
   'Module 1 resources',
   'Module 1 assessments']}]

In [39]:
keywords = answer['keywords']

In [40]:
# Run one FAQ search per keyword suggested by the model, remembering the
# query and pooling the hits.
for kw in keywords:
    search_queries.append(kw)
    sr = search(kw)
    search_results.extend(sr) # extend, not append: sr is a list of hits and each one is added individually

In [41]:
len(search_results) # need to check if we have any duplicates

20

In [42]:
search_results = dedup(search_results)

In [43]:
len(search_results)

8

In [44]:
iteration_number = 2

context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number= iteration_number
)

In [45]:
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current iteration number

In [46]:
answer_json = llm(prompt)

In [47]:
print(answer_json)

{
"action": "SEARCH",
"reasoning": "To gather more specific information about the assessment criteria, recommended study strategies, and resources for Module 1 to effectively answer how to do well on it.",
"keywords": ["Module 1 study tips", "Module 1 assessment criteria", "recommended resources for Module 1", "Module 1 study strategies"]
}


In [48]:
question = "what do I need to do to be successful at module 1?"

search_queries = []
search_results = []
previous_actions = []


iteration = 0

# Agent loop: show the model the question plus everything gathered so far,
# execute the SEARCH actions it asks for, and stop at its first non-SEARCH action.
while True:
    print(f'ITERATION #{iteration}...')

    context = build_context(search_results)
    prompt = prompt_template.format(
        question=question,
        context=context,
        search_queries="\n".join(search_queries),
        previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
        max_iterations=3,
        iteration_number=iteration
    )

    print(prompt)

    answer_json = llm(prompt)
    answer = json.loads(answer_json)
    print(json.dumps(answer, indent=2))

    previous_actions.append(answer)

    action = answer['action']
    if action != 'SEARCH':
        break

    keywords = answer['keywords']

    for k in keywords:
        # Record each query once, in the order it was first issued. A set
        # union would shuffle the queries, so the prompt would differ between
        # otherwise identical runs.
        if k not in search_queries:
            search_queries.append(k)
        res = search(k)
        search_results.extend(res)

    search_results = dedup(search_results)

    iteration = iteration + 1
    # Hard stop one round past the budget of 3 advertised in the prompt.
    if iteration >= 4:
        break

    print()

ITERATION #0...
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current 

In [49]:
# Can also define it as a function
def agentic_search(question, max_iterations=3):
    """Let the model alternate between FAQ searches and answering ``question``.

    Each round the model is shown the question, the context gathered so far,
    the queries already issued and its previous actions, and replies with a
    JSON action. ``SEARCH`` actions are executed and their hits are added to
    the context; any other action ends the loop.

    Args:
        question: The student's question.
        max_iterations: Iteration budget advertised to the model in the
            prompt. The loop makes at most ``max_iterations + 1`` model
            calls, i.e. the model gets one more round after the budget is
            used up.

    Returns:
        The last action dict produced by the model. If the hard cap is hit,
        this can still be a ``SEARCH`` action rather than an answer.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
        KeyError: If the reply has no ``action`` key.
    """
    search_queries = []
    search_results = []
    previous_actions = []

    iteration = 0

    while True:
        print(f'ITERATION #{iteration}...')

        context = build_context(search_results)
        prompt = prompt_template.format(
            question=question,
            context=context,
            search_queries="\n".join(search_queries),
            previous_actions='\n'.join(json.dumps(a) for a in previous_actions),
            max_iterations=max_iterations,
            iteration_number=iteration,
        )

        print(prompt)

        answer = json.loads(llm(prompt))
        print(json.dumps(answer, indent=2))

        previous_actions.append(answer)

        if answer['action'] != 'SEARCH':
            break

        # A SEARCH reply without keywords is treated as "nothing new to look up".
        for keyword in answer.get('keywords') or []:
            if keyword in search_queries:
                # Already searched: the index has not changed, so the hits
                # would be the same documents and dedup would drop them.
                continue
            # Appending keeps the queries in the order they were issued, so
            # the prompt is stable across runs (a set union is not).
            search_queries.append(keyword)
            search_results.extend(search(keyword))

        search_results = dedup(search_results)

        iteration += 1
        # Hard stop one round past the advertised budget.
        if iteration > max_iterations:
            break

        print()

    return answer

In [50]:
agentic_search(question)

ITERATION #0...
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current 

{'action': 'ANSWER',
 'answer': 'To be successful in Module 1, which focuses on Docker and Terraform, consider the following strategies: \n1. **Understand the Basics**: Make sure you have a solid understanding of Docker and Terraform principles. Familiarize yourself with containerization concepts and infrastructure as code.\n2. **Hands-On Practice**: Engage in practical exercises by setting up Docker containers and configuring Terraform for infrastructure management. The best way to learn is by doing.\n3. **Utilize Resources**: Take advantage of recommended readings, tutorials, or online resources related to Docker and Terraform. They can provide deeper insights and practical examples.\n4. **Active Participation**: Collaborate with peers, participate in discussions, and ask questions in forums related to the module. Engaging with others can enhance your understanding.\n5. **Complete Assignments on Time**: Stay on top of assignments and projects. Consistent work will prevent last-minute

## Function Calling ("tool use")

In [51]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up FAQ entries matching ``query`` in the module-level ``index``.

    This is the function exposed to the model as the ``search`` tool; the
    model supplies only ``query``, the other parameters keep their defaults.

    Args:
        query: Free-text search string.
        course: Value the ``course`` keyword field must equal. Defaults to
            the course this notebook works with.
        num_results: Maximum number of hits requested from the index.

    Returns:
        The result of ``index.search``: a list of document dicts. Because
        ``output_ids=True`` is passed, each hit also carries an ``_id`` key.
    """
    # Question matches are boosted (3.0), section matches are damped (0.5).
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
        output_ids=True,
    )

In [52]:
# Description of the `search` function handed to the model through `tools`.
# "name" must match the Python function name, because the call is later
# resolved with globals()[name]; "parameters" is a JSON schema with a single
# required string argument, `query`.
search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query text to look up in the course FAQ."
            }
        },
        "required": ["query"],
        "additionalProperties": False
    }
}

In [53]:
question = "How do I do well in module 1?"

developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.
""".strip()

tools = [search_tool]

chat_messages = [
    {"role": "developer", "content": developer_prompt},
    {"role": "user", "content": question}
]

response = client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)
response.output

[ResponseFunctionToolCall(arguments='{"query":"how to do well in module 1"}', call_id='call_89tP9Y41Zv8OUDVeeGYXXmPB', name='search', type='function_call', id='fc_68760e807b448199945e1f85037bd3db0505d1073d365922', status='completed')]

In [54]:
calls = response.output
calls

[ResponseFunctionToolCall(arguments='{"query":"how to do well in module 1"}', call_id='call_89tP9Y41Zv8OUDVeeGYXXmPB', name='search', type='function_call', id='fc_68760e807b448199945e1f85037bd3db0505d1073d365922', status='completed')]

In [55]:
call = calls[0]
call

ResponseFunctionToolCall(arguments='{"query":"how to do well in module 1"}', call_id='call_89tP9Y41Zv8OUDVeeGYXXmPB', name='search', type='function_call', id='fc_68760e807b448199945e1f85037bd3db0505d1073d365922', status='completed')

In [56]:
call_id = call.call_id
call_id

'call_89tP9Y41Zv8OUDVeeGYXXmPB'

In [57]:
f_name = call.name
f_name

'search'

In [58]:
arguments = json.loads(call.arguments)
arguments

{'query': 'how to do well in module 1'}

In [59]:
# globals() returns the notebook's global namespace as a dict, so the function can be looked up by its name
f = globals()[f_name]
f

<function __main__.search(query)>

In [60]:
results = f(**arguments)

In [61]:
# save the results as json
search_results = json.dumps(results, indent =2)
print(search_results)

[
  {
    "text": "Even after installing pyspark correctly on linux machine (VM ) as per course instructions, faced a module not found error in jupyter notebook .\nThe solution which worked for me(use following in jupyter notebook) :\n!pip install findspark\nimport findspark\nfindspark.init()\nThereafter , import pyspark and create spark contex<<t as usual\nNone of the solutions above worked for me till I ran !pip3 install pyspark instead !pip install pyspark.\nFilter based on conditions based on multiple columns\nfrom pyspark.sql.functions import col\nnew_final.filter((new_final.a_zone==\"Murray Hill\") & (new_final.b_zone==\"Midwood\")).show()\nKrishna Anand",
    "section": "Module 5: pyspark",
    "question": "Module Not Found Error in Jupyter Notebook .",
    "course": "data-engineering-zoomcamp",
    "_id": 322
  },
  {
    "text": "You need to look for the Py4J file and note the version of the filename. Once you know the version, you can update the export command accordingly, th

In [62]:
# Put the model's function call into the conversation, followed by our result
# for it; `call_id` ties the output to that call.
chat_messages.append(call)

# `search_results` is the JSON string produced from the search hits above.
chat_messages.append({
    "type": "function_call_output",
    "call_id": call.call_id,
    "output": search_results,
})

In [63]:
response = client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)

In [64]:
r = response.output[0]
print(r.content[0].text)

To do well in Module 1, here are some tips and common issues you might encounter:

1. **Understand the Basics**: Familiarize yourself with Docker and Terraform concepts, as they are fundamental to this module.

2. **Follow the Instructions Carefully**: Make sure to follow all installation instructions and setup guidelines provided in the course, especially for Docker and any dependencies.

3. **Resolve Common Errors**:
   - **Module Import Issues**: You may encounter errors like `ModuleNotFoundError: No module named 'psycopg2'`. To fix this, install the module using:
     ```bash
     pip install psycopg2
     ```
   - **SQLAlchemy Issues**: If you face issues with `create_engine`, ensure your connection string is formatted correctly:
     ```python
     conn_string = "postgresql+psycopg://username:password@localhost:port/database"
     engine = create_engine(conn_string)
     ```

4. **Utilize Resources**: Don’t hesitate to refer to course materials and community forums for troublesho

In [70]:
# If we want the model to make multiple search calls for one question

developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.
If you look up something in FAQ, convert the student question into multiple queries.
""".strip()

chat_messages = [
    {"role": "developer", "content": developer_prompt},
    {"role": "user", "content": question}
]

response = client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)

In [71]:
def do_call(tool_call_response):
    """Execute the function requested by a model tool call and wrap its result.

    Args:
        tool_call_response: Tool-call item exposing ``name`` (the function to
            run), ``arguments`` (a JSON object string with its keyword
            arguments) and ``call_id``.

    Returns:
        A ``function_call_output`` dict carrying the same ``call_id`` and the
        function's result serialized as indented JSON.
    """
    requested = tool_call_response.name
    kwargs = json.loads(tool_call_response.arguments)
    # The tool name doubles as the name of a function in this module's globals.
    outcome = globals()[requested](**kwargs)

    reply = {"type": "function_call_output"}
    reply["call_id"] = tool_call_response.call_id
    reply["output"] = json.dumps(outcome, indent=2)
    return reply

In [72]:
# Record every item of the model's reply in the conversation; run requested
# tool calls and append their outputs, and print the text of message items.
for entry in response.output:
    chat_messages.append(entry)
    print(entry.type)

    if entry.type == 'function_call':
        result = do_call(entry)
        chat_messages.append(result)
    elif entry.type == 'message':
        # Message items keep their text in content parts; they have no
        # top-level `.text` attribute.
        print(entry.content[0].text)

function_call
function_call
function_call


In [73]:
# Second round-trip: the conversation now also holds the tool calls and their outputs.
response = client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)

# Handle the reply like the previous one: record every item, run any further
# tool calls, and print the text of message items.
for entry in response.output:
    chat_messages.append(entry)
    print(entry.type)
    print()

    if entry.type == 'function_call':      
        result = do_call(entry)
        chat_messages.append(result)
    elif entry.type == 'message':
        print(entry.content[0].text) 

message

To do well in **Module 1**, here are some actionable tips and best practices:

1. **Understand the Basics of Docker and Terraform**:
   - Familiarize yourself with the fundamental concepts of Docker and Terraform. This will help you navigate the module effectively.

2. **Install Required Packages**:
   - Ensure that all necessary Python packages are installed. For example, if you encounter a `ModuleNotFoundError` for `psycopg2`, install it using:
     ```bash
     pip install psycopg2-binary
     ```
   - If that doesn't work, try:
     ```bash
     pip install psycopg2-binary --upgrade
     ```

3. **Follow Docker Best Practices**:
   - Store all your code in your default Linux distro for better file system performance, especially if you're on Windows (10 Home/11 Home) using Docker with WSL2 backend.

4. **Utilize Resources and Documentation**:
   - Refer to the Docker documentation on best practices for installation and configuration.

5. **Debugging Common Errors**:
   - If

In [74]:
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.
When using FAQ, perform deep topic exploration: make one request to FAQ,
and then based on the results, make more requests.

At the end of each response, ask the user a follow up question based on your answer.
""".strip()

chat_messages = [
    {"role": "developer", "content": developer_prompt},
]

In [None]:
# Interactive chat: read a question, let the model call tools as often as it
# wants, print its answer, repeat. Typing "stop" ends the session.
# `chat_messages` accumulates the whole conversation across questions.
while True: # main Q&A loop
    question = input() # How do I do my best for module 1?
    if question == 'stop':
        break

    message = {"role": "user", "content": question}
    chat_messages.append(message)

    # NOTE(review): there is no cap on the number of rounds here; the loop
    # keeps calling the API until a reply contains a message item.
    while True: # request-response loop - query API till get a message
        response = client.responses.create(
            model='gpt-4o-mini',
            input=chat_messages,
            tools=tools
        )

        has_messages = False
        
        for entry in response.output:
            # Every output item goes back into the conversation history.
            chat_messages.append(entry)
        
            if entry.type == 'function_call':      
                print('function_call:', entry)
                print()
                # Run the requested tool and append its output so the next
                # request can see it.
                result = do_call(entry)
                chat_messages.append(result)
            elif entry.type == 'message':
                print(entry.content[0].text)
                print()
                has_messages = True

        # A message means the model has answered; go back to waiting for input.
        if has_messages:
            break

In [71]:
if 'chat_assistant' in globals():
    print("Chat assistant available in environment")
else:
    !wget https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py

--2025-07-15 01:54:18--  https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3485 (3.4K) [text/plain]
Saving to: ‘chat_assistant.py.1’


2025-07-15 01:54:19 (34.2 MB/s) - ‘chat_assistant.py.1’ saved [3485/3485]



In [72]:
import chat_assistant

tools = chat_assistant.Tools()
tools.add_tool(search, search_tool)

tools.get_tools()

developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.

At the end of each response, ask the user a follow up question based on your answer.
""".strip()

chat_interface = chat_assistant.ChatInterface()

chat = chat_assistant.ChatAssistant(
    tools=tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    client=client
)

In [73]:
def add_entry(question, answer):
    """Append a new question/answer pair to the FAQ index.

    The entry is stored with ``answer`` as its ``text``, under the
    'user added' section of the data-engineering-zoomcamp course.

    Args:
        question: Question text of the new FAQ entry.
        answer: Answer text of the new FAQ entry.
    """
    index.append(dict(
        question=question,
        text=answer,
        section='user added',
        course='data-engineering-zoomcamp',
    ))

In [74]:
add_entry_description = {
    "type": "function",
    "name": "add_entry",
    "description": "Add an entry to the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question to be added to the FAQ database",
            },
            "answer": {
                "type": "string",
                "description": "The answer to the question",
            }
        },
        "required": ["question", "answer"],
        "additionalProperties": False
    }
}

In [75]:
tools.add_tool(add_entry, add_entry_description)
tools.get_tools()

[{'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'Search query text to look up in the course FAQ.'}},
   'required': ['query'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'add_entry',
  'description': 'Add an entry to the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'The question to be added to the FAQ database'},
    'answer': {'type': 'string', 'description': 'The answer to the question'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}}]

In [76]:
chat.run()

You: stop


Chat ended.


In [77]:
index.docs[-1]

{'text': 'Problem description\nInfrastructure created in AWS with CD-Deploy Action needs to be destroyed\nSolution description\nFrom local:\nterraform init -backend-config="key=mlops-zoomcamp-prod.tfstate" --reconfigure\nterraform destroy --var-file vars/prod.tfvars\nAdded by Erick Calderin',
 'section': 'Module 6: Best practices',
 'question': 'How to destroy infrastructure created via GitHub Actions',
 'course': 'mlops-zoomcamp'}