Follow along this tutorial: https://github.com/alexeygrigorev/rag-agents-workshop

In [None]:
!pip install minsearch



In [1]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [2]:
from minsearch import AppendableIndex

index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.append.AppendableIndex at 0x7fba240bd340>

In [3]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [4]:
question = 'Can I still join the course?'

In [5]:
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

def build_prompt(query, search_results):
    context = ""

    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [6]:
search_results = search(question)

In [7]:
prompt = build_prompt(question, search_results)

In [8]:
from dotenv import load_dotenv
load_dotenv()  # 會讀取當前目錄下 .env

True

In [9]:
!pip install -q -U google-genai

In [10]:
import requests
import json
import os
from google import genai

def llm(prompt):
  client = genai.Client()

  response = client.models.generate_content(
      model="gemini-2.5-flash", contents=prompt
  )
  return response.text

In [11]:
def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [12]:
rag("How do I patch KDE under FreeBSD?")


'I am sorry, but the provided FAQ context does not contain any information about patching KDE under FreeBSD. Therefore, I cannot answer your question based on the available information.'

## "Agentic" RAG

In [13]:
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
{question}
</QUESTION>

<CONTEXT> 
{context}
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}
""".strip()

In [14]:
question = 'Can I still join the course?'
context = 'EMPTY'

In [15]:
prompt = prompt_template.format(question=question, context=context)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
Can I still join the course?
</QUESTION>

<CONTEXT> 
EMPTY
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}

If you can answer the QUESTION using CONTEXT, use this template:

{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}

If the context doesn't contain the answer, use your own knowledge to answer the question

{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}


In [16]:
answer_json = llm(prompt)

In [17]:
print(answer_json)

{
"action": "SEARCH",
"reasoning": "The student is asking about joining the course. This information is typically found in an FAQ section related to enrollment, registration deadlines, or late admissions policies. Since the context is empty, I need to search the FAQ database to find the relevant policy."
}


In [18]:
import json

In [19]:
answer = json.loads(answer_json)

In [20]:
answer['action']

'SEARCH'

In [21]:
def build_context(search_results):
    context = ""

    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    return context.strip()

In [22]:
search_results = search(question)
context = build_context(search_results)
prompt = prompt_template.format(question=question, context=context)

In [23]:
answer_json = llm(prompt)

In [24]:
print(answer_json)

{
"action": "ANSWER",
"answer": "Yes, you can still join the course even after the start date. You are eligible to submit homeworks even if you haven't registered. However, be aware that there will be deadlines for turning in the final projects, so it's advisable not to leave everything until the last minute.",
"source": "CONTEXT"
}


## Agentic Search

In [25]:
def dedup(seq):
    seen = set()
    result = []
    for el in seq:
        _id = el['_id']
        if _id in seen:
            continue
        seen.add(_id)
        result.append(el)
    return result

In [26]:
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than {max_iterations} iterations for a given student question.
The current iteration number: {iteration_number}. If we exceed the allowed number 
of iterations, give the best possible answer with the provided information.

Output templates:

If you want to perform search, use this template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>",
"keywords": ["search query 1", "search query 2", ...]
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER_CONTEXT",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}

<QUESTION>
{question}
</QUESTION>

<SEARCH_QUERIES>
{search_queries}
</SEARCH_QUERIES>

<CONTEXT> 
{context}
</CONTEXT>

<PREVIOUS_ACTIONS>
{previous_actions}
</PREVIOUS_ACTIONS>
""".strip()

In [27]:
question = 'how do I do well on module 1'
max_iterations = 3
iteration_number = 0
search_queries = []
search_results  = []
previous_actions = []

In [28]:
context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [29]:
answer_json = llm(prompt)

In [30]:
answer = json.loads(answer_json)

In [31]:
previous_actions.append(answer)

In [32]:
keywords = answer['keywords']

In [33]:
for kw in keywords:
    search_queries.append(kw)
    sr = search(kw)
    search_results.extend(sr)

In [34]:
search_results = dedup(search_results)

In [35]:
iteration_number = 2

context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [36]:
answer_json = llm(prompt)

In [37]:
answer

{'action': 'SEARCH',
 'reasoning': 'The user is asking how to do well on Module 1. The CONTEXT is currently empty. I need to search the FAQ database for information related to Module 1, focusing on tips, strategies, or common advice for success.',
 'keywords': ['Module 1 success tips',
  'how to pass Module 1',
  'Module 1 study guide',
  'Module 1 assessment strategies']}

In [38]:
question = "what do I need to do to be successful at module 1?"

search_queries = []
search_results = []
previous_actions = []

iteration = 0

while True:
    print(f'ITERATION #{iteration}...')

    context = build_context(search_results)
    prompt = prompt_template.format(
        question=question,
        context=context,
        search_queries="\n".join(search_queries),
        previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
        max_iterations=3,
        iteration_number=iteration
    )

    print(prompt)

    answer_json = llm(prompt)
    # answer = json.loads(answer_json)
    try:
        answer = json.loads(answer_json)
    except json.JSONDecodeError:
        print("⚠️ JSON 格式錯誤，無法解析")
        break

    print(json.dumps(answer, indent=2))

    previous_actions.append(answer)

    action = answer['action']
    if action != 'SEARCH':
        break

    keywords = answer['keywords']
    search_queries = list(set(search_queries) | set(keywords))
    
    for k in keywords:
        res = search(k)
        search_results.extend(res)

    search_results = dedup(search_results)
    
    iteration = iteration + 1
    if iteration >= 4:
        break

    print()

ITERATION #0...
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current 

In [39]:
answer

{'action': 'ANSWER_CONTEXT',
 'answer': "Based on the provided context, which highlights common technical issues encountered, to be successful in Module 1: Docker and Terraform, you will need to focus on the following key areas:\n\n1.  **Proficiency with Docker and Terraform:** The module title clearly indicates these are core technologies, and successful completion likely requires understanding and applying them effectively.\n2.  **Working with PostgreSQL:** Several listed issues relate to connecting to a PostgreSQL database, suggesting that managing and interacting with Postgres is a critical component of the module.\n3.  **Python Database Connectivity:** You'll need to be proficient in using Python to interact with databases, specifically utilizing libraries such as `SQLAlchemy` and `psycopg2` (or `psycopg2-binary`). The context addresses common `ModuleNotFoundError` for `psycopg2` and `TypeError` with SQLAlchemy's `create_engine` function, indicating these are specific challenges t

In [40]:
iteration

2

## Function calling ("tool use")

In [66]:

# ✅ 定義 tool：函數需有型別註解
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [None]:
def do_call(tool_call):
    # 預設 args_dict 空字典
    try:
        args_dict = dict(tool_call.args) if tool_call.args is not None else {}
    except Exception:
        args_dict = {}

    tool_name = getattr(tool_call, "name", None) or ""
    if not tool_name:
        raise ValueError("tool_call.name 是空，無法呼叫工具函式")

    tool_fn = globals().get(tool_name)
    if not tool_fn:
        raise ValueError(f"找不到工具函式: {tool_name}")

    try:
        result = tool_fn(**args_dict)
    except Exception as e:
        result = {"error": str(e)}

    return {
        "content": {
            "parts": [
                {
                    "text": json.dumps(result, indent=2)
                }
            ]
        }
    }


In [None]:
import os
import json
import google.generativeai as genai
from google.protobuf.json_format import MessageToDict


# ✅ 設定 API 金鑰
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# ===== 建立模型並綁定 tools =====
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    tools=[search]
)

chat = model.start_chat()

developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.
If you look up something in FAQ, convert the student question into multiple queries.
""".strip()

chat.send_message(developer_prompt)

chat_messages = []

while True:
    question = input("🧑 請輸入問題（輸入 stop 結束）：")
    if question.lower() == "stop":
        break

    chat_messages.append({"role": "user", "content": question})

    # 發送問題
    response = chat.send_message(question)

    while True:
        part = response.candidates[0].content.parts[0]


        # if hasattr(part, "function_call"):
        if hasattr(part, "function_call") and part.function_call.name:

            tool_call = part.function_call
            print("part content:", part)
            
            if not tool_call.name:
                print("⚠️ tool_call.name 是空，跳過執行")
                break
            if tool_call.args is None:
                args_dict = {}
            else:
                args_dict = dict(tool_call.args)

            print(f"🔧 Gemini 要呼叫 tool: {tool_call.name}({args_dict})\n")

            tool_result_content = do_call(tool_call)

            # 回傳工具結果，只傳 content 結構給 Gemini
            response = chat.send_message(tool_result_content)

        else:
            chat_messages.append({"role": "assistant", "content": response.text})
            print("\n答：", response.text)
            break

print("\n🤖 Gemini 回答結束")


part content: function_call {
  name: "search"
  args {
    fields {
      key: "query"
      value {
        string_value: "FAQ: How do I do my best in module 1?"
      }
    }
  }
}

function_call.name: search
function_call.args: <proto.marshal.collections.maps.MapComposite object at 0x7fb9a1ef7d30>
🔧 Gemini 要呼叫 tool: search({'query': 'FAQ: How do I do my best in module 1?'})


答： The best practices for Module 1, which covers Docker and Terraform, include consulting the Docker Docs on Best Practices, especially regarding file system performance.  It's recommended to store your code in your default Linux distribution if you're using Windows 10/11 Home, as Docker runs on a WSL2 backend by default.  If you encounter a 'psycopg2' module error when working with Postgres, try installing or upgrading the `psycopg2-binary` package using pip.


🤖 Gemini 回答結束


## Multiple tools

In [76]:
!wget https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py

E0000 00:00:1752283967.063770    9187 backup_poller.cc:139] Run client channel backup poller: UNKNOWN:pollset_work {children:[UNKNOWN:epoll_wait: Bad file descriptor (9)]}
--2025-07-12 09:32:47--  https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3485 (3.4K) [text/plain]
Saving to: ‘chat_assistant.py’


2025-07-12 09:32:47 (46.0 MB/s) - ‘chat_assistant.py’ saved [3485/3485]



In [77]:
def add_entry(question, answer):
    doc = {
        'question': question,
        'text': answer,
        'section': 'user added',
        'course': 'data-engineering-zoomcamp'
    }
    index.append(doc)

In [78]:
add_entry_description = {
    "type": "function",
    "name": "add_entry",
    "description": "Add an entry to the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question to be added to the FAQ database",
            },
            "answer": {
                "type": "string",
                "description": "The answer to the question",
            }
        },
        "required": ["question", "answer"],
        "additionalProperties": False
    }
}

In [83]:
import chat_assistant_gemini

tools = chat_assistant_gemini.Tools()
tools.add_tool(search, search_tool)

NameError: name 'search_tool' is not defined

In [None]:
tools.add_tool(add_entry, add_entry_description)

In [None]:
tools.get_tools()

[{'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'Search query text to look up in the course FAQ.'}},
   'required': ['query'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'add_entry',
  'description': 'Add an entry to the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'The question to be added to the FAQ database'},
    'answer': {'type': 'string', 'description': 'The answer to the question'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}}]

In [None]:
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.

At the end of each response, ask the user a follow up question based on your answer.
""".strip()

chat_interface = chat_assistant.ChatInterface()

chat = chat_assistant.ChatAssistant(
    tools=tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    client=client
)

In [None]:
chat.run()

You: How do I do well in module 1?


You: add this to the FAQ database


You: stop


Chat ended.


In [None]:
index.docs[-1]

{'question': 'How do I do well in module 1?',
 'text': '1. Understand the Basics: Ensure you grasp foundational concepts like Docker and Terraform.\n2. Practice Regularly: Engage with practical assignments to reinforce concepts.\n3. Utilize Resources: Refer to course materials, documentation, and suggested readings.\n4. Ask Questions: Reach out if you encounter difficulties; engaging with peers or instructors can clarify your understanding.\n5. Review Feedback: Take time to review feedback after completing tasks to guide improvements.\n6. Stay Organized: Keep your work organized to easily trace back errors.',
 'section': 'user added',
 'course': 'data-engineering-zoomcamp'}

In [None]:
index

<minsearch.append.AppendableIndex at 0x233d5173b90>