In [13]:
import json

In [14]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Bound the request so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course groups into one list, tagging every document with the
# course it came from. Each document is copied, so `documents_raw` is left
# untouched and re-running the cell always starts from the same data.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [15]:
from minsearch import AppendableIndex

# Free-text fields are searched by query; "course" is declared as a keyword
# field, which is what the searches in this notebook filter on.
index = AppendableIndex(
    keyword_fields=['course'],
    text_fields=['question', 'text', 'section'],
)

# Kept as the last expression so the fitted index is echoed as the cell output.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x2306c6a4050>

In [16]:
def search(query):
    """Look up `query` in the module-level FAQ `index`.

    The search is restricted to the 'data-engineering-zoomcamp' course, passes
    boosts of 3.0 for the `question` field and 0.5 for `section`, and asks for
    at most 5 results.

    Args:
        query: Free-text search string.

    Returns:
        Whatever `index.search` returns for these settings.
    """
    return index.search(
        query=query,
        num_results=5,
        boost_dict={'question': 3.0, 'section': 0.5},
        filter_dict={'course': 'data-engineering-zoomcamp'},
    )

# Hand-written function-calling schema describing `search` to the model:
# one required string argument, `query`, and no additional properties.
search_tool = dict(
    type="function",
    name="search",
    description="Search the FAQ database",
    parameters=dict(
        type="object",
        properties=dict(
            query=dict(
                type="string",
                description="Search query text to look up in the course FAQ.",
            ),
        ),
        required=["query"],
        additionalProperties=False,
    ),
)

RAG: retrieval-augmented generation

In [18]:
# Instructions given to the agent (passed as `developer_prompt` to the toyaikit
# runner and as `instructions` to the Pydantic AI agent). The text is sent to
# the model, so any edit here changes agent behaviour; .strip() only removes
# the leading and trailing newlines of the triple-quoted literal.
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

If you want to look up the answer, explain why before making the call. Use as many 
keywords from the user question as possible when making first requests.

Make multiple searches. Try to expand your search by using new keywords based on the results you
get from the search.

At the end, make a clarifying question based on what you presented and ask if there are 
other areas that the user wants to explore.
""".strip()

In [21]:
from toyaikit.llm import OpenAIChatCompletionsClient

from toyaikit.tools import Tools
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import DisplayingRunnerCallback
from toyaikit.chat.runners import OpenAIChatCompletionsRunner


In [22]:
import os
from openai import OpenAI

# Groq exposes an OpenAI-compatible endpoint, so the stock OpenAI client is
# pointed at it via base_url.
# The key is read with os.environ[...] so a missing GROQ_API_KEY fails right
# here with a KeyError. With os.getenv() the client would receive api_key=None
# and fall back to OPENAI_API_KEY when that is set, sending the wrong
# credential to Groq.
groq_client = OpenAI(
    api_key=os.environ['GROQ_API_KEY'],
    base_url='https://api.groq.com/openai/v1'
)

# toyaikit wrapper that pairs the Groq-backed client with the model to call.
llm_client = OpenAIChatCompletionsClient(
    model='openai/gpt-oss-20b',
    client=groq_client
)

In [23]:
# Tool registry for the agent: the standalone `search` function is registered
# together with its hand-written schema `search_tool`.
agent_tools = Tools()
agent_tools.add_tool(search, search_tool)

In [24]:
# Chat front-end for the notebook; the same object is later passed to the
# display callback and to the Pydantic AI runner.
chat_interface = IPythonChatInterface()

# Bundle the tool registry, the developer prompt, the chat interface and the
# Groq-backed LLM client into one runner.
# NOTE: `runner` is rebound to a PydanticAIRunner in the last cell.
runner = OpenAIChatCompletionsRunner(
    tools=agent_tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    llm_client=llm_client
)

In [25]:
# Callback built on the chat interface; it is handed to runner.loop() in the
# next cell.
callback = DisplayingRunnerCallback(chat_interface)

In [26]:
# Run the agent on a single prompt. `messages` holds whatever runner.loop()
# returns (presumably the conversation history; confirm against toyaikit).
# NOTE(review): the recorded output of this cell ends with a LookupError about
# the model name 'openai/gpt-oss-20b'; investigate before sharing the notebook.
messages = runner.loop(prompt='how do I install kafka', callback=callback)

-> Response received


-> Response received


-> Response received


-> Response received


Option,When to use it,What you’ll install
Plain Java binaries,"You want full control, no Docker.",OpenJDK + Apache Kafka tarball
Docker / Docker‑Compose,You prefer isolated containers or a quick spin‑up.,Official Kafka + Zookeeper images
Confluent Platform,"You need schema registry, REST proxy, or Confluent‑specific tools.","confluent CLI (includes Kafka, Zookeeper, etc.)"

Issue,Fix
NoBrokersAvailable,"Make sure the broker is listening on the advertised address (kafka.server.properties → listeners=PLAINTEXT://0.0.0.0:9092). In Docker, check KAFKA_ADVERTISED_LISTENERS."
“Connection timed out”,Verify that port 9092 is open and not blocked by a firewall.
“Bootstrap broker list is empty”,The bootstrap-server flag may be missing or misspelled.
“Zookeeper connection refused”,Ensure Zookeeper is up before starting Kafka (or set KAFKA_ZOOKEEPER_CONNECT correctly).
Java OutOfMemoryError,Increase KAFKA_HEAP_OPTS if you’re running many partitions.
Windows path issues,Use the Windows‑specific batch scripts under bin\windows\.


LookupError: ('Please check model name. Use list_all_models function to see list of supported models.', LookupError("Unable to find provider with model matching 'openai/gpt-oss-20b'"))

In [31]:
from typing import List, Dict, Any

class SearchTools:
    """FAQ search and insert operations over a shared index, exposed as agent tools.

    The public methods are registered as tools, and the tool listing in this
    notebook shows their docstrings being used as the tool descriptions. Those
    docstrings are therefore written for the model; maintainer notes belong in
    `#` comments instead.
    """

    def __init__(self, index, course: str = 'data-engineering-zoomcamp'):
        # `index` must provide search(...) and append(doc), as used below.
        self.index = index
        # Single source of truth for the course: searches are filtered to it
        # and new entries are filed under it. The default keeps the previous
        # hard-coded behaviour.
        self.course = course

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the FAQ database for entries matching the given query.

        Args:
            query (str): Search query text to look up in the course FAQ.

        Returns:
            List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
        """
        # output_ids=True is forwarded unchanged; presumably it makes the index
        # include each document's id in the results (confirm against minsearch).
        return self.index.search(
            query=query,
            filter_dict={'course': self.course},
            boost_dict={'question': 3.0, 'section': 0.5},
            num_results=5,
            output_ids=True
        )

    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new entry to the FAQ database.

        Args:
            question (str): The question to be added to the FAQ database.
            answer (str): The corresponding answer to the question.
        """
        # Filed under the same course that search() filters on, so an entry
        # added here is inside the filter of later searches.
        doc = {
            'question': question,
            'text': answer,
            'section': 'user added',
            'course': self.course
        }
        self.index.append(doc)

In [32]:
# Wrap the shared index in a SearchTools instance and register that instance
# with a fresh registry.
# NOTE: this rebinds `agent_tools`; the `runner` built in the earlier cell was
# given the previous registry object, which only held the standalone `search`.
search_tools = SearchTools(index)

agent_tools = Tools()
agent_tools.add_tools(search_tools)

In [33]:
# Display the schemas of the registered tools as the cell output. In the
# recorded output the descriptions are the method docstrings, and each
# parameter gets a generic "<name> parameter" description.
agent_tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],
   'additionalProperties': False}}]

In [36]:
# Read one line from the user and echo it back.
# NOTE(review): this looks like a scratch check of input() in the kernel.
# `question` is not used by any other visible cell, and this cell's execution
# count (36) is out of order, so consider deleting it before sharing.
question = input('You:')
print(question)

You: howdy


howdy


## Pydantic AI

In [34]:
from pydantic_ai import Agent

# Bound SearchTools methods that are handed to the Pydantic AI agent as tools.
tools = [
    search_tools.search,
    search_tools.add_entry
]

# Echo the list as the cell output.
tools

[<bound method SearchTools.search of <__main__.SearchTools object at 0x000002305807A660>>,
 <bound method SearchTools.add_entry of <__main__.SearchTools object at 0x000002305807A660>>]

In [40]:
# Pydantic AI agent that reuses `developer_prompt` as its instructions and the
# SearchTools bound methods as its tools. The model string names the same
# 'openai/gpt-oss-20b' model with a 'groq:' prefix (presumably the provider
# selector; confirm against the pydantic-ai model naming docs).
agent = Agent(
    name="faq_agent",
    instructions=developer_prompt,
    tools=tools,
    model='groq:openai/gpt-oss-20b'
)

In [41]:
# One-off run of the agent on a fixed prompt, awaited directly in the cell.
# NOTE(review): `result` is not inspected in any visible cell.
result = await agent.run(user_prompt='how do I run kafka')

from toyaikit.chat.runners import PydanticAIRunner

# Rebinds `runner` (previously the OpenAIChatCompletionsRunner) to a runner
# that drives the Pydantic AI agent through the same chat interface.
runner = PydanticAIRunner(
    chat_interface=chat_interface,
    agent=agent
)

# Run the chat runner (presumably an interactive loop; confirm against toyaikit).
await runner.run()