In [1]:
import json

In [2]:
# Load the pre-processed course documents.
# The encoding is passed explicitly so the read does not depend on the
# platform's locale default (e.g. cp1252 on Windows), which can garble or
# reject non-ASCII characters in a UTF-8 JSON file.
with open('data/de-zoomcamp-processed.json', 'r', encoding='utf-8') as f_in:
    de_zoomcamp_data = json.load(f_in)

In [3]:
def sliding_window(seq, size, step):
    """Cut ``seq`` into windows of at most ``size`` items, ``step`` apart.

    Args:
        seq: A sliceable sequence with a length (e.g. a string or a list).
        size: Maximum number of items per window; must be positive.
        step: Distance between the starts of consecutive windows; must be
            positive.

    Returns:
        A list of dicts, one per window, each with the keys ``'start'``
        (offset of the window in ``seq``) and ``'chunk'`` (the slice itself).
        An empty ``seq`` yields an empty list.

    Raises:
        ValueError: If ``size`` or ``step`` is not positive.
    """
    if size <= 0 or step <= 0:
        raise ValueError("size and step must be positive")

    total = len(seq)
    windows = []
    for offset in range(0, total, step):
        end = offset + size
        windows.append({'start': offset, 'chunk': seq[offset:end]})
        # The first window that reaches the end of seq is also the last one;
        # stopping here avoids emitting trailing windows that add no new items.
        if end >= total:
            break

    return windows

In [4]:
# Window length and stride passed to sliding_window. With the stride at half
# the window length, consecutive windows overlap by CHUNK_STEP items
# (presumably characters, assuming 'content' is a string — confirm).
CHUNK_SIZE = 2000
CHUNK_STEP = 1000

# Flat list of chunk records across all documents.
de_zoomcamp_chunks = []

for doc in de_zoomcamp_data:
    # Work on a shallow copy so popping 'content' leaves the loaded record
    # untouched; what remains in doc_copy is the document's metadata.
    doc_copy = doc.copy()
    doc_content = doc_copy.pop('content')
    chunks = sliding_window(doc_content, CHUNK_SIZE, CHUNK_STEP)
    for chunk in chunks:
        # Copy the metadata onto every window. Note that a metadata key named
        # 'start' or 'chunk' would overwrite the window's own value.
        chunk.update(doc_copy)
    de_zoomcamp_chunks.extend(chunks)

In [5]:
from minsearch import Index

In [6]:
# Record keys whose text is indexed for search.
searchable_fields = ["chunk", "filename"]
index = Index(text_fields=searchable_fields)

# Kept as the cell's final expression, so the notebook still displays
# whatever fit() returns.
index.fit(de_zoomcamp_chunks)

<minsearch.minsearch.Index at 0x242a2cbc2f0>

In [10]:
from typing import List, Any

def text_search(query: str) -> List[Any]:
    """
    Perform a text-based search over the indexed course-material chunks.

    Args:
        query (str): The search query string.

    Returns:
        List[Any]: A list of up to 5 search results returned by the index.
    """
    # NOTE(review): this function is registered as an LLM tool, and tool
    # frameworks commonly pass the docstring to the model as the tool
    # description, so the docstring should describe what is really indexed
    # (chunks of course documents, not an FAQ).
    return index.search(query, num_results=5)

In [9]:
from openai import OpenAI

from toyaikit.llm import OpenAIClient
from toyaikit.tools import Tools
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner

In [11]:
# Tool registry later handed to the runner; text_search is the only tool
# added (presumably this is what lets the model call it — confirm against
# the toyaikit documentation).
tools = Tools()
tools.add_tool(text_search)

In [13]:
# Instructions for the model. The same string is passed to
# OpenAIResponsesRunner as developer_prompt and to the pydantic_ai Agent as
# instructions; .strip() removes the leading and trailing newlines left by
# the triple-quoted layout.
# NOTE(review): the first sentence contains a double space ("for a  course")
# and a trailing space — possibly a course name was dropped; confirm the
# intended wording before changing the string.
developer_prompt = """
You are a helpful assistant for a  course. 

Use the search tool to find relevant information from the course materials before answering questions.

If you can find specific information through search, use it to provide accurate answers.
If the search doesn't return relevant results, let the user know and provide general guidance.
""".strip()

In [15]:
# Build the runner's collaborators first, then assemble the runner itself.
openai_client = OpenAIClient(model="gpt-4o-mini")
chat_interface = IPythonChatInterface()

# The runner ties together the LLM client, the chat front end, the
# instructions and the registered tools.
runner = OpenAIResponsesRunner(
    llm_client=openai_client,
    chat_interface=chat_interface,
    developer_prompt=developer_prompt,
    tools=tools,
)

In [18]:
# Start the interactive chat session (the recorded transcript ends when the
# user types "stop"). The trailing semicolon keeps the notebook from
# displaying the call's return value.
runner.run();

You: How do I run kafka in Python?


You: stop


Chat ended.


In [19]:
from pydantic_ai import Agent

In [21]:
# Pydantic AI agent using the same instructions and search tool as the
# toyaikit runner. The model is named in provider-qualified form
# ("<provider>:<model>") so the provider is stated explicitly instead of
# being inferred from the bare model name.
agent = Agent(
    name="faq_agent",
    instructions=developer_prompt,
    tools=[text_search],
    model='openai:gpt-4o-mini'
)

In [22]:
# Ask the agent a single question. agent.run is awaited directly at the top
# level of the cell, which relies on the notebook supporting top-level await.
question = "how do I install Kafka in Python?"
result = await agent.run(user_prompt=question)
# print() is used so the answer's line breaks render as text rather than
# appearing as escaped "\n" in a string repr.
print(result.output)

To install and use Kafka with Python, you can use the `kafka-python` library or the `confluent-kafka` library. Here’s a general guide to get you started:

### 1. Install Required Libraries

You can install the `kafka-python` library using pip:

```bash
pip install kafka-python
```

For the `confluent-kafka` library, you can install it using:

```bash
pip install confluent-kafka
```

### 2. Set Up Kafka

Before you run your Python scripts, ensure that you have a Kafka server running. You can use Docker to quickly set up Kafka if you prefer that approach. Check the following steps:

#### Using Docker

You can pull the Kafka image and run it as follows:

```bash
docker run -d --name zookeeper -p 2181:2181 zookeeper:3.7
docker run -d --name kafka --link zookeeper -p 9092:9092 wurstmeister/kafka
```

### 3. Create a Kafka Producer and Consumer

Here's a simple example using `kafka-python` to create a producer and consumer:

#### Kafka Producer Example

```python
from kafka import KafkaProdu