In [1]:
from openai import OpenAI

# No credentials are passed explicitly, so the client has to pick them up from
# its own defaults / the environment.
openai_client = OpenAI()

In [2]:
import json

In [3]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Bound the request so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of trying to JSON-decode an error page.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into a single list of FAQ entries, tagging
# every entry with the course it belongs to. Each entry is copied, so
# `documents_raw` stays exactly as downloaded and re-running the cell is safe.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [4]:
from minsearch import AppendableIndex

# Three free-text fields are indexed for text search; `course` is declared as a
# keyword field and is the field the `search` functions filter on.
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Last expression of the cell: the value returned by fit() is what is displayed
# below (an AppendableIndex repr).
index.fit(documents)

<minsearch.append.AppendableIndex at 0x21213471160>

In [5]:
def search(query):
    """Look up FAQ entries for `query` in the module-level `index`.

    The lookup is restricted to the 'data-engineering-zoomcamp' course, weights
    the `question` field at 3.0 and the `section` field at 0.5, and asks the
    index for 5 results.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

# Hand-written function-tool schema that describes `search` to the model:
# one required string argument, `query`, and no other properties allowed.
search_tool = dict(
    type="function",
    name="search",
    description="Search the FAQ database",
    parameters=dict(
        type="object",
        properties=dict(
            query=dict(
                type="string",
                description="Search query text to look up in the course FAQ.",
            ),
        ),
        required=["query"],
        additionalProperties=False,
    ),
)

In [6]:
def make_call(call, tools=None):
    """
    Execute a function call requested by the model and wrap its result.

    Args:
        call: A function-call entry from the model response. It must expose
            `name`, `arguments` (a JSON-encoded object of keyword arguments)
            and `call_id`.
        tools (dict, optional): Mapping of tool name to callable. When omitted,
            the name is looked up among this module's globals, as before.
            Passing an explicit mapping limits what the model can invoke.

    Returns:
        dict: A `function_call_output` item carrying the same `call_id` and the
        JSON-serialized result, ready to be appended to the message history.

    Raises:
        KeyError: If no tool with the requested name exists.
        TypeError: If the requested name resolves to something not callable.
    """
    # An empty arguments string is treated as "no arguments" rather than
    # letting json.loads fail on it.
    args = json.loads(call.arguments) if call.arguments else {}

    f_name = call.name
    registry = globals() if tools is None else tools

    if f_name not in registry:
        raise KeyError(f"Unknown tool requested by the model: {f_name!r}")

    f = registry[f_name]
    if not callable(f):
        raise TypeError(f"Tool {f_name!r} is not callable")

    result = f(**args)
    result_json = json.dumps(result, indent=2)

    # The call_id ties this output back to the request the model made.
    return {
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": result_json,
    }

RAG - retrieval-augmented generation

In [7]:
# Instructions given to the model in every variant below: sent as the
# "developer" message in the manual loop and passed as `developer_prompt` /
# `instructions` to the runners and agents. The text is runtime data - any edit
# changes what the model is told. `.strip()` removes the leading and trailing
# newline introduced by the triple-quoted layout.
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

If you want to look up the answer, explain why before making the call. Use as many 
keywords from the user question as possible when making first requests.

Make multiple searches. Try to expand your search by using new keywords based on the results you
get from the search.

At the end, make a clarifying question based on what you presented and ask if there are 
other areas that the user wants to explore.
""".strip()

In [8]:
# Sample student question used for the manual tool-calling loop below.
question = "I just discovered the course, can I join it now?"

In [9]:
# Conversation history sent to the model: the developer prompt first, then the
# student's question. The loop in the next cell extends this list in place, so
# re-run this cell before re-running the loop to start from a clean history.
chat_messages = [
    {"role": "developer", "content": developer_prompt},
    {"role": "user", "content": question}
]

In [10]:
# Upper bound on model round-trips, so a model that keeps requesting tool calls
# cannot keep this cell (and the API bill) running forever.
MAX_TURNS = 10

for _ in range(MAX_TURNS):
    response = openai_client.responses.create(
        model='gpt-4o-mini',
        input=chat_messages,
        tools=[search_tool]
    )

    # Keep the model's own output (text and tool-call requests) in the history
    # so the next request sees the whole conversation.
    chat_messages.extend(response.output)

    has_function_calls = False

    for entry in response.output:
        if entry.type == 'message':
            print(entry.content[0].text)
        elif entry.type == 'function_call':
            print(entry)
            # Run the requested tool and append its output (linked by call_id)
            # so the model can use it on the next turn.
            result = make_call(entry)
            chat_messages.append(result)
            has_function_calls = True

    # A turn without tool calls means the model has given its final answer.
    if not has_function_calls:
        break
else:
    print(f"Stopped after {MAX_TURNS} turns without a final answer.")

ResponseFunctionToolCall(arguments='{"query":"join course now"}', call_id='call_duHlzss45sRbTtmqsn6mMqn0', name='search', type='function_call', id='fc_68b5b9578b7c8191b1d5818b4fcf7b2905a76010493c254f', status='completed')
Yes, you can still join the course even if it has already started. You remain eligible to submit homeworks, but keep in mind that there are deadlines for turning in final projects. It's best not to leave everything to the last minute to ensure you meet those deadlines.

The course officially starts on **15th Jan 2024 at 17:00**, and it's recommended to register before this date. If you're interested in participating, make sure to enroll as soon as possible.

Would you like more information on how to register, or is there anything else specific about the course you want to explore?


In [11]:
from toyaikit.llm import OpenAIClient
from toyaikit.tools import Tools
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.runners import DisplayingRunnerCallback

In [12]:
# Register the `search` function together with its hand-written schema.
agent_tools = Tools()
agent_tools.add_tool(search, search_tool)

In [14]:
# The same chat interface object is reused by every runner created later on.
chat_interface = IPythonChatInterface()

# Wire the tool registry, developer prompt, chat UI and LLM client together.
# OpenAIClient() is built without an explicit model here; a later cell passes
# model='gpt-4o-mini' explicitly.
runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [18]:
# Callback built around the shared chat interface; handed to runner.loop() below.
callback = DisplayingRunnerCallback(chat_interface)

In [19]:
# One programmatic run for a single prompt. The return value is kept so the
# next cell can pass it back in as `previous_messages`.
messages = runner.loop(prompt='how do I install kafka', callback=callback)

In [21]:
# Follow-up prompt that continues from the earlier messages.
# NOTE: `new_messages` is not read again by any later cell shown in this notebook.
new_messages = runner.loop(
    prompt='I want to use docker',
    previous_messages=messages,
    callback=callback,
)

In [22]:
# Interactive session: input is typed at the "You:" prompt and the transcript
# below ends after "stop". Rebinds `messages`, replacing the runner.loop() result.
messages = runner.run();

You: I just discovered the course, can I still join?


You: deadlines


You: stop


Chat ended.


[{'role': 'developer',
  'content': "You're a course teaching assistant. \nYou're given a question from a course student and your task is to answer it.\n\nIf you want to look up the answer, explain why before making the call. Use as many \nkeywords from the user question as possible when making first requests.\n\nMake multiple searches. Try to expand your search by using new keywords based on the results you\nget from the search.\n\nAt the end, make a clarifying question based on what you presented and ask if there are \nother areas that the user wants to explore."},
 {'role': 'user',
  'content': 'I just discovered the course, can I still join?'},
 ResponseFunctionToolCall(arguments='{"query":"join course late enrollment"}', call_id='call_tlHybx9ElxaPpf1S84kxCMLp', name='search', type='function_call', id='fc_68b5ba6f695881a38e33fa8f30fe56ff022431b5e643f207', status='completed'),
 {'type': 'function_call_output',
  'call_id': 'call_tlHybx9ElxaPpf1S84kxCMLp',
  'output': '[\n  {\n    "t

In [26]:
from typing import List, Dict, Any

def search(query: str) -> List[Dict[str, Any]]:
    """
    Search the FAQ database for entries matching the given query.

    Args:
        query (str): Search query text to look up in the course FAQ.

    Returns:
        List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
    """
    # The docstring above is kept verbatim on purpose: this function is
    # registered with add_tool(search) without a schema, and the tool
    # description appears to be taken from the docstring.
    #
    # Same lookup as the earlier untyped `search`, plus output_ids=True.
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
        output_ids=True,
    )

def add_entry(question: str, answer: str) -> None:
    """
    Add a new entry to the FAQ database.

    Args:
        question (str): The question to be added to the FAQ database.
        answer (str): The corresponding answer to the question.
    """
    # Docstring kept verbatim (it appears to be used as the tool description).
    # The entry is appended to the module-level `index` under a fixed section
    # and course; the answer is stored in the `text` field.
    index.append({
        'question': question,
        'text': answer,
        'section': 'user added',
        'course': 'data-engineering-zoomcamp',
    })

In [27]:
# Fresh registry. Unlike the earlier add_tool(search, search_tool) call, no
# explicit schema is passed here - presumably it is derived from each
# function's signature and docstring (confirm in toyaikit).
agent_tools = Tools()
agent_tools.add_tool(search)
agent_tools.add_tool(add_entry)

In [29]:
# Rebuild the runner so it uses the registry containing both search and add_entry.
runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [30]:
# Interactive session; in the transcript below the user asks for the answer to
# be added back to the FAQ. The trailing `;` suppresses the cell's result display.
runner.run();

You: how do I do well in module 1?


You: add this back to FAQ


You: stop


Chat ended.


In [31]:
# Inspect the most recently appended document - the output shows the
# 'user added' entry written through add_entry during the chat above.
index.docs[-1]

{'question': 'How do I do well in Module 1?',
 'text': '1. **Understand Core Concepts**: Familiarize yourself with the primary topics, especially related to Docker and Terraform.\n\n2. **Practice Hands-On Coding**: Work through coding exercises and install necessary packages like `psycopg2` using `pip install psycopg2-binary`. If you face errors, consider updating or uninstalling previous versions before reinstalling.\n\n3. **Check Environment Variables**: Learn to set environment variables correctly, especially for Python paths, to resolve import errors. For instance, you might need to edit your `PYTHONPATH`.\n\n4. **Utilize Documentation and Community Knowledge**: Refer to official course documentation and community forums for troubleshooting, as many errors are commonly discussed with shared solutions.\n\n5. **Ask Questions**: Engage with instructors and peers for clarity on complex topics.',
 'section': 'user added',
 'course': 'data-engineering-zoomcamp'}

In [32]:
class SearchTools:
    """FAQ tools (search / add_entry) bound to one index and one course.

    The method docstrings are exposed to the model as tool descriptions, so
    they intentionally stay free of implementation details.
    """

    def __init__(self, index, course: str = 'data-engineering-zoomcamp'):
        """
        Args:
            index: Object providing `search(...)` and `append(doc)`.
            course (str): Course used both to filter searches and to tag new
                entries. Defaults to the course that was previously hard-coded
                in both methods.
        """
        self.index = index
        self.course = course

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the FAQ database for entries matching the given query.

        Args:
            query (str): Search query text to look up in the course FAQ.

        Returns:
            List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
        """
        # Question matches are weighted up, section matches down.
        boost = {'question': 3.0, 'section': 0.5}

        results = self.index.search(
            query=query,
            filter_dict={'course': self.course},
            boost_dict=boost,
            num_results=5,
            output_ids=True
        )

        return results

    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new entry to the FAQ database.

        Args:
            question (str): The question to be added to the FAQ database.
            answer (str): The corresponding answer to the question.
        """
        # Tagged with the same course the searches filter on, so the new entry
        # is visible to `search`.
        doc = {
            'question': question,
            'text': answer,
            'section': 'user added',
            'course': self.course
        }
        self.index.append(doc)

In [33]:
# One SearchTools instance wraps the shared index; it is reused by the
# framework sections further down.
search_tools = SearchTools(index)

# add_tools() receives the instance itself; the get_tools() output in the next
# cell lists both `add_entry` and `search`.
agent_tools = Tools()
agent_tools.add_tools(search_tools)

In [34]:
# Show the generated tool schemas (descriptions mirror the method docstrings).
agent_tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],
   'additionalProperties': False}}]

In [36]:
# Quick check of input(). NOTE: this rebinds `question`, which earlier held the
# sample student question.
question = input('You:')
print(question)

You: howdy


howdy


## OpenAI Agents SDK

In [38]:
from agents import Agent, function_tool

In [52]:
# Wrap each bound method individually with function_tool.
# NOTE: the next cell rebinds `tools` via wrap_instance_methods, so this
# hand-built list is superseded when the cells run in the order shown.
tools = [
    function_tool(search_tools.search),
    function_tool(search_tools.add_entry),
    # any further tool method has to be listed here by hand
]

In [53]:
from toyaikit.tools import wrap_instance_methods
# Rebinds `tools`, replacing the list built by hand in the previous cell.
# Presumably applies function_tool to each method of search_tools - confirm in toyaikit.
tools = wrap_instance_methods(function_tool, search_tools)

In [46]:
# `Agent` here is the class imported from `agents` (OpenAI Agents SDK).
# NOTE(review): this cell's execution count (46) is lower than those of the
# cells above that define `tools` (52, 53), so the saved state was not produced
# in the order shown.
agent = Agent(
    name="faq_agent",
    instructions=developer_prompt,
    tools=tools,
    model='gpt-4o-mini'
)

In [48]:
from toyaikit.chat.runners import OpenAIAgentsSDKRunner

In [49]:
# Rebinds `runner`: same chat interface as before, now driving the Agents SDK agent.
runner = OpenAIAgentsSDKRunner(
    chat_interface=chat_interface,
    agent=agent
)

In [51]:
# This runner's run() is awaited (top-level await), unlike the earlier
# runner.run() calls on OpenAIResponsesRunner.
await runner.run();

You: how do I do well in module 1?


You: stop


Chat ended.


## Pydantic AI

In [55]:
# NOTE: this rebinds `Agent`, which until now referred to the class imported
# from `agents` in the OpenAI Agents SDK section. Every `Agent(...)` call that
# runs after this cell uses the pydantic_ai class.
from pydantic_ai import Agent

# The bound methods of the existing SearchTools instance are listed directly.
tools = [
    search_tools.search,
    search_tools.add_entry
]

# Bare expression: displays the list (the bound-method reprs below).
tools

[<bound method SearchTools.search of <__main__.SearchTools object at 0x00000212148C6510>>,
 <bound method SearchTools.add_entry of <__main__.SearchTools object at 0x00000212148C6510>>]

In [60]:
# `Agent` is the pydantic_ai class here (see the import in the Pydantic AI section).
agent = Agent(
    name="faq_agent",
    instructions=developer_prompt,
    tools=tools,
    # Alternative (disabled): model='gpt-4o-mini'
    model='anthropic:claude-3-7-sonnet-latest'
)

In [57]:
from toyaikit.chat.runners import PydanticAIRunner

In [61]:
# Rebinds `runner` to drive the pydantic_ai agent through the shared chat interface.
runner = PydanticAIRunner(
    chat_interface=chat_interface,
    agent=agent
)

In [62]:
# Interactive session with the pydantic_ai agent; awaited like the Agents SDK runner.
await runner.run()

You: how do I do well in module 1?


You: stop


Chat ended.


## MCP

agent <-> MCP server <-> tool

In [63]:
from toyaikit.mcp import MCPClient, SubprocessMCPTransport

In [64]:
# Command used to launch the MCP server, split on whitespace into an argument list.
command = "uv run python main.py".split()
# Directory handed to the transport as `workdir`.
workdir = "mcp_faq"

# Nothing is started yet; start_server() is called in the next cell.
client = MCPClient(
    transport=SubprocessMCPTransport(
        server_command=command,
        workdir=workdir
    )
)

In [65]:
# Launch the server (the output echoes the command used).
# NOTE(review): no matching shutdown call appears in the visible cells.
client.start_server()

Started server with command: uv run python main.py


In [66]:
# First handshake step: sends the initialize request; the server's response
# (protocol version, capabilities, server info) is both logged and returned.
client.initialize()

Sending initialize request...
Initialize response: {'protocolVersion': '2024-11-05', 'capabilities': {'experimental': {}, 'prompts': {'listChanged': False}, 'resources': {'subscribe': False, 'listChanged': False}, 'tools': {'listChanged': True}}, 'serverInfo': {'name': 'Demo 🚀', 'version': '1.13.1'}}


{'protocolVersion': '2024-11-05',
 'capabilities': {'experimental': {},
  'prompts': {'listChanged': False},
  'resources': {'subscribe': False, 'listChanged': False},
  'tools': {'listChanged': True}},
 'serverInfo': {'name': 'Demo 🚀', 'version': '1.13.1'}}

In [67]:
# Second handshake step: sends the "initialized" notification, after which the
# log reports the handshake as complete.
client.initialized()

Sending initialized notification...
Handshake completed successfully


In [68]:
# List the tools the server exposes (add_entry and search in the output below).
client.get_tools()

Retrieving available tools...
Available tools: ['add_entry', 'search']


[{'name': 'add_entry',
  'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
  'inputSchema': {'properties': {'question': {'title': 'Question',
     'type': 'string'},
    'answer': {'title': 'Answer', 'type': 'string'}},
   'required': ['question', 'answer'],
   'type': 'object'},
  '_meta': {'_fastmcp': {'tags': []}}},
 {'name': 'search',
  'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
  'inputSchema': {'properties': {'query': {'title': 'Query',
     'type': 'string'}},
   'required': ['query'],
   'type': 'object'},
  'outputSchema': {'properties': {'result': {'items': {'additionalProperties': True,
      'type': 'object'},
     'tit

In [69]:
# Call the `search` tool directly through the client. The result is stored in
# `result` but not displayed by this cell.
result = client.call_tool('search', {'query': 'how do I run docker?'})


Calling tool 'search' with arguments: {'query': 'how do I run docker?'}


In [74]:
from toyaikit.mcp import MCPTools

In [75]:
# Wrap the MCP client so it can be passed to the runner as its `tools`.
mcp_tools = MCPTools(client)

In [77]:
# Same runner type as at the start of the notebook, but the tools now come from
# the MCP client and the model is named explicitly.
runner = OpenAIResponsesRunner(
    tools=mcp_tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    llm_client=OpenAIClient(model='gpt-4o-mini')
)

In [78]:
# Interactive session; each tool invocation is logged as "Calling tool ..." below.
runner.run();

You: how do I insall kafka?


Calling tool 'search' with arguments: {'query': 'install Kafka'}


Calling tool 'search' with arguments: {'query': 'Kafka installation guide'}


Calling tool 'search' with arguments: {'query': 'how to set up Kafka on local machine'}


You: stop


Chat ended.


In [81]:
from pydantic_ai.mcp import MCPServerSSE

# Points at an SSE endpoint on localhost:8000 rather than the subprocess client
# used above. NOTE(review): no visible cell starts a server on that port -
# presumably it has to be running already; confirm how it is launched.
faq_mcp_client = MCPServerSSE(
    url='http://localhost:8000/sse'
)

In [82]:
# pydantic_ai agent again, but the tools are supplied through `toolsets` (the
# MCP server connection) instead of the `tools` list used earlier.
agent = Agent(
    name="faq_agent",
    instructions=developer_prompt,
    model='anthropic:claude-3-7-sonnet-latest',
    toolsets=[faq_mcp_client]
)

In [83]:
# Rebinds `runner` for the MCP-backed pydantic_ai agent.
runner = PydanticAIRunner(
    chat_interface=chat_interface,
    agent=agent
)

In [84]:
# Interactive session against the MCP-backed agent.
await runner.run();

You: how do I install kafka for python


You: stop


Chat ended.
