In [1]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this provides the API key that OpenAIClient() picks
# up further down — confirm.
load_dotenv()


True

In [2]:
import requests

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Without a timeout requests waits indefinitely on a stalled connection, and
# without raise_for_status() an HTTP error page would only surface later as a
# confusing JSON decoding failure.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into a single list of documents, tagging
# every document with the name of the course it came from.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [3]:
from minsearch import AppendableIndex

# Build the FAQ index over the flattened documents: `question`, `text` and
# `section` are registered as text fields, `course` as a keyword field (it is
# the field used in `filter_dict` by the search helpers below).
index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Kept as the last expression of the cell, so the fitted index is echoed as
# the cell output.
index.fit(documents)



<minsearch.append.AppendableIndex at 0x7f8bf065b400>

In [4]:
def search(query):
    """Query the FAQ index for entries matching ``query``.

    The lookup always filters on course ``data-engineering-zoomcamp``, passes
    boost weights of 3.0 for ``question`` and 0.5 for ``section``, and asks
    the index for 5 results.

    Args:
        query: Free-text search string.

    Returns:
        Whatever ``index.search`` returns for these settings.
    """
    return index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict={'question': 3.0, 'section': 0.5},
        num_results=5,
    )

In [5]:

# Tool definition describing `search` to the model; it is registered together
# with the function via `agent_tools.add_tool(search, search_tool)`.
# "name" matches the Python function name.
search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    # Schema of the call arguments: a single required string, `query`,
    # mirroring the signature of `search(query)`.
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query text to look up in the course FAQ."
            }
        },
        "required": ["query"],
        "additionalProperties": False
    }
}

In [6]:
# Developer prompt for the agent; handed to the runner through
# `developer_prompt=instructions`. `.strip()` removes the leading and trailing
# newlines introduced by the triple-quoted literal.
instructions = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

If you want to look up the answer, explain why before making the call
""".strip()

In [7]:
# Sample student question; used as the prompt for `runner.loop(...)` further down.
question = 'I just discovered the course. Can I still join it?'

In [38]:
from toyaikit.llm import OpenAIClient
from toyaikit.chat import IPythonChatInterface
from toyaikit.chat.runners import OpenAIResponsesRunner
from toyaikit.chat.runners import DisplayingRunnerCallback
from toyaikit.tools import Tools

# TODO: take a closer look at the toyaikit library (source of the helpers imported above).

In [39]:
# Tool registry for the agent: each entry pairs a Python callable with the
# tool definition that describes it.
agent_tools = Tools()
agent_tools.add_tool(search, search_tool)

In [40]:
# Chat front-end for the notebook; the same object is also given to
# DisplayingRunnerCallback in a later cell.
chat_interface = IPythonChatInterface()

# Wire the agent together: the registered tools, the developer prompt defined
# above, the chat interface and an OpenAI client.
runner = OpenAIResponsesRunner(
    tools=agent_tools,
    developer_prompt=instructions,
    chat_interface=chat_interface,
    llm_client=OpenAIClient()
)

In [36]:
# Callback passed to `runner.loop(...)` below; built on the shared chat interface.
# NOTE(review): presumably it renders intermediate steps of the run there — confirm.
callback = DisplayingRunnerCallback(chat_interface)

In [37]:
# Start the interactive chat; typing "stop" ends it (see the transcript below).
# The trailing `;` suppresses the echo of the call's return value.
runner.run();

You:Do will on module 1


You:How can i do well in module 1?


You:stop
Chat ended.


In [19]:
# Run the agent on the sample question defined earlier, passing the display
# callback; the returned object exposes `.cost`, inspected in the next cell.
results = runner.loop(
    prompt=question,
    callback=callback
)

In [20]:
# Cost breakdown of the run above (input, output and total cost).
results.cost

CostInfo(input_cost=7.89e-05, output_cost=7.56e-05, total_cost=0.0001545)

In [21]:
def add_entry(question, answer):
    """Append a new question/answer pair to the FAQ index.

    The stored document is tagged with section ``user added`` and course
    ``data-engineering-zoomcamp``.

    Args:
        question: Question text, stored under ``question``.
        answer: Answer text, stored under ``text``.

    Returns:
        None.
    """
    index.append({
        'question': question,
        'text': answer,
        'section': 'user added',
        'course': 'data-engineering-zoomcamp',
    })

In [23]:
# Tool definition describing `add_entry` to the model; it is registered
# together with the function via `agent_tools.add_tool(add_entry, add_entry_tool)`.
# "name" matches the Python function name.
add_entry_tool = {
    "type": "function",
    "name": "add_entry",
    "description": "Add an entry to the FAQ database",
    # Schema of the call arguments: two required strings that mirror the
    # signature of `add_entry(question, answer)`.
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question to be added to the FAQ database",
            },
            "answer": {
                "type": "string",
                "description": "The answer to the question",
            }
        },
        "required": ["question", "answer"],
        "additionalProperties": False
    }
}

In [24]:
# Register the second tool on the Tools instance that was already handed to
# `runner`, so the next chat session can write to the FAQ as well.
agent_tools.add_tool(add_entry, add_entry_tool)

In [25]:
# Second interactive session, now with `add_entry` available to the agent.
# The trailing `;` suppresses the echo of the call's return value.
runner.run();

You:do well in module 1


You:Save it back to FAQ


You:Stop
Chat ended.


In [26]:
# Most recent document in the index — shows the entry the agent added during
# the chat session above.
index.docs[-1]

{'question': 'How can I do well in Module 1?',
 'text': "1. **Understand the Basics**: Make sure you grasp the foundational concepts of Docker and Terraform, as these are crucial for practical applications.\n2. **Hands-On Practice**: Engage with hands-on exercises and examples provided in the module. Setting up your own Docker environment and experimenting with Terraform will deepen your understanding.\n3. **Refer to Documentation**: Utilize official documentation for Docker and Terraform. Documentation often contains valuable insights and troubleshooting tips.\n4. **Join Study Groups**: Collaborating with peers can provide different perspectives on complex topics and clarify doubts.\n5. **Ask Questions**: If you encounter difficulties, don't hesitate to ask questions in discussion forums or seek help from instructors.",
 'section': 'user added',
 'course': 'data-engineering-zoomcamp'}

In [29]:
from typing import Any, Dict, List

class SearchTools:
    """FAQ tools (search and add entry) bound to a single index and course.

    Instances are registered with ``Tools.add_tools``. The public method
    signatures and docstrings are what the generated tool definitions are
    built from, so they are written for the model and must stay in sync with
    the behaviour.

    Args:
        index: Index object providing ``search(...)`` and ``append(doc)``.
        course: Course name used both to filter searches and to tag newly
            added entries. Defaults to ``'data-engineering-zoomcamp'``.
        num_results: Number of results requested from the index per search.
            Defaults to 5.
    """

    def __init__(self, index, course='data-engineering-zoomcamp', num_results=5):
        self.index = index
        # Kept on the instance so search() and add_entry() always agree on
        # the course instead of each hard-coding its own copy of the name.
        self.course = course
        self.num_results = num_results

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the index for documents related to the given query.

        Args:
            query (str): The search query string.

        Returns:
            List[Dict[str, Any]]: A list of search results, where each result is a 
            dictionary containing document fields and their corresponding values.
        """
        boost = {'question': 3.0, 'section': 0.5}

        results = self.index.search(
            query=query,
            filter_dict={'course': self.course},
            boost_dict=boost,
            num_results=self.num_results,
        )

        return results

    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new question-answer entry to the index.

        Args:
            question (str): The question text to be added.
            answer (str): The corresponding answer text.

        Returns:
            None
        """
        doc = {
            'question': question,
            'text': answer,
            'section': 'user added',
            'course': self.course
        }
        self.index.append(doc)

In [30]:
# Start over with an empty tool registry, replacing the manually populated one.
# NOTE(review): `runner` was constructed with the previous Tools instance, so it
# presumably has to be rebuilt before it can use this registry — confirm.
agent_tools = Tools()

In [31]:
# Register the methods of a SearchTools instance as tools in one call; the
# tool definitions are generated rather than hand-written (inspected below).
search_tools = SearchTools(index)
agent_tools.add_tools(search_tools)

In [32]:
# Inspect the generated tool definitions: names, descriptions and parameters.
agent_tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': 'Add a new question-answer entry to the index.\n\nArgs:\n    question (str): The question text to be added.\n    answer (str): The corresponding answer text.\n\nReturns:\n    None',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'Search the index for documents related to the given query.\n\nArgs:\n    query (str): The search query string.\n\nReturns:\n    List[Dict[str, Any]]: A list of search results, where each result is a \n    dictionary containing document fields and their corresponding values.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],