## Video 2 from the agents workshop

In [2]:
# Video link https://www.youtube.com/watch?v=yS_hwnJusDk
# notebook discussed is https://github.com/alexeygrigorev/rag-agents-workshop/blob/main/agents-part2.ipynb

# Enter the OpenAI API key once here; after that, all cells can be run freely.
import os
from getpass import getpass
from openai import OpenAI

# Prefer a key that is already exported in the environment; prompt only
# when none is set (or it is empty).
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    openai_api_key = getpass("🔑 Enter your OpenAI API key: ")

# Publish the key through the environment so later cells can rely on it.
os.environ["OPENAI_API_KEY"] = openai_api_key

🔑 Enter your OpenAI API key:  ········


## Download FAQ data and set up minsearch 

In [3]:
import json
from minsearch import AppendableIndex

import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Fail fast on network problems: without a timeout the request can block
# indefinitely, and without raise_for_status() an HTTP error page would only
# surface later as a confusing JSON decoding error.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into a single list of FAQ entries,
# tagging every entry with the name of the course it came from.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)



In [4]:
# Peek at one flattened entry to confirm the 'course' field was attached
documents[2]

{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
 'section': 'General course-related questions',
 'question': 'Course - Can I still join the course after the start date?',
 'course': 'data-engineering-zoomcamp'}

In [5]:
# Set up the minsearch index over the flattened FAQ entries.
# "question", "text" and "section" are registered as text fields and "course"
# as a keyword field (it is the field CourseFAQTools.search filters on).

index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.append.AppendableIndex at 0x7abf4f5d5430>

## Start OpenAI and create CourseFAQTools class

In [6]:
from openai import OpenAI
# No key is passed explicitly; the first cell stored it in the OPENAI_API_KEY
# environment variable — presumably that is where the client picks it up.
client = OpenAI()

In [10]:
## import the chat helpers (IPythonChatInterface, Tools, ChatAssistant) from the chat_assistant_2 module; download it first if it is missing:
# !wget https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant_2.py

from chat_assistant_2 import IPythonChatInterface, Tools, ChatAssistant
##

In [8]:
from typing import Any, Dict, List

class CourseFAQTools:
    """
    FAQ search/add tools backed by a search index and scoped to one course.

    NOTE: the docstrings of the public methods are kept verbatim on purpose.
    The tool definitions printed by tools.get_tools() in this notebook reuse
    them as the tool descriptions, so editing them changes what the model sees.
    """

    def __init__(self, index, course: str = 'data-engineering-zoomcamp', num_results: int = 5):
        """
        Args:
            index: Index object providing search(...) and append(doc).
            course (str): Course used both to filter searches and to tag newly
                added entries. Defaults to the previously hard-coded value.
            num_results (int): Number of results requested per search.
                Defaults to the previously hard-coded value.
        """
        self.index = index
        self.course = course
        self.num_results = num_results

    def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Search the FAQ database for entries matching the given query.

        Args:
            query (str): Search query text to look up in the course FAQ.

        Returns:
            List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.
        """
        # Boost weights passed to the index: 'question' high, 'section' low.
        boost = {'question': 3.0, 'section': 0.5}

        results = self.index.search(
            query=query,
            filter_dict={'course': self.course},
            boost_dict=boost,
            num_results=self.num_results,
            output_ids=True
        )

        return results


    def add_entry(self, question: str, answer: str) -> None:
        """
        Add a new entry to the FAQ database.

        Args:
            question (str): The question to be added to the FAQ database.
            answer (str): The corresponding answer to the question.
        """
        # Same keys as the downloaded FAQ entries; tagged with this
        # instance's course, the same value search() filters on.
        doc = {
            'question': question,
            'text': answer,
            'section': 'user added',
            'course': self.course
        }
        self.index.append(doc)



In [11]:
# Let's try the new class: instantiate the FAQ tools on top of the index built above

faq_tools = CourseFAQTools(index)

In [12]:
# Add our FAQ tool object to the tools collection and display the resulting
# tool definitions (the output below lists one per CourseFAQTools method)
tools = Tools()
tools.add_tools(faq_tools)
tools.get_tools()

[{'type': 'function',
  'name': 'add_entry',
  'description': 'Add a new entry to the FAQ database.\n\nArgs:\n    question (str): The question to be added to the FAQ database.\n    answer (str): The corresponding answer to the question.',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'question parameter'},
    'answer': {'type': 'string', 'description': 'answer parameter'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database for entries matching the given query.\n\nArgs:\n    query (str): Search query text to look up in the course FAQ.\n\nReturns:\n    List[Dict[str, Any]]: A list of search result entries, each containing relevant metadata.',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'query parameter'}},
   'required': ['query'],
   'additionalProperties': False}}]

In [13]:
# create a RAG chat with our tools use

# Instructions given to the assistant. The surrounding newlines of the
# triple-quoted block are removed by .strip().
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Before making any function calls, explain your reasoning why you want to perform something.

When searching in our FAQ, perform multiple search queries with differently phrased questions.

At the end, ask the user a question to make it more engaging
""".strip()

# Chat front-end object handed to the assistant below
interface = IPythonChatInterface()

# Wire the tool collection, the developer prompt, the interface and the
# OpenAI client into a single assistant
chat = ChatAssistant(
    tools=tools,
    developer_prompt=developer_prompt,
    interface=interface,
    openai_client=client
)


In [14]:
# Start the interactive chat; typing "stop" ends it (see the transcript below)

chat.run()

User: Can I register if course is started already?


User: stop


chat ended
