In [1]:
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

openai_client = OpenAI()

def llm(user_prompt, instructions=None, model="gpt-4o-mini"):
    messages = []

    if instructions:
        messages.append({
            "role": "system",
            "content": instructions
        })

    messages.append({
        "role": "user",
        "content": user_prompt
    })

    response = openai_client.responses.create(
        model=model,
        input=messages
    )

    return response.output_text

In [4]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [6]:
from minsearch import AppendableIndex

index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.append.AppendableIndex at 0x120f414c0>

In [7]:
import json

In [30]:
def serach(query):

    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5
    )

    return results

In [32]:
def make_call(call):
    args = json.loads(call.arguments)
    f_name = call.name

    if f_name == 'search':
        result = serach(**args)
    else:
        raise Error(f'unknown function {f_name}')

    return {
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": json.dumps(result),
    }


In [9]:
search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query text to look up in the course FAQ."
            }
        },
        "required": ["query"],
        "additionalProperties": False
    }
}

In [10]:
tools = [search_tool]

In [13]:
instructions = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

If you want to look up the answer, explain why before making the call
""".strip()

In [26]:
question = "I just discovered the course. Can I still join it?"

chat_messages = [
    {"role": "developer", "content": instructions},
    {"role": "user", "content": question}
]

In [27]:
for item in response.output:
    print(item)
    print('')

ResponseOutputMessage(id='msg_033a409bec44f89c0068edc93a42288192b194dc3757ef9920', content=[ResponseOutputText(annotations=[], text="To determine if you can still join the course, it's important to know the enrollment deadlines and any restrictions related to late registration. I'll look up the specific information about course enrollment to provide you with an accurate answer.", type='output_text', logprobs=[])], role='assistant', status='completed', type='message')

ResponseFunctionToolCall(arguments='{"query":"Can I still join the course?"}', call_id='call_sG8uYLjdXdhWDIZffcgHJDsf', name='search', type='function_call', id='fc_033a409bec44f89c0068edc93b98bc8192b9bf0a5a2b1c1627', status='completed')



In [29]:
response

Response(id='resp_0a12307440d523110068edc9895c58819ca23d855477d74170', created_at=1760414089.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4o-mini-2024-07-18', object='response', output=[ResponseOutputMessage(id='msg_0a12307440d523110068edc989b0d0819cbfa684a4eed10150', content=[ResponseOutputText(annotations=[], text="It's important to confirm whether the course has open enrollment or if there are any deadlines for registration. I will look up the course FAQ to find specific information about enrollment policies. This will ensure that you have accurate and up-to-date details regarding joining the course. \n\nLet me check that for you.", type='output_text', logprobs=[])], role='assistant', status='completed', type='message'), ResponseFunctionToolCall(arguments='{"query":"Can I still join the course?"}', call_id='call_yC7R37A4sb3F2deoalkoAytu', name='search', type='function_call', id='fc_0a12307440d523110068edc98b3964819ca063c161ab31bd14', status='com

In [33]:
while True: # the "agent" Loop
    response = openai_client.responses.create(
        model='gpt-4o-mini',
        input=chat_messages,
        tools=tools
    )

    has_function_calls = False
    chat_messages.extend(response.output)

    for item in response.output:
        if item.type == 'message':
            print('Assistant')
            print(item.content[0].text)
            print()

        if item.type == 'function_call':
            has_function_calls = True
            print('function call')
            print('     ', item.name, item.arguments)
            print()

            function_call_output = make_call(item)
            chat_messages.append(function_call_output)

    if not has_function_calls:
        break

Assistant
Before responding, it's important to check our FAQ for specific information about course enrollment deadlines or late join policies, as they can vary by course. I'll look up if there's any relevant information regarding how late a student can join.

function call
      search {"query":"join course late enrollment"}

Assistant
Yes, you can still join the course even after its start date. However, be aware that deadlines will still apply for submitting homework and final projects. It's best not to leave everything until the last minute! 

If you have any further questions about the enrollment process or specific deadlines, feel free to ask.

