In [1]:
# Get Data from documents

import requests
from openai.types.responses import ResponseFunctionToolCall

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()


documents = []

for record in documents_raw:
    course_name = record['course'] #data-engineering-zoomcamp#

    for element in record['documents']: #documents[]
        element['course'] = course_name
        documents.append(element)

In [2]:
# Create an Appendable Index ( Advantage is that we can keep adding to an appendable index)
from minsearch import AppendableIndex

index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

# to append , we can use the following function
#index.append(XXXX);


<minsearch.append.AppendableIndex at 0x106bba0c0>

In [3]:
# Create the search function to get results from Search Index create above

def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
    )

    return results

In [5]:
#define the search tool

search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query text to look up in the course FAQ."
            }
        },
        "required": ["query"],
        "additionalProperties": False
    }
}


In [26]:
#Create Open AI Client
from openai import OpenAI
openai_client = OpenAI()

# Set System prompt
instructions = """
You're a course teaching assistant.
You're given a question from a course student and your task is to answer it.
""".strip()

# Define the tools. in our case we are including search tool. More tools can be provided
tools = [search_tool]

question = 'I just discovered the course. Can I still join it?'
chat_messages = [
    {"role": "developer", "content": instructions}, #intermediary
    {"role": "user", "content": question} #from users
]

# Send the tools defines along with the system prompt and user question
response = openai_client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)


In [9]:
import json
#Checkout the response. This will have instructions asking to call the search tool to get the data.
response.output[0]

ResponseFunctionToolCall(arguments='{"query":"Can I still join the course?"}', call_id='call_N4oSprdIBzoF0e4adrR3RT9Q', name='search', type='function_call', id='fc_052320d5576cfe350068ee7c7b6a648193991312d2678dea9e', status='completed')

In [38]:
# write a function that takes in the call object and returns back a json formatted value
# that can be passed onto the Open AI call with chat messages

def call_search_tool(call):
    arguments = json.loads(call.arguments)
    query = arguments['query']
    search_results = json.dumps(search(query))
    return json.dumps({
        "type": "function_call_output",
        "call_id": call.call_id,
        "output": search_results
    })

In [39]:
import json
# let's put the response in an object called callParameters
# The key here is
## name : This is the name of the tool
## type : How we expect the tool to work. In our case it's a function_call
## id : An id that will be used by OpenAI to match the response of the function call
## arguments : The arguments that should be provided to the tool

call = response.output[0]


In [40]:
# append the result to chat messages and send it back to OpenAI

chat_messages.append(json.dumps(call_search_tool(call)))

print(chat_messages[0])
print(chat_messages[1])
print(chat_messages[2])


{'role': 'developer', 'content': "You're a course teaching assistant.\nYou're given a question from a course student and your task is to answer it."}
{'role': 'user', 'content': 'I just discovered the course. Can I still join it?'}
{"type": "function_call_output", "call_id": "call_N4oSprdIBzoF0e4adrR3RT9Q", "output": "[{\"text\": \"Yes, even if you don't register, you're still eligible to submit the homeworks.\\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.\", \"section\": \"General course-related questions\", \"question\": \"Course - Can I still join the course after the start date?\", \"course\": \"data-engineering-zoomcamp\"}, {\"text\": \"No, you can only get a certificate if you finish the course with a \\u201clive\\u201d cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the

In [37]:
# Send the tools defines along with the system prompt and user question and tool output

print(chat_messages)
print(tools)

response_with_tool_output = openai_client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)

[{'role': 'developer', 'content': "You're a course teaching assistant.\nYou're given a question from a course student and your task is to answer it."}, {'role': 'user', 'content': 'I just discovered the course. Can I still join it?'}, '{"type": "function_call_output", "call_id": "call_N4oSprdIBzoF0e4adrR3RT9Q", "output": "[{\\"text\\": \\"Yes, even if you don\'t register, you\'re still eligible to submit the homeworks.\\\\nBe aware, however, that there will be deadlines for turning in the final projects. So don\'t leave everything for the last minute.\\", \\"section\\": \\"General course-related questions\\", \\"question\\": \\"Course - Can I still join the course after the start date?\\", \\"course\\": \\"data-engineering-zoomcamp\\"}, {\\"text\\": \\"No, you can only get a certificate if you finish the course with a \\\\u201clive\\\\u201d cohort. We don\'t award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can

BadRequestError: Error code: 400 - {'error': {'message': "Invalid type for 'input[2]': expected an input item, but got a string instead.", 'type': 'invalid_request_error', 'param': 'input[2]', 'code': 'invalid_type'}}

In [None]:
response_with_tool_oupput.response