In [7]:
import os
from openai import OpenAI

In [9]:
# Default OpenAI client. No api_key/base_url is passed here, so credentials are
# presumably resolved by the SDK from the environment (OPENAI_API_KEY) — confirm.
openai_client = OpenAI()

In [10]:
# OpenAI-compatible client pointed at Groq's endpoint.
# Index os.environ directly so a missing GROQ_API_KEY fails right here with a
# KeyError. With .get() a missing key becomes api_key=None, and the OpenAI SDK
# then falls back to OPENAI_API_KEY, which would send the OpenAI secret to
# Groq's servers.
groq_client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
)

In [11]:
# Baseline Responses API request against OpenAI. print() is used so the
# newlines of the generated story are rendered rather than shown escaped.
response = openai_client.responses.create(
    input="Write a short bedtime story about a unicorn.",
    model="gpt-4o-mini",
)

print(response.output_text)

**The Starry Night of Luna the Unicorn**

Once upon a time, in a magical forest filled with sparkling rivers and towering trees, lived a gentle unicorn named Luna. Her coat shimmered like the moonlight, and her mane flowed with colors of the rainbow. Every night, Luna would wander through the forest, spreading joy and light wherever she went.

One evening, as Luna grazed near a glistening pond, she noticed the sky turn a deep shade of purple. Suddenly, she saw a shooting star streak across the heavens. Luna's heart raced. "I wish to find the legendary Starflower," she whispered. It was said that the Starflower could grant one special wish.

Determined, Luna followed the twinkle of the stars that seemed to beckon her. She galloped through the moonlit forest, her hooves barely touching the ground, as fireflies danced around her like tiny lanterns.

After a while, Luna reached the foot of the Whispering Mountains, where the first rays of dawn began to paint the sky. There, at the top, was

In [12]:
# Same prompt as the OpenAI request, sent through the Groq client to an
# open-weights model via the same Responses API call.
response = groq_client.responses.create(
    input="Write a short bedtime story about a unicorn.",
    model="openai/gpt-oss-20b",
)

print(response.output_text)

**The Moonlit Meadow**

Once upon a time, in a quiet valley where the grass grew soft and silver under the night sky, there lived a gentle unicorn named Liora. Her coat shimmered like fresh snow, and her horn glowed with a warm, amber light that made the flowers around her sparkle.

Every evening, after the sun had dipped below the hills, Liora would trot through the meadow, her hooves making a tiny, rhythmic thump that sounded like a lullaby. The wind whispered through the leaves, and the fireflies twinkled, as if they were tiny lanterns waiting to be lit.

One night, Liora noticed a little rabbit, trembling beside a fallen oak. “What’s wrong, little friend?” she asked, her voice as soft as a sigh.

The rabbit shook his head. “I lost my way home,” he sniffed. “I can’t see the path in the dark, and I’m so cold.”

Liora nudged her horn toward the rabbit and let out a gentle glow. The light danced on the ground, painting a glowing path that shone like a silver ribbon. “

In [13]:
import requests

# Course FAQ dump used as the retrieval corpus.
docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'

# Bound the request and fail loudly on HTTP errors, so an outage or an error
# page surfaces here instead of as a confusing JSON decode error below.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course groups into one list, tagging every document with its
# course name. New dicts are built instead of mutating the entries of
# documents_raw, so re-running the cell has no side effects on the raw payload.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [18]:
# Spot-check a single flattened record to see which fields a document carries
# (index 12 looks like an arbitrary sample — no special meaning is visible here).
documents[12]

{'text': 'The zoom link is only published to instructors/presenters/TAs.\nStudents participate via Youtube Live and submit questions to Slido (link would be pinned in the chat when Alexey goes Live). The video URL should be posted in the announcements channel on Telegram & Slack before it begins. Also, you will see it live on the DataTalksClub YouTube Channel.\nDon’t post your questions in chat as it would be off-screen before the instructors/moderators have a chance to answer it if the room is very active.',
 'section': 'General course-related questions',
 'question': 'Office Hours - What is the video/zoom link to the stream for the “Office Hour” or workshop sessions?',
 'course': 'data-engineering-zoomcamp'}

In [None]:
!pip install minsearch

In [20]:
from minsearch import AppendableIndex

In [29]:
# Build the FAQ search index: "course" is an exact-match keyword field used
# for filtering, the remaining fields are searched as free text.
index = AppendableIndex(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

# Index the flattened documents; left as the last expression so the cell
# displays the fitted index object.
index.fit(documents)

<minsearch.append.AppendableIndex at 0x72b9c18ead20>

In [30]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up FAQ entries matching ``query`` in the module-level ``index``.

    Args:
        query: Free-text search query.
        course: Value the ``course`` keyword field must equal. Defaults to
            the data engineering zoomcamp, which is the only course the
            previous hard-coded version could search.
        num_results: Maximum number of hits requested from the index
            (default 5, the previously hard-coded value).

    Returns:
        The result of ``index.search`` for these arguments (in this notebook
        a JSON-serialisable list of document dicts).
    """
    # Matches in the question field weigh the most; section titles count less.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [38]:
# Function-tool description passed to the Responses API so the model can ask
# for a FAQ lookup. The schema exposes a single required string argument,
# "query", and forbids any other properties.
search_tool = dict(
    type="function",
    name="search",
    description="Search the FAQ database",
    parameters=dict(
        type="object",
        properties=dict(
            query=dict(
                type="string",
                description="Search query text to look up in the course FAQ.",
            ),
        ),
        required=["query"],
        additionalProperties=False,
    ),
)

In [33]:
# Define the question in this cell: in notebook order it is otherwise assigned
# only further down, so a fresh "Restart & Run All" failed here with a
# NameError.
question = 'I just discovered the course. Can I join it now?'

# Plain (non-agentic) retrieval: search the FAQ with the raw student question.
result = search(question)

In [34]:
import json

In [21]:
# Student question used as the running example: it is interpolated into the
# RAG prompt and sent as the first user message of the agentic conversation.
question = 'I just discovered the course. Can I join it now?'

In [35]:
# Classic RAG prompt: the student's question plus the retrieved FAQ entries
# (serialised as JSON) wrapped in simple XML-like tags.
prompt = (
    "\n"
    "Answer the question from the student using the provided context\n"
    "\n"
    f"<QUESTION>{question}</QUESTION>\n"
    "\n"
    f"<CONTEXT>{json.dumps(result)}</CONTEXT>\n"
)

In [47]:
# agentic RAG: instead of retrieving up front, offer the model the search tool
# and let it decide whether to call it.

# Conversation history; it starts with only the student's question.
chat_messages = [dict(role="user", content=question)]

response = openai_client.responses.create(
    tools=[search_tool],
    input=chat_messages,
    model="gpt-4o-mini",
)

In [50]:
# Select the function-call item by its type rather than assuming it sits at
# output[0]: the output list may also contain other kinds of items (for example
# a text message), in which case index 0 is not the tool call.
tool_call = next(
    (item for item in response.output if item.type == "function_call"),
    None,
)
if tool_call is None:
    raise RuntimeError("The model did not request a tool call")

# Display the selected call (name, arguments, call_id).
tool_call

ResponseFunctionToolCall(arguments='{"query":"Can I join the course now?"}', call_id='call_plxceNhVOrmUQC7gFD11ZDXb', name='search', type='function_call', id='fc_02a0dec5722556bc006925720835408197b73dca4d6adc1af8', status='completed')

In [51]:
# Record the model's function-call item in the history; the function_call_output
# message appended afterwards refers back to it through the same call_id.
chat_messages.append(tool_call)

In [49]:
# Run the search with the arguments the model actually produced (a JSON string
# on tool_call.arguments) instead of a hand-copied query, so the cell stays
# correct when the model phrases the query differently on another run.
tool_args = json.loads(tool_call.arguments)
search_result = search(**tool_args)

In [52]:
# Serialise the hits and hand them back to the model as the output of the
# pending function call, linked to that call through its call_id.
result_json = json.dumps(search_result, indent=2)

chat_messages.append(
    dict(
        type="function_call_output",
        call_id=tool_call.call_id,
        output=result_json,
    )
)

In [53]:
# Inspect the history so far: user question, function call, function output.
chat_messages

[{'role': 'user',
  'content': 'I just discovered the course. Can I join it now?'},
 ResponseFunctionToolCall(arguments='{"query":"Can I join the course now?"}', call_id='call_plxceNhVOrmUQC7gFD11ZDXb', name='search', type='function_call', id='fc_02a0dec5722556bc006925720835408197b73dca4d6adc1af8', status='completed'),
 {'type': 'function_call_output',
  'call_id': 'call_plxceNhVOrmUQC7gFD11ZDXb',
  'output': '[\n  {\n    "text": "Yes, even if you don\'t register, you\'re still eligible to submit the homeworks.\\nBe aware, however, that there will be deadlines for turning in the final projects. So don\'t leave everything for the last minute.",\n    "section": "General course-related questions",\n    "question": "Course - Can I still join the course after the start date?",\n    "course": "data-engineering-zoomcamp"\n  },\n  {\n    "text": "No, you can only get a certificate if you finish the course with a \\u201clive\\u201d cohort. We don\'t award certificates for the self-paced mode. T

In [54]:
# Second round-trip: the history now includes the tool call and its output, so
# the model can answer from the retrieved FAQ entries.
response = openai_client.responses.create(
    tools=[search_tool],
    input=chat_messages,
    model="gpt-4o-mini",
)

In [55]:
# Show the text of the model's reply from the latest response.
response.output_text

"Yes, you can still join the course, even if you haven't registered. You're eligible to submit homework, but keep in mind that there are deadlines for the final projects, so it's best not to leave everything until the last minute."

In [56]:
# Put the assistant's previous answer into the history before the follow-up.
# Without it the model is asked "are you sure ...?" without ever seeing the
# reply the student is questioning.
chat_messages.extend(response.output)

chat_messages.append(
    {"role": "user", "content": "but are you sure I can get my certificate?"}
)

In [57]:
# Send the extended conversation (including the follow-up question); the search
# tool stays available in case the model wants to look something up again.
response = openai_client.responses.create(
    tools=[search_tool],
    input=chat_messages,
    model="gpt-4o-mini",
)

# Display the text of the reply.
response.output_text

'You can only receive a certificate if you complete the course with a "live" cohort. Certificates are not awarded for self-paced mode since you need to peer-review projects during the course. Make sure to join when the course is running to be eligible for the certificate!'