In [15]:
#Imports

import os
import gradio as gr
import json
import os
from openai import OpenAI
from dotenv import load_dotenv

In [2]:
# constants

# Model identifiers, one per backend that get_client_and_model can return.
MODEL_GPT = "gpt-4o-mini"    # paired with the OpenAI client
MODEL_LLAMA = "llama3.2"     # paired with the local Ollama client

In [3]:
# set up environment

# Load .env BEFORE constructing any client: OpenAI() picks up OPENAI_API_KEY
# from the process environment when it is instantiated, so the key must already
# be present at that point (otherwise a fresh kernel fails or uses a stale key).
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Cheap sanity check on the key's shape; it does not validate the key remotely.
if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")

# Hosted OpenAI client (uses the key loaded above).
openai = OpenAI()

# Ollama is reached through the same OpenAI client class, pointed at a local
# base URL; the api_key value here is just a placeholder string.
OLLAMA_BASE_URL = "http://localhost:11434/v1"
ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')
MODEL = MODEL_LLAMA

# System prompt used by voice_chat.
system_message = """
You are an expert technical assistant who helps developers debug code, explain concepts,
and solve engineering problems.

If the user asks a general technical question, respond normally.

However, if the user explicitly asks to book a hotel or make a hotel reservation,
you MUST call the book_hotel tool instead of responding conversationally.
"""


API key looks good so far


In [4]:
# A SIMPLE HOTEL BOOKING TOOL

def book_hotel(name: str, city: str, nights: int, start_date: str):
    """Build the confirmation sentence for a (simulated) hotel booking.

    No reservation is actually made; the four arguments are only interpolated
    into the returned text.

    Args:
        name: Who the booking is for.
        city: Where the hotel is booked.
        nights: How many nights the stay lasts.
        start_date: When the stay starts.

    Returns:
        A single confirmation string.
    """
    confirmation = (
        f"Hotel successfully booked in {city} for {name} "
        f"for {nights} nights starting {start_date}"
    )
    return confirmation

In [5]:
# TOOL OBJECT DEFINITION

# Function-calling schema advertised to the model for book_hotel.
# Each parameter carries a description so the model knows what to put where;
# in particular "name" is the guest, matching how book_hotel uses it.
tools = [
    {
        "type": "function",
        "function": {
            "name": "book_hotel",
            "description": "Book a hotel when a user asks to reserve or book accommodation",
            "parameters": {
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Name of the guest the booking is for",
                    },
                    "city": {
                        "type": "string",
                        "description": "City in which to book the hotel",
                    },
                    "nights": {
                        "type": "integer",
                        "description": "Number of nights of the stay",
                    },
                    "start_date": {
                        "type": "string",
                        "description": "Date on which the stay starts",
                    },
                },
                "required": ["name", "city", "nights", "start_date"],
            },
        },
    }
]

In [6]:
# MODEL SWITCH

def get_client_and_model(choice):
    """Pick the (client, model name) pair for the selected backend.

    The exact string "OpenAI" selects the OpenAI client with MODEL_GPT; every
    other value falls through to the Ollama client with MODEL_LLAMA.
    """
    wants_openai = choice == "OpenAI"
    return (openai, MODEL_GPT) if wants_openai else (ollama, MODEL_LLAMA)

In [None]:
# SPEECH TO TEXT

def transcribe_audio(audio):
    """Transcribe a recorded audio file to text.

    Args:
        audio: Path to the audio file, or None when nothing was recorded.

    Returns:
        The transcript text, or "" when `audio` is None.
    """
    if audio is not None:
        # Always uses the OpenAI client, whichever chat backend is selected.
        with open(audio, "rb") as audio_file:
            result = openai.audio.transcriptions.create(
                model="gpt-4o-mini-transcribe",
                file=audio_file,
            )
        return result.text

    return ""

In [8]:
# TEXT → SPEECH

def text_to_speech(text):
    """Synthesize `text` to speech and return the path of the written file.

    The audio is streamed to "response.mp3" in the working directory, so each
    call overwrites the output of the previous one.
    """
    output_path = "response.mp3"
    tts_request = {
        "model": "gpt-4o-mini-tts",
        "voice": "alloy",
        "input": text,
    }

    with openai.audio.speech.with_streaming_response.create(**tts_request) as streamed:
        streamed.stream_to_file(output_path)

    return output_path

In [13]:
# STREAMING TEXT CHAT


def _book_hotel_tool_result(tool_call):
    """Run one model-requested tool call and return its result as text.

    Problems (unknown tool name, unparsable or incomplete arguments) are
    returned as an error string instead of raised, so that every tool call
    still receives a matching "tool" message.
    """
    tool_name = tool_call.function.name
    if tool_name != "book_hotel":
        return f"Error: unknown tool '{tool_name}'"

    try:
        args = json.loads(tool_call.function.arguments)
        return book_hotel(
            args["name"],
            args["city"],
            args["nights"],
            args["start_date"],
        )
    except (json.JSONDecodeError, KeyError, TypeError) as exc:
        return f"Error: invalid arguments for book_hotel: {exc!r}"


def chat(message, history, model_choice):
    """Stream an assistant reply to a typed message, running tools on request.

    A first, non-streaming completion is made with the tool schema attached.
    If the model asks for tool calls, they are executed and their results are
    added to the conversation. A second, streaming completion then produces
    the text that is shown to the user.

    Args:
        message: Text typed by the user.
        history: Conversation so far as a list of {"role", "content"} dicts,
            or None/empty for a new conversation.
        model_choice: Backend selector passed to get_client_and_model.

    Yields:
        The history list, each time more of the assistant reply has arrived.
    """
    history = history or []

    # add user message first
    history.append({"role": "user", "content": message})

    # model router
    client, model = get_client_and_model(model_choice)

    messages = [
        {"role":"system",
        "content":"You are a general technical assistant. Only call the hotel booking tool if the request is about booking a hotel."}
    ] + history

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="auto"
    )

    msg = response.choices[0].message

    # TOOL CALL
    if msg.tool_calls:
        # The assistant message carrying tool_calls is added exactly once,
        # followed by one "tool" message per tool_call id.
        messages.append(msg)

        for tool_call in msg.tool_calls:
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": _book_hotel_tool_result(tool_call)
            })

    # Same streaming request whether or not tools ran; `messages` simply
    # contains the tool exchange when there was one.
    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True
    )

    partial = ""

    # add empty assistant message to history
    history.append({"role": "assistant", "content": ""})

    for chunk in stream:
        # Skip chunks that carry no choices instead of indexing into an empty list.
        if not chunk.choices:
            continue

        piece = chunk.choices[0].delta.content
        if piece:
            partial += piece
            history[-1]["content"] = partial
            yield history

In [10]:
# VOICE CHAT + TOOL CALLING

def voice_chat(audio, history, model_choice):
    """Answer a spoken question: transcribe, chat (with tools), speak the reply.

    Args:
        audio: Path to the recorded audio file, or None.
        history: Conversation so far as a list of {"role", "content"} dicts,
            or None/empty for a new conversation.
        model_choice: Backend selector passed to get_client_and_model.

    Returns:
        A (history, audio_path) tuple. audio_path is None when nothing was
        transcribed or when the model produced no text to speak.
    """
    history = history or []

    user_text = transcribe_audio(audio)

    if not user_text:
        return history, None

    history.append({"role": "user", "content": user_text})

    client, model = get_client_and_model(model_choice)

    # `messages` is what goes to the API; `history` is what goes back to the
    # chat UI. Tool traffic is kept in `messages` only, so `history` stays a
    # list of plain user/assistant dicts.
    messages = [{"role": "system", "content": system_message}] + history

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,  # the schema list, not the book_hotel function itself
        tool_choice="auto"
    )

    msg = response.choices[0].message

    if msg.tool_calls:
        # One assistant message with the tool_calls, then one "tool" message
        # for every call id the model issued.
        messages.append(msg)

        for tool_call in msg.tool_calls:
            if tool_call.function.name == "book_hotel":
                try:
                    result = book_hotel(**json.loads(tool_call.function.arguments))
                except (json.JSONDecodeError, TypeError) as exc:
                    # Bad JSON or wrong/missing argument names: tell the model.
                    result = f"Error: invalid arguments for book_hotel: {exc!r}"
            else:
                result = f"Error: unknown tool '{tool_call.function.name}'"

            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result
            })

        final = client.chat.completions.create(
            model=model,
            messages=messages
        )

        reply = final.choices[0].message.content

    else:
        reply = msg.content

    # The message content can be None; normalise so history holds a string.
    reply = reply or ""

    history.append({"role": "assistant", "content": reply})

    # Nothing to synthesise for an empty reply.
    audio_reply = text_to_speech(reply) if reply else None

    return history, audio_reply

In [11]:
# CLEAR BUTTON

def clear_all():
    """Return reset values for the three components wired to the clear button.

    Returns:
        (empty chat history, no audio input, no audio output).
    """
    empty_history = []
    no_audio = None
    return empty_history, no_audio, no_audio

In [16]:
# GRADIO UI

with gr.Blocks() as demo:

    gr.Markdown("# 🎧 Technical Assistant with Voice + Tool Use")

    # Backend selector; the value is passed to chat / voice_chat as model_choice.
    model_choice = gr.Radio(["OpenAI","Ollama"], value="OpenAI")

    chatbot = gr.Chatbot(type="messages")

    msg = gr.Textbox(placeholder="Ask a technical question...")
    send = gr.Button("Send")

    audio_input = gr.Audio(sources=["microphone"], type="filepath")
    voice_btn = gr.Button("Ask with Voice")

    audio_output = gr.Audio(autoplay=True)

    clear_btn = gr.Button("Clear Chat")

    # TEXT SUBMIT + BUTTON SUBMIT
    # Pressing Enter in the textbox and clicking Send do the same thing:
    # stream the reply into the chatbot, then empty the textbox.
    for trigger in (msg.submit, send.click):
        trigger(
            chat,
            inputs=[msg, chatbot, model_choice],
            outputs=chatbot
        ).then(lambda: "", outputs=msg)

    # VOICE SUBMIT
    # After answering, reset the recorder so the next question starts clean.
    voice_btn.click(
        voice_chat,
        inputs=[audio_input, chatbot, model_choice],
        outputs=[chatbot, audio_output]
    ).then(lambda: None, outputs=audio_input)

    # CLEAR
    clear_btn.click(
        clear_all,
        outputs=[chatbot, audio_input, audio_output]
    )

# NOTE(review): share=True also serves the app on a public gradio.live URL;
# anyone with that link can send requests that are billed to the configured
# API key. Switch to share=False if the app is only meant for local use.
demo.launch(share=True, inbrowser=True)

* Running on local URL:  http://127.0.0.1:7870
* Running on public URL: https://0eecd474e4ff05230b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


