# Additional End of week Exercise - week 2

Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.

This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!

If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.

I will publish a full solution here soon - unless someone beats me to it...

There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results.

In [1]:
# imports

import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import anthropic
import gradio as gr
from pydub import AudioSegment

In [2]:
# Initialization

load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")
    
MODEL = "gpt-4o-mini"
openai = OpenAI()
claude = anthropic.Anthropic()

OpenAI API Key exists and begins sk-proj-


In [3]:
system_message = "You are a helpful assistant for an Airline called FlightAI. "
system_message += "Give short, courteous answers, no more than 1 sentence. "
system_message += "Always be accurate. If you don't know the answer, say so."

In [4]:
tool_calls = [
    {
        "id": "call_1",  # for get_ticket_price
        "function": {
            "name": "get_ticket_price",
            "arguments": "{\"destination_city\":\"Hong Kong\"}"
        }
    },
    {
        "id": "call_2",  # for make_booking
        "function": {
            "name": "make_booking",
            "arguments": "{\"name\":\"Johnny 5\", ...}"
        }
    }
]

In [5]:
# We have to write that function handle_tool_call:

def handle_tool_call(message):
    tool_messages = []
    city = None

    for tool_call in message.tool_calls:
        tool_call = message.tool_calls[0]
        function_name = tool_call.function.name
        arguments = json.loads(tool_call.function.arguments)
        tool_call_id = tool_call.id

        if function_name == "get_ticket_price":
            result = get_ticket_price(arguments["destination_city"])
            city = arguments["destination_city"]
        elif function_name == "make_booking":
            result = make_booking(
                name=arguments["name"],
                destination_city=arguments["destination_city"],
                date=arguments.get("date")
            )
            city = arguments["destination_city"]
        else:
            result = f"Unknown function: {function_name}"


        tool_messages.append({
            "role": "tool",
            "tool_call_id": tool_call_id, 
            "name": function_name, 
            "content": result
        })

    return tool_messages, city

In [6]:
# Let's start by making a useful function

ticket_prices = {
    "london": "$799",
    "paris": "$899",
    "tokyo": "$1400",
    "berlin": "$499",
    "new york": "$350",
    "los angeles": "$150",
    "sydney": "$1350",
    "rome": "$875",
    "dubai": "$980",
    "toronto": "$425",
    "singapore": "$1295",
    "bangkok": "$1100",
    "amsterdam": "$845",
    "barcelona": "$880",
    "cairo": "$1025",
    "mumbai": "$950",
    "hong kong": "$1200",
    "seoul": "$1180",
    "buenos aires": "$975",
    "cape town": "$1350",
    "vancouver": "$295",
    "mexico city": "$415",
    "madrid": "$860",
    "istanbul": "$995",
    "athens": "$920"
}

def get_ticket_price(destination_city):
    print(f"Tool get_ticket_price called for {destination_city}")
    city = destination_city.lower()
    return ticket_prices.get(city, "Unknown")

In [7]:
get_ticket_price("London")

Tool get_ticket_price called for London


'$799'

In [8]:
def make_booking(name, destination_city, date=None):
    city = destination_city.lower()
    price = ticket_prices.get(city, "Unknown")

    if price == "Unknown":
        confirmation = f"Sorry, we don't currently fly to {destination_city}."
    else:
        confirmation = f"Booking confirmed for {name} to {destination_city.title()}"
        if date:
            confirmation += f" on {date}"
        confirmation += f". Total cost: {price}"
    
    return confirmation

In [9]:
booking_function = {
    "name": "make_booking",
    "description": "Book a flight to a city. Use this when the user says they want to book a flight.",
    "parameters": {
        "type": "object",
        "properties": {
            "name": {
                "type": "string",
                "description": "The name of the person booking the flight"
            },
            "destination_city": {
                "type": "string",
                "description": "The city to which the user wants to fly"
            },
            "date": {
                "type": "string",
                "description": "The desired flight date in YYYY-MM-DD format",
            }
        },
        "required": ["name", "destination_city"]
    }
}

price_function = {
    "name": "get_ticket_price",
    "description": "Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'",
    "parameters": {
        "type": "object",
        "properties": {
            "destination_city": {
                "type": "string",
                "description": "The city that the customer wants to travel to",
            },
        },
        "required": ["destination_city"],
        "additionalProperties": False
    }
}

In [10]:
# And this is included in a list of tools:

tools = [
    {"type": "function", "function": price_function}, 
    {"type": "function", "function": booking_function}
]

In [11]:
# Some imports for handling images

import base64
from io import BytesIO
from PIL import Image

In [12]:
def artist(city):
    image_response = openai.images.generate(
            model="dall-e-3",
            prompt=f"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a vibrant pop-art style",
            size="1024x1024",
            n=1,
            response_format="b64_json",
        )
    image_base64 = image_response.data[0].b64_json
    image_data = base64.b64decode(image_base64)
    return Image.open(BytesIO(image_data))

In [13]:
def translate_with_claude(text, target_language="Spanish"):
    message = f"Please translate the following response into {target_language}:\n\n{text}"

    response = claude.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        temperature=0.3,
        messages=[{"role": "user", "content": message}]
    )

    return response.content[0].text.strip()

In [14]:
def handle_audio_input(audio_path, history):
    # Handle empty or invalid audio path before Whisper or Gradio crashes
    if not audio_path or not isinstance(audio_path, str) or not os.path.isfile(audio_path):
        print("Invalid or missing audio path:", audio_path)
        return history, "⚠️ No audio received or file not found.", gr.update(value=None)

    try:
        text = transcribe_audio(audio_path)
    except Exception as e:
        print("Audio transcription failed:", str(e))
        return history, "⚠️ Could not transcribe audio.", gr.update(value=None)

    history += [{"role": "user", "content": text}]

    try:
        history, image, translation = chat(history)
    except Exception as e:
        print("Chat failed:", str(e))
        return history, "⚠️ Chat error occurred.", gr.update(value=None)

    return history, translation, gr.update(value=None)

In [15]:
from pydub import AudioSegment
from pydub.playback import play

def talker(message):
    response = openai.audio.speech.create(
      model="tts-1",
      voice="alloy",    # Also, try replacing onyx with alloy
      input=message
    )
    
    audio_stream = BytesIO(response.content)
    audio = AudioSegment.from_file(audio_stream, format="mp3")
    play(audio)

In [16]:
def chat(history):
    messages = [{"role": "system", "content": system_message}] + history
    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)
    image = None
    
    if response.choices[0].finish_reason=="tool_calls":
        message = response.choices[0].message
        tool_responses, city = handle_tool_call(message)
        
        messages.append(message)
        messages.extend(tool_responses)

        image = artist(city) if city else None

        # Request follow-up assistant message after tool call(s)
        response = openai.chat.completions.create(model=MODEL, messages=messages)
        
    reply = response.choices[0].message.content
    history += [{"role":"assistant", "content":reply}]
    translated_reply = translate_with_claude(reply, target_language="Spanish")

    talker(reply)
    talker(translated_reply)
    
    return history, image, translated_reply

In [17]:
def transcribe_audio(filepath):
    if not filepath or not os.path.exists(filepath):
        raise FileNotFoundError("Audio file not found or invalid path.")

    with open(filepath, "rb") as f:
        transcript = openai.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            response_format="text"
        )
    return transcript

In [18]:
# More involved Gradio code as we're not using the preset Chat interface!
# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.

with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height=500, label="English (AI Assistant)", type="messages")
        translation_box = gr.Textbox(label="Translated Response", lines=10)

    with gr.Row():
        image_output = gr.Image(height=300)

    with gr.Row():
        entry = gr.Textbox(label="Chat with our AI Assistant:")
        mic = gr.Microphone(label="🎤 Speak", type="filepath")
    
    with gr.Row():
        clear = gr.Button("Clear")

    def do_entry(message, history):
        history += [{"role":"user", "content":message}]
        return "", history

    # 🧠 Update the `chat` function to return translation too
    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(
        chat, inputs=chatbot, outputs=[chatbot, image_output, translation_box]
    )

    mic.change(
        handle_audio_input,
        inputs=[mic, chatbot],
        outputs=[chatbot, translation_box, mic]
    )

    clear.click(lambda: None, inputs=None, outputs=[chatbot, translation_box], queue=False)

ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




Please migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.
  response = claude.messages.create(
Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmpx__q3ske.wav':
  Duration: 00:00:04.46, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   4.42 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 




Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmpcc614o63.wav':
  Duration: 00:00:06.60, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   6.52 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


Invalid or missing audio path: None
Tool get_ticket_price called for London


Please migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.
  response = claude.messages.create(
Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmp7e4h3c0d.wav':
  Duration: 00:00:04.49, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   4.41 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 




Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmph1o7hhiw.wav':
  Duration: 00:00:06.94, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   6.87 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


Invalid or missing audio path: None


Please migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.
  response = claude.messages.create(
Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmpldys8cz8.wav':
  Duration: 00:00:06.05, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   5.92 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 




Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmpof2rsh21.wav':
  Duration: 00:00:08.62, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   8.48 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


Invalid or missing audio path: None


Please migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.
  response = claude.messages.create(
Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmpwg9b_gc9.wav':
  Duration: 00:00:02.33, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   2.25 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 




Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmp84b7kt40.wav':
  Duration: 00:00:02.40, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   2.34 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


Invalid or missing audio path: None


Traceback (most recent call last):
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/routes.py", line 1093, in predict
    output = await route_utils.call_process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 2147, in process_api
    data = await self.postprocess_data(block_fn, result["prediction"], state)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 1889, in postprocess_data
    self.validate_outputs(block_fn, predictions)  # type: ignore
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/llms/lib/python3.11/site-packages




Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmpcbmuvupm.wav':
  Duration: 00:00:06.55, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   6.42 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


Invalid or missing audio path: None


Please migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.
  response = claude.messages.create(
Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmpv1_vsent.wav':
  Duration: 00:00:06.02, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   5.92 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 




Input #0, wav, from '/var/folders/kd/w1l5sb7s7_s1n7hx4jbk2bjc0000gn/T/tmpr64otm3k.wav':
  Duration: 00:00:08.40, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   8.32 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


Invalid or missing audio path: None
