# Project - Airline AI Assistant

We'll now bring together what we've learned to make an AI Customer Support assistant for an Airline

In [19]:
# imports

import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from faster_whisper import WhisperModel

In [20]:
# Initialization

load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
if openai_api_key:
    print(f"OpenAI API Key exists and begins {openai_api_key[:8]}")
else:
    print("OpenAI API Key not set")
    
MODEL = "gpt-4o-mini"
w_model = WhisperModel("tiny",compute_type="int8")
openai = OpenAI()

OpenAI API Key exists and begins sk-proj-


vocabulary.txt: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.bin:   0%|          | 0.00/75.5M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [6]:
system_message = "You are a helpful assistant for an Airline called FlightAI. "
system_message += "Give short, courteous answers, no more than 1 sentence. "
system_message += "Always be accurate. If you don't know the answer, say so."

## Tools

Tools are an incredibly powerful feature provided by the frontier LLMs.

With tools, you can write a function, and have the LLM call that function as part of its response.

Sounds almost spooky.. we're giving it the power to run code on our machine?

Well, kinda.

In [73]:
# Let's start by making a useful function

ticket_prices = {"london": "$799", "paris": "$899", "tokyo": "$1400", "berlin": "$499"}

def get_ticket_price(destination_city):
    print(f"Tool get_ticket_price called for {destination_city}")
    city = destination_city.lower()
    return ticket_prices.get(city, "Unknown")

In [74]:
# There's a particular dictionary structure that's required to describe our function:

price_function = {
    "name": "get_ticket_price",
    "description": "Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'",
    "parameters": {
        "type": "object",
        "properties": {
            "destination_city": {
                "type": "string",
                "description": "The city that the customer wants to travel to",
            },
        },
        "required": ["destination_city"],
        "additionalProperties": False
    }
}

In [75]:
# And this is included in a list of tools:

tools = [{"type": "function", "function": price_function}]

## Getting OpenAI to use our Tool

There's some fiddly stuff to allow OpenAI "to call our tool"

What we actually do is give the LLM the opportunity to inform us that it wants us to run the tool.

Here's how the new chat function looks:

In [76]:
# We have to write that function handle_tool_call:

def handle_tool_call(message):
    tool_call = message.tool_calls[0]
    arguments = json.loads(tool_call.function.arguments)
    city = arguments.get('destination_city')
    price = get_ticket_price(city)
    response = {
        "role": "tool",
        "content": json.dumps({"destination_city": city,"price": price}),
        "tool_call_id": tool_call.id
    }
    return response, city

# Let's go multi-modal!!

We can use DALL-E-3, the image generation model behind GPT-4o, to make us some images

Let's put this in a function called artist.

### Price alert: each time I generate an image it costs about 4 cents - don't go crazy with images!

In [77]:
# Some imports for handling images

import base64
from io import BytesIO
from PIL import Image

In [78]:
def artist(city):
    image_response = openai.images.generate(
            model="dall-e-3",
            prompt=f"An image representing a vacation in {city}, showing tourist spots and everything unique about {city}, in a vibrant pop-art style",
            size="1024x1024",
            n=1,
            response_format="b64_json",
        )
    image_base64 = image_response.data[0].b64_json
    image_data = base64.b64decode(image_base64)
    return Image.open(BytesIO(image_data))

# For Mac users - and possibly many PC users too

This version should work fine for you. It might work for Windows users too, but you might get a Permissions error writing to a temp file. If so, see the next section!

As always, if you have problems, please contact me! (You could also comment out the audio talker() in the later code if you're less interested in audio generation)

In [79]:
from pydub import AudioSegment
from pydub.playback import play

def talker(message):
    response = openai.audio.speech.create(
      model="tts-1",
      voice="onyx",    # Also, try replacing onyx with alloy
      input=message
    )
    
    audio_stream = BytesIO(response.content)
    audio = AudioSegment.from_file(audio_stream, format="mp3")
    play(audio)

# Our Agent Framework

The term 'Agentic AI' and Agentization is an umbrella term that refers to a number of techniques, such as:

1. Breaking a complex problem into smaller steps, with multiple LLMs carrying out specialized tasks
2. The ability for LLMs to use Tools to give them additional capabilities
3. The 'Agent Environment' which allows Agents to collaborate
4. An LLM can act as the Planner, dividing bigger tasks into smaller ones for the specialists
5. The concept of an Agent having autonomy / agency, beyond just responding to a prompt - such as Memory

We're showing 1 and 2 here, and to a lesser extent 3 and 5. In week 8 we will do the lot!

In [80]:
def chat(history):
    messages = [{"role": "system", "content": system_message}] + history
    response = openai.chat.completions.create(model=MODEL, messages=messages, tools=tools)
    image = None
    
    if response.choices[0].finish_reason=="tool_calls":
        message = response.choices[0].message
        response, city = handle_tool_call(message)
        messages.append(message)
        messages.append(response)
        image = artist(city)
        response = openai.chat.completions.create(model=MODEL, messages=messages)
        
    reply = response.choices[0].message.content
    history += [{"role":"assistant", "content":reply}]

    # Comment out or delete the next line if you'd rather skip Audio for now..
    talker(reply)
    
    return history, image

In [None]:
# More involved Gradio code as we're not using the preset Chat interface!
# Passing in inbrowser=True in the last line will cause a Gradio window to pop up immediately.
# Load transcription model
import mimetypes
whisp_model = WhisperModel("tiny", compute_type="int8")

# Guess MIME types
def guess_mime(path):
    mime, _ = mimetypes.guess_type(path)
    return mime or "application/octet-stream"

def transcribe(audio_path):
    segments, _ = whisp_model.transcribe(audio_path, vad_filter=False)
    segments = list(segments)
    full_text = " ".join([seg.text for seg in segments])
    return full_text.strip()

with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")
        image_output = gr.Image(height=500)
        state = gr.State([])
    with gr.Row():
        entry = gr.MultimodalTextbox(
            interactive=True,
            file_count="single",
            placeholder="Chat by typing or talking with our AI Assistant:",
            show_label=True,
            sources=["microphone", "upload"],
            )
    with gr.Row():
        clear = gr.Button("Clear")

    def do_entry(inputs, history):
        text_input = ""
        audio_path = None

        text_input = inputs.get("text", "")
        file_paths = inputs.get("files", [])
    
        # Separate by type
        audio_path = [f for f in file_paths if guess_mime(f).startswith("audio/")]
        image_files = [f for f in file_paths if guess_mime(f).startswith("image/")]

    # Transcribe audio
        audio_text = transcribe(audio_path[0])
        merged_input = f"{text_input} {audio_text}".strip()
        message = {"role": "user", "content": merged_input}
        if not merged_input:
            return history
        history.append(message)
        return "", history

    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(
        chat, inputs=chatbot, outputs=[chatbot, image_output]
    )
    clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)

ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7899
* To create a public link, set `share=True` in `launch()`.




Inputs received: {'text': '', 'files': ['/private/var/folders/wd/vhv7g6pj5dl50yr9xq6wtbgh0000gn/T/gradio/d2b9ca931e97223a6f210fdede4ceb605b7f1683600d284a0004ae965c01f840/audio.wav']}
Audio input received: ['/private/var/folders/wd/vhv7g6pj5dl50yr9xq6wtbgh0000gn/T/gradio/d2b9ca931e97223a6f210fdede4ceb605b7f1683600d284a0004ae965c01f840/audio.wav']
Tool get_ticket_price called for London


Input #0, wav, from '/var/folders/wd/vhv7g6pj5dl50yr9xq6wtbgh0000gn/T/tmpnfy1r80e.wav':
  Duration: 00:00:02.28, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   2.18 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 




# Exercises and Business Applications

Add in more tools - perhaps to simulate actually booking a flight. A student has done this and provided their example in the community contributions folder.

Next: take this and apply it to your business. Make a multi-modal AI assistant with tools that could carry out an activity for your work. A customer support assistant? New employee onboarding assistant? So many possibilities! Also, see the week2 end of week Exercise in the separate Notebook.

<table style="margin: 0; text-align: left;">
    <tr>
        <td style="width: 150px; height: 150px; vertical-align: middle;">
            <img src="../thankyou.jpg" width="150" height="150" style="display: block;" />
        </td>
        <td>
            <h2 style="color:#090;">I have a special request for you</h2>
            <span style="color:#090;">
                My editor tells me that it makes a HUGE difference when students rate this course on Udemy - it's one of the main ways that Udemy decides whether to show it to others. If you're able to take a minute to rate this, I'd be so very grateful! And regardless - always please reach out to me at ed@edwarddonner.com if I can help at any point.
            </span>
        </td>
    </tr>
</table>

# Additional End of week Exercise - week 2

Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.

This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!

If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.

I will publish a full solution here soon - unless someone beats me to it...

There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results.

In [None]:
import sounddevice as sd
import numpy as np
import tempfile
import scipy.io.wavfile

def record_audio(duration=5, fs=16000):
    print("Recording... Speak now!")
    audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
    sd.wait()
    return audio, fs

def speech_to_text():
    audio, fs = record_audio()
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
        scipy.io.wavfile.write(tmpfile.name, fs, audio)
        with open(tmpfile.name, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
    return transcript.text

# Example usage:
spoken_text = speech_to_text()

#history, image = chat([{"role": "user", "content": spoken_text}])

Recording... Speak now!
You said: Jesus, what's your name?


In [36]:
import os
import gradio as gr
from dotenv import load_dotenv
from faster_whisper import WhisperModel
import openai

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

model = WhisperModel("tiny", compute_type="int8")

def transcribe_audio(audio_path):
    if not audio_path:
        return ""
    segments, _ = model.transcribe(audio_path)
    return " ".join([s.text for s in segments]).strip()

def handle_input(text, audio, history=[]):
    audio_text = transcribe_audio(audio)
    full_input = f"{text or ''} {audio_text or ''}".strip()

    if not full_input:
        return history

    history.append((f"🗣️ {full_input}", ""))

    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg.replace("🗣️ ", "")})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    response = openai.chat.completions.create(
        model=MODEL, messages=messages
    )
    reply = response.choices[0].message.content.strip()
    history[-1] = (history[-1][0], f"🤖 {reply}")
    return history

with gr.Blocks() as demo:
    gr.Markdown("🎤💬 Chatbot with Text + Microphone Audio Input")
    state = gr.State([])

    with gr.Row():
        text_input = gr.Textbox(label="Type your message")
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Or speak")

    submit = gr.Button("Submit")
    clear = gr.Button("Clear")
    chatbox = gr.Chatbot()

    submit.click(
        handle_input, 
        inputs=[text_input, audio_input, state], 
        outputs=[chatbox]
    )

    clear.click(lambda: ([], [], []), None, outputs=[text_input, audio_input, chatbox, state])

demo.launch()


  chatbox = gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7867
* To create a public link, set `share=True` in `launch()`.




In [38]:
import gradio as gr

# Function to process the input text (you can modify this to do anything you want)
def process_input(text):
    return f"You said or typed: '{text}'"

# Create a Gradio interface with both text input and microphone capabilities
with gr.Blocks() as demo:
    gr.Markdown("## Multi-Model Text Box Example")
    
    # Textbox for direct text input
    text_box = gr.Textbox(label="Type or Speak", placeholder="Type something here...")

    # Add a microphone button for speech input
    microphone = gr.Audio(sources="microphone", type="numpy", label="Or use your microphone")

    # Button to process the input
    submit_button = gr.Button("Submit")

    # Display the output
    output = gr.Output()

    def process_audio(audio):
        # You will need to process the audio here if you want
        # For demonstration, we will just output that audio was received.
        return "Audio received, processing is not shown in this example."

    # Defining the actions for button clicks
    submit_button.click(fn=process_input, inputs=text_box, outputs=output)
    microphone.change(fn=process_audio, inputs=microphone, outputs=output)

# Launch the Gradio interface
demo.launch()

AttributeError: module 'gradio' has no attribute 'Output'