End of week 2 exercise

In [38]:
# imports

import base64
import json
import os
import re
from io import BytesIO

import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
from pydub import AudioSegment
from pydub.playback import play


In [39]:
# Initialization

# Load variables from a local .env file (override=True is passed to load_dotenv),
# then report whether an OpenAI key is visible in the environment.
load_dotenv(override=True)
openai_api_key = os.environ.get('OPENAI_API_KEY')
print("API key found and looks good so far!" if openai_api_key else "No API key was found!")

# Chat model used for both coaching replies and translation.
MODEL = "gpt-4o-mini"
# Shared client; the key is not passed explicitly here.
openai = OpenAI()

API key found and looks good so far!


In [40]:
# System prompt for the coach. The pieces are joined by implicit string
# concatenation, so every piece except the last must end with a space.
system_message = (
    "You are a helpful tennis coach who answers questions about tennis rules, "
    "players, strategies, training, and equipment. "
    "Give short, courteous answers, no more than 2 sentences."
)

In [41]:
# Translation

# Maps the language names used as UI choices to the short codes handed to the translator.
LANG_CODES = dict(
    English="en",
    Spanish="es",
    French="fr",
)

def translate_text(text, target_language="en"):
    """Translate ``text`` into ``target_language`` using the chat model.

    Args:
        text: The text to translate.
        target_language: Language name or code; it is inserted verbatim into
            the translator's system prompt. Defaults to ``"en"``.

    Returns:
        The content of the model's first choice. If ``text`` is ``None``,
        empty, or whitespace-only it is returned unchanged and no API call
        is made.
    """
    # Nothing to translate: skip the round trip instead of sending empty content.
    if not text or not text.strip():
        return text

    messages = [
        {"role": "system", "content": f"You are a translator. Translate the following text to {target_language}"},
        {"role": "user", "content": text}
    ]
    response = openai.chat.completions.create(model=MODEL, messages=messages)
    return response.choices[0].message.content

def tennis_info_tool(query):
    """Return a canned answer for questions about top players.

    Args:
        query: The user's question. ``None`` or an empty string is accepted.

    Returns:
        A fixed string listing top male and female players when ``query``
        contains the whole word "top" (case-insensitive); otherwise ``None``.
    """
    if not query:
        return None
    # Whole-word match so that words such as "stop" or "topspin" do not trigger the tool.
    if re.search(r"\btop\b", query, flags=re.IGNORECASE):
        return "Top male players: Djokovic, Nadal, Federer. Top female players: Barty, Sabalenka, Swiatek."
    return None


In [42]:
# Image
def generate_tennis_image(prompt):
    """Generate one 1024x1024 tennis-themed image for ``prompt``.

    The prompt is wrapped in a fixed "Tennis scene: ..." template, sent to the
    ``dall-e-3`` image model requesting base64 output, and the decoded bytes
    are opened as a PIL image.

    Args:
        prompt: Text describing what the scene should show.

    Returns:
        The image opened via ``Image.open`` from the decoded response bytes.
    """
    scene_prompt = f"Tennis scene: {prompt}, realistic and detailed, vibrant colors"
    result = openai.images.generate(
        model="dall-e-3",
        prompt=scene_prompt,
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    # Only one image is requested (n=1), so take the first entry.
    raw_bytes = base64.b64decode(result.data[0].b64_json)
    return Image.open(BytesIO(raw_bytes))

In [43]:
# Audio

def talker(message):
    """Speak ``message`` aloud.

    Requests speech from the ``tts-1`` model with the "onyx" voice, decodes
    the returned bytes as MP3, and plays them with pydub's ``play``.

    Args:
        message: The text to synthesize.
    """
    speech = openai.audio.speech.create(model="tts-1", voice="onyx", input=message)
    mp3_buffer = BytesIO(speech.content)
    play(AudioSegment.from_file(mp3_buffer, format="mp3"))

In [44]:
def chat(history, user_message, target_language="English", use_audio=False, generate_image=False):
    """Produce the coach's next reply and append it to ``history``.

    Args:
        history: Conversation so far as a list of dicts with ``"role"`` and
            ``"content"`` keys. It is mutated in place (the reply is appended).
        user_message: The latest user text, used only for keyword routing.
            When empty, the most recent user turn in ``history`` is used
            instead; the UI clears its textbox before this function runs, so
            the value it passes here is empty.
        target_language: Key of ``LANG_CODES``. Any value other than
            ``"English"`` causes the reply to be translated; unknown names
            fall back to the ``"en"`` code.
        use_audio: When true, the reply is spoken via ``talker``.
        generate_image: When true, an image is generated from the reply.

    Returns:
        A ``(history, image)`` tuple; ``image`` is ``None`` unless an image
        was generated.
    """
    image = None

    # Recover the question from the history when the caller passes an empty message.
    if not user_message:
        user_message = next(
            (
                turn["content"]
                for turn in reversed(history)
                if turn.get("role") == "user" and isinstance(turn.get("content"), str)
            ),
            "",
        )

    # Whole-word keyword routing, so "stop" or "topspin" do not count as "top".
    reply = None
    if re.search(r"\b(?:top|players)\b", user_message, flags=re.IGNORECASE):
        reply = tennis_info_tool(user_message)

    # The tool returns None when it has no canned answer; ask the model instead.
    if not reply:
        messages = [{"role": "system", "content": system_message}]
        # Forward only role/content so any extra keys on history entries are not sent to the API.
        messages += [{"role": turn["role"], "content": turn["content"]} for turn in history]
        response = openai.chat.completions.create(model=MODEL, messages=messages)
        reply = response.choices[0].message.content or ""

    if reply and target_language != "English":
        code = LANG_CODES.get(target_language, "en")
        # Keep the untranslated reply if the translator returns nothing.
        reply = translate_text(reply, code) or reply

    history.append({"role": "assistant", "content": reply})

    # Skip speech and image generation when there is no text to work from.
    if use_audio and reply:
        talker(reply)

    if generate_image and reply:
        image = generate_tennis_image(reply)
    return history, image

In [45]:
# Gradio

# Layout: chat transcript and generated image side by side, then the input
# controls, then a Clear button.
with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")
        image_output = gr.Image(height=500)
    with gr.Row():
        entry = gr.Textbox(label="Ask your Tennis Coach:", placeholder="Type and press Enter...")
        target_lang_dropdown = gr.Dropdown(
            # Offer exactly the languages the translator knows about.
            choices=list(LANG_CODES),
            value="English",
            label="Translate to:"
        )
        audio_toggle = gr.Checkbox(label="Play audio", value=False)
        image_toggle = gr.Checkbox(label="Generate image for this reply", value=True)
    with gr.Row():
        clear = gr.Button("Clear")

    def add_user_message(message, history):
        """Append the typed message to the transcript and clear the textbox.

        Blank submissions are ignored so no empty user turn is recorded.
        """
        history = history or []
        if message and message.strip():
            history.append({"role": "user", "content": message})
        return "", history

    def chat_response(history, message, target_language, use_audio, generate_image):
        """Run the coach on the transcript and return the updated chat and image.

        NOTE: add_user_message has already cleared the textbox by the time this
        step runs, so ``message`` arrives empty; the user's text is the last
        entry of ``history``.
        """
        # Nothing new to answer (e.g. a blank submission): leave both outputs untouched.
        if not history or history[-1].get("role") != "user":
            return history, gr.update()
        history, image = chat(history, message, target_language, use_audio, generate_image)
        return history, image

    # Step 1 records the user turn and clears the textbox; step 2 generates the reply.
    entry.submit(
        add_user_message,
        inputs=[entry, chatbot],
        outputs=[entry, chatbot]).then(
        chat_response,
        inputs=[chatbot, entry, target_lang_dropdown, audio_toggle, image_toggle],
        outputs=[chatbot, image_output]
    )

    # Reset the transcript, the image, and the textbox.
    clear.click(lambda: (None, None, None), inputs=None, outputs=[chatbot, image_output, entry], queue=False)

ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




Input #0, wav, from '/var/folders/73/0s09hh2n48q7s14tld64q3rh0000gn/T/tmp4hoe_x5n.wav':
  Duration: 00:00:06.55, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   6.45 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 




Input #0, wav, from '/var/folders/73/0s09hh2n48q7s14tld64q3rh0000gn/T/tmp2mxw0wth.wav':
  Duration: 00:00:04.61, bitrate: 384 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 24000 Hz, 1 channels, s16, 384 kb/s
   4.48 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B 


