### <span style="color:purple; font-weight:bold;">Candidate Name: Anirban Bose</span>
<h3><strong style="color:purple;">Assignment: Data Science Multi Modal AI Assistant using Gradio</strong></h3>

In [1]:
import base64
import json
from io import BytesIO
from PIL import Image
from IPython.display import Audio, display, Markdown
from scipy.io.wavfile import write as write_wav
import numpy as np
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

In [2]:
# Initialization
#
# Pull settings from a local .env file (values there override variables that are
# already set), report whether an OpenAI key was found, and create the shared
# client plus the model constants used by the rest of the notebook.

load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)

MODEL_OPENAI = "gpt-4o-mini"  # chat model used for every completion request
MAX_TOKENS = 200              # cap on the length of replies produced by call_openai
openai = OpenAI()

OpenAI API Key exists and begins sk-proj-


In [3]:
# The system prompt is assembled from four single-purpose sentences so each
# instruction can be tuned independently.
system_prompt_basic = "You are a Data Science Tutor who provides high quality responses to queries from students or educators."
system_prompt_deterministic = "You must first evaluate whether a question is related to Data Science, Machine Learning, Deep Learning, Reinforcement Learning, LLMs, and other related fields."
system_prompt_additional = "Your responses should be as technical as possible. Make sure to show your expertise in this area."
system_prompt_exclusion = "Do not hallucinate. If you do not know the answer, say so"

# Join with a single space: plain `+` concatenation glued the sentences together
# ("educators.You must ..."), which garbles the instructions the model receives.
system_prompt = " ".join([
    system_prompt_basic,
    system_prompt_deterministic,
    system_prompt_additional,
    system_prompt_exclusion,
])

In [4]:
def get_user_prompt(question: str) -> str:
    """Wrap a raw question in the instruction sent as the user turn.

    Args:
        question: The question text to embed.

    Returns:
        The fixed request for a detailed explanation followed by ``question``.
    """
    instruction = "Please give a detailed explanation to the following question: "
    return instruction + question

In [5]:
def call_openai(question):
    """Ask the chat model a single question and return the reply text.

    The request is a two-message conversation: the notebook's system prompt
    followed by ``question`` wrapped by ``get_user_prompt``. The reply length is
    capped at ``MAX_TOKENS`` and sampling uses ``temperature=1.0``.

    Args:
        question: The question text to send.

    Returns:
        The message content of the first choice in the completion.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_user_prompt(question)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL_OPENAI,
        messages=conversation,
        max_tokens=MAX_TOKENS,
        temperature=1.0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [6]:
# Send one question straight to call_openai (presumably a deliberately
# off-topic probe of the system prompt -- confirm intent) and show the reply.
output = call_openai("Make America Great Again")

# Format the reply as a Markdown blockquote by prefixing EVERY line with "> ".
# Replacing only "\n\n" with "\n> " removed the blank line between paragraphs
# (so Markdown merged them into one) and left lines after a single newline,
# such as list items or code fences, without a quote marker.
formatted_output = "\n".join("> " + line for line in output.split("\n"))

# Display using Markdown
display(Markdown(formatted_output))

> The phrase "Make America Great Again" (MAGA) is primarily associated with political discourse, particularly in relation to the campaign of former U.S. President Donald Trump during the 2016 presidential election. It does not directly pertain to data science, machine learning, deep learning, reinforcement learning, or related technical fields.
> If you are looking for analysis related to this phrase from a data science perspective, such as sentiment analysis on social media discussions, text analysis of speeches, or the impact of social media algorithms on political campaigns, I would be happy to assist. Please provide more context or a specific angle you would like to explore within the realm of data science and related fields.

In [7]:
# Function schema handed to the model: it advertises a tool named "call_openai"
# that takes one required string argument, "question", and no other properties.

ask_chatgpt_ds_function = dict(
    name="call_openai",
    description="Get the answer to a Data Science Questions. Call this whenever the question is related to data science, for example when a student/educator asks 'Types of regression algorithms'",
    parameters=dict(
        type="object",
        properties=dict(
            question=dict(
                type="string",
                description="The question that users would ask",
            ),
        ),
        required=["question"],
        additionalProperties=False,
    ),
)

In [8]:
# Tool list passed as `tools=` in chat(): a single function-type entry that
# wraps the schema defined above.

tools = [dict(type="function", function=ask_chatgpt_ds_function)]

In [9]:
def handle_tool_call(tool_call):
    """Execute one tool call requested by the model and build its reply message.

    Args:
        tool_call: Object exposing ``function.name``, ``function.arguments``
            (a JSON string) and ``id``.

    Returns:
        A ``{"role": "tool", ...}`` message dict carrying the tool call id and
        the text returned by ``call_openai`` for the ``"question"`` argument.

    Raises:
        ValueError: If the requested tool is anything other than ``"call_openai"``.
    """
    requested_tool = tool_call.function.name
    # Arguments are parsed before the name check, so malformed JSON surfaces
    # first even when the tool name is unsupported.
    arguments = json.loads(tool_call.function.arguments)
    call_id = tool_call.id

    if requested_tool != "call_openai":
        raise ValueError(f"Unsupported tool: {requested_tool}")

    answer = call_openai(arguments["question"])
    return {
        "role": "tool",
        "tool_call_id": call_id,
        "content": answer,
    }


In [10]:
def chat(message, history):
    """Answer one user message, running any tool calls the model requests.

    Args:
        message: The new user message text.
        history: Earlier conversation messages; concatenated as-is between
            the system prompt and the new user message.

    Returns:
        The assistant's reply text: either the first completion's content, or,
        when that completion finished with ``"tool_calls"``, the content of a
        follow-up completion made after the tool results were appended.
    """
    system_turn = {"role": "system", "content": system_prompt}
    user_turn = {"role": "user", "content": message}
    messages = [system_turn] + history + [user_turn]

    # First request: the model may answer directly or ask for a tool.
    first_response = openai.chat.completions.create(
        model=MODEL_OPENAI,
        messages=messages,
        tools=tools
    )
    first_choice = first_response.choices[0]

    # Plain answer: nothing more to do.
    if first_choice.finish_reason != "tool_calls":
        return first_choice.message.content

    # Tool path: record the assistant's request, then one result per tool call.
    requested_calls = first_choice.message.tool_calls
    messages.append(first_choice.message)
    messages.extend(handle_tool_call(call) for call in requested_calls)

    # Second request (no tools offered) turns the tool output into the reply.
    final_response = openai.chat.completions.create(
        model=MODEL_OPENAI,
        messages=messages
    )
    return final_response.choices[0].message.content


In [11]:
# Serve chat() through Gradio's ready-made chat UI. chat() splices `history`
# directly into the OpenAI message list, so it relies on type="messages"
# supplying the history as role/content message dicts.
gr.ChatInterface(fn=chat, type="messages").launch()

* Running on local URL:  http://127.0.0.1:7887

To create a public link, set `share=True` in `launch()`.




#### Let us go multimodal: set up audio in the response

In [13]:
import base64
from io import BytesIO
from PIL import Image
from IPython.display import Audio, display

def talker(message):
    """Convert text to speech, save it as an MP3, play it inline, and return the path.

    Args:
        message: The text to be spoken.

    Returns:
        The path of the MP3 file that was written ("output_audio.mp3"). The
        function used to return None, so callers that forwarded its result to
        an audio component never received any audio.

    Note:
        The file name is fixed, so every call overwrites the previous audio.
    """
    response = openai.audio.speech.create(
        model="tts-1",
        voice="onyx",
        input=message)

    # Write the returned bytes straight to disk; no in-memory stream is needed.
    output_filename = "output_audio.mp3"
    with open(output_filename, "wb") as f:
        f.write(response.content)

    # Play the generated audio
    display(Audio(output_filename, autoplay=True))

    return output_filename

# The trailing semicolon stops the notebook from echoing the returned file path.
talker("Well, hi there");

In [19]:
def chat_and_talk(user_message, chat_history):
    """Handle one submitted message: get the reply, voice it, and extend the history.

    Args:
        user_message: Text the user submitted.
        chat_history: List of earlier message dicts, or None to start a new one.
            The list is extended in place with the new user/assistant pair.

    Returns:
        A 4-tuple ``(chat_history, audio, chat_history, "")``: the updated
        history (twice), whatever ``talker`` returned for the reply, and an
        empty string used to clear the input box.
    """
    history = [] if chat_history is None else chat_history

    # The reply is generated against the history *before* this turn is added.
    answer = chat(user_message, history)

    # Voice the reply; talker's return value is forwarded unchanged.
    speech = talker(answer)

    history.extend([
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": answer},
    ])

    return history, speech, history, ""


In [20]:
# Custom UI: a chat transcript, a text box, and an audio player, with a State
# component carrying the running message history between submissions.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type='messages', label="Chat History")
    msg = gr.Textbox(label="Type your message")
    audio = gr.Audio(autoplay=True, label="Assistant Voice")
    state = gr.State([])

    # When the text box is submitted, chat_and_talk receives (msg, state) and
    # its four return values update the transcript, audio, state, and text box.
    msg.submit(
        fn=chat_and_talk,
        inputs=[msg, state],
        outputs=[chatbot, audio, state, msg],
    )

demo.launch()


* Running on local URL:  http://127.0.0.1:7890

To create a public link, set `share=True` in `launch()`.


