# Multi-Tool Personal Assistant

A multi-modal personal agent built with Gradio and OpenAI Function Calling.
This project builds on my earlier Week 1 work ([Proverb Generator](../../../week1/community-contributions/toluwalemi/proverb_generator.ipynb) and [Football Tactical Explainer](../../../week1/community-contributions/toluwalemi/week_one_exercise.ipynb)), integrating concepts such as tool calling, stateful UIs, DALL-E 3 image generation, and text-to-speech (TTS).


In [None]:
import os
import json
import requests
import base64
from io import BytesIO
from PIL import Image
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

# override=True lets values from the .env file replace variables that are
# already present in the process environment.
load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
openrouter_api_key = os.getenv('OPENROUTER_API_KEY')

# Report each key's status, revealing only its last four characters.
print(
    f"OpenAI API Key ends in ...{openai_api_key[-4:]}"
    if openai_api_key
    else "OpenAI API Key not set. Features like TTS and DALL-E will fail."
)
print(
    f"OpenRouter API Key ends in ...{openrouter_api_key[-4:]}"
    if openrouter_api_key
    else "OpenRouter API Key not set. The main agent orchestration will fail"
)

In [None]:
# Model identifier passed to the OpenRouter client by the agent chat loop.
AGENT_MODEL = "openai/gpt-4o-mini"

# Direct OpenAI client: used by artist() for images and talker() for speech.
openai = OpenAI(api_key=openai_api_key)

# Second client built from the same SDK class but pointed at OpenRouter's
# endpoint; chat() sends the agent conversation through it.
openrouter = OpenAI(
    api_key=openrouter_api_key,
    base_url="https://openrouter.ai/api/v1",
)

## Python Functions

In [None]:
def fetch_proverb_chapter(chapter_number, timeout=10):
    """Fetch the text of a Proverbs chapter from bible-api.com.

    This is a tool function: it always returns a string so the result can be
    handed straight back to the model, even when the request fails.

    Args:
        chapter_number: Chapter of the Book of Proverbs to request.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests.get`` can block indefinitely.

    Returns:
        The chapter text prefixed with a short header on success, otherwise a
        human-readable failure message (non-200 status, network error, or a
        response body that is not valid JSON).
    """
    print(f"Executing fetch_proverb_chapter tool for chapter {chapter_number}...")
    url = f"https://bible-api.com/proverbs+{chapter_number}"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported as tool output instead
        # of propagating into the caller's streaming loop.
        return f"Failed to fetch chapter {chapter_number}. Error: {exc}"

    if response.status_code != 200:
        return f"Failed to fetch chapter {chapter_number}. Status Code: {response.status_code}"

    try:
        data = response.json()
    except ValueError as exc:
        # requests raises a ValueError subclass when the body is not JSON.
        return f"Failed to fetch chapter {chapter_number}. Invalid JSON response: {exc}"

    return f"Text from Proverbs Chapter {chapter_number}:\n" + data.get('text', '')

In [None]:
def extract_football_analogy(pattern_name, team_or_player):
    """Build the prompt asking for a football analogy of a coding pattern.

    No model is called here; the function only formats and returns the
    instruction text, which embeds both arguments.
    """
    print(f"Executing extract_football_analogy tool for {pattern_name} and {team_or_player}...")
    indent = "    "
    sentences = (
        "You are an expert software engineer and a massive football (soccer) fan.",
        f"Explain the coding pattern '{pattern_name}' using a detailed tactical analogy based on the football team or player '{team_or_player}'.",
        f"Include a popular historical moment from {team_or_player} to illustrate this concept.",
        "Return the response as nicely formatted markdown.",
    )
    # Leading newline, one indented line per sentence, then a trailing indent.
    body = "".join(f"{indent}{sentence}\n" for sentence in sentences)
    return f"\n{body}{indent}"

In [None]:
def artist(prompt_description):
    """Generate a DALL-E 3 image for *prompt_description*.

    Returns the decoded image as a PIL ``Image``. If requesting or decoding
    the image raises, the error is printed and ``None`` is returned.
    """
    print("Executing DALL-E 3 generation...")
    try:
        request_options = {
            "model": "dall-e-3",
            "prompt": f"Create an image for this concept: {prompt_description}",
            "size": "1024x1024",
            "n": 1,
            # Ask for inline base64 data so the image is decoded locally.
            "response_format": "b64_json",
        }
        generated = openai.images.generate(**request_options)
        raw_bytes = base64.b64decode(generated.data[0].b64_json)
        return Image.open(BytesIO(raw_bytes))
    except Exception as err:
        print(f"DALL-E Error: {err}")
        return None

In [None]:
def talker(message_text):
    """Convert *message_text* to speech with the OpenAI TTS endpoint.

    Returns the ``content`` of the speech response. If the request raises,
    the error is printed and ``None`` is returned.
    """
    print("Executing TTS generation...")
    try:
        speech_options = {
            "model": "gpt-4o-mini-tts",
            "voice": "coral",
            "input": message_text,
        }
        return openai.audio.speech.create(**speech_options).content
    except Exception as err:
        print(f"TTS Error: {err}")
        return None

## Tool JSON Schemas

In [None]:
# Function-calling schema for the fetch_proverb_chapter tool.
fetch_proverb_schema = dict(
    name="fetch_proverb_chapter",
    description="Fetch the raw text from a specific chapter of the Book of Proverbs.",
    parameters=dict(
        type="object",
        properties=dict(
            chapter_number=dict(
                type="integer",
                description="The chapter number to fetch, e.g., 3.",
            ),
        ),
        required=["chapter_number"],
        additionalProperties=False,
    ),
)

# Function-calling schema for the extract_football_analogy tool.
football_analogy_schema = dict(
    name="extract_football_analogy",
    description="Construct an expert prompt instructing an LLM to explain a coding pattern using a tactical football analogy.",
    parameters=dict(
        type="object",
        properties=dict(
            pattern_name=dict(
                type="string",
                description="The coding pattern, e.g., 'Two Pointers'.",
            ),
            team_or_player=dict(
                type="string",
                description="The football team or player to use in the analogy.",
            ),
        ),
        required=["pattern_name", "team_or_player"],
        additionalProperties=False,
    ),
)

# Wrap each schema in the {"type": "function", ...} envelope passed as `tools`.
tools = [
    {"type": "function", "function": schema}
    for schema in (fetch_proverb_schema, football_analogy_schema)
]

## Agent Orchestrator State Machine

In [None]:
# System prompt that chat() places first in every request to the agent model.
# The closing `<art: ...>` tag it asks for is what chat() extracts with a
# regex to obtain the image prompt, so the tag format here and that regex
# must be changed together.
SYSTEM_MESSAGE = """
You are Toluwalemi's Universal Assistant. 
You have access to tools that can generate Proverb of the Days from the Bible API or generate Football tactical analogies for computer science concepts.
Always use the tools available to fulfill user requests.
When explaining either a Proverb or a Football Analogy, return your description formatted in clean Markdown.
After resolving the user's primary request, conclude your message with a rich, unique 1-sentence prompt description representing the output, so the DALL-E 3 engine can draw an image of your output.
The 1-sentence prompt description MUST strictly be embedded inside angle brackets: e.g. <art: A vivid watercolor of a tactical football whiteboard showing overlapping fullbacks>
"""

def execute_tool(name, args):
    """Run the tool called *name* with the decoded argument mapping *args*.

    Returns the tool's result, or an ``Unknown tool`` message when *name*
    does not correspond to a registered tool.
    """
    if name == "extract_football_analogy":
        pattern = args.get("pattern_name")
        subject = args.get("team_or_player")
        return extract_football_analogy(pattern, subject)

    if name == "fetch_proverb_chapter":
        # Fall back to chapter 1 when the model omitted the argument.
        chapter = args.get("chapter_number", 1)
        return fetch_proverb_chapter(chapter)

    return f"Unknown tool: {name}"

In [None]:
import re

def _parse_tool_arguments(raw_arguments):
    """Decode streamed tool-call arguments; return a dict, or None if unusable."""
    if not raw_arguments:
        # A call with no argument fragments is treated as "no arguments".
        return {}
    try:
        parsed = json.loads(raw_arguments)
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def _split_art_prompt(reply_text):
    """Return (reply without the first `<art: ...>` tag, image prompt or None)."""
    # DOTALL lets the prompt span several lines; without it a multi-line tag
    # is never matched and leaks into the displayed reply and the TTS input.
    match = re.search(r'<art:\s*(.*?)>', reply_text, flags=re.DOTALL)
    if not match:
        return reply_text, None
    cleaned = reply_text.replace(match.group(0), "").strip()
    return cleaned, match.group(1).strip() or None


def chat(history):
    """
    Streaming agent loop. Streams text token-by-token to the Gradio chatbot,
    handles tool calls when needed, then generates image and audio.

    Args:
        history: List of ``{"role", "content"}`` message dicts. An assistant
            entry is appended to it and updated in place while streaming.

    Yields:
        ``(history, audio, image)`` tuples. Audio and image are ``None`` for
        every intermediate update; the final tuple carries the results of
        ``talker`` and ``artist`` (either may still be ``None``).
    """
    backend_history = [{"role": h["role"], "content": h["content"]} for h in history]
    messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + backend_history
    history.append({"role": "assistant", "content": ""})

    while True:
        stream = openrouter.chat.completions.create(
            model=AGENT_MODEL, messages=messages, tools=tools, stream=True
        )
        reply_text = ""
        tool_calls_by_index = {}
        finish_reason = None

        for chunk in stream:
            # Some chunks carry no choices; indexing them would raise
            # IndexError and abort the stream.
            if not chunk.choices:
                continue
            choice = chunk.choices[0]
            finish_reason = choice.finish_reason or finish_reason
            delta = choice.delta
            # Stream text content to the chatbot
            if delta.content:
                reply_text += delta.content
                history[-1]["content"] = reply_text
                yield history, None, None

            # Tool calls arrive in fragments keyed by index; the id and name
            # are stored when present and argument text is concatenated.
            if delta.tool_calls:
                for tc_delta in delta.tool_calls:
                    idx = tc_delta.index
                    if idx not in tool_calls_by_index:
                        tool_calls_by_index[idx] = {"id": "", "name": "", "arguments": ""}
                    if tc_delta.id:
                        tool_calls_by_index[idx]["id"] = tc_delta.id
                    if tc_delta.function and tc_delta.function.name:
                        tool_calls_by_index[idx]["name"] = tc_delta.function.name
                    if tc_delta.function and tc_delta.function.arguments:
                        tool_calls_by_index[idx]["arguments"] += tc_delta.function.arguments

        if finish_reason != "tool_calls" or not tool_calls_by_index:
            break

        # Append assistant message with tool calls to the conversation
        assistant_msg = {
            "role": "assistant",
            "tool_calls": [
                {
                    "id": data["id"],
                    "type": "function",
                    "function": {"name": data["name"], "arguments": data["arguments"]}
                }
                for data in tool_calls_by_index.values()
            ]
        }
        messages.append(assistant_msg)

        # Execute tools and append results
        for data in tool_calls_by_index.values():
            tool_args = _parse_tool_arguments(data["arguments"])
            if tool_args is None:
                # Report bad arguments to the model instead of crashing the UI.
                result = f"Invalid arguments for tool {data['name']}: expected a JSON object."
            else:
                try:
                    result = execute_tool(data["name"], tool_args)
                except Exception as exc:
                    # Same best-effort policy as artist()/talker(): log and
                    # degrade rather than abort the whole response stream.
                    print(f"Tool Error ({data['name']}): {exc}")
                    result = f"Tool {data['name']} failed: {exc}"
            messages.append({"role": "tool", "content": result, "tool_call_id": data["id"]})

    # Extract DALL-E prompt and clean the reply
    cleaned_text, image_prompt = _split_art_prompt(reply_text)
    if cleaned_text != reply_text:
        reply_text = cleaned_text
        history[-1]["content"] = reply_text
        yield history, None, None

    # Generate audio and image
    audio_buffer = talker(reply_text)
    image = artist(image_prompt) if image_prompt else None

    yield history, audio_buffer, image

## Gradio UI Configuration

In [None]:
def put_message_in_chatbot(message, history):
    """Return an empty textbox value and a new history ending with *message*.

    The incoming *history* list is not modified; a new list with the user
    turn appended is returned instead.
    """
    user_turn = dict(role="user", content=message)
    cleared_textbox = ""
    return cleared_textbox, history + [user_turn]

# Build the Gradio interface: a title and blurb, then one row with two columns.
with gr.Blocks(theme=gr.themes.Soft()) as ui:
    gr.Markdown("# Toluwalemi's Universal Assistant")
    gr.Markdown("Powered by Gradio, OpenRouter, and OpenAI DALL-E/TTS.\nAsk for a daily proverb, or ask for a tactical football analogy of a coding concept!")
    
    with gr.Row():
        # First column (scale=2): conversation transcript and message box.
        with gr.Column(scale=2):
            # type="messages" matches the {"role", "content"} dicts that
            # put_message_in_chatbot and chat read and write.
            chatbot = gr.Chatbot(height=600, type="messages")
            message_input = gr.Textbox(label="Message Agent:", placeholder="e.g. Generate a proverb from chapter 3... or Explain 2-pointers using Mikel Arteta's Arsenal")
        # Second column (scale=1): outputs produced at the end of each reply.
        with gr.Column(scale=1):
            image_output = gr.Image(label="Generated Scene", height=500, interactive=False)
            audio_output = gr.Audio(label="Voice Output", autoplay=True)

    # On submit: first move the typed message into the chatbot and clear the
    # textbox, then run the streaming agent, whose yielded tuples update the
    # chatbot, audio and image components in that order.
    message_input.submit(
        put_message_in_chatbot, 
        inputs=[message_input, chatbot], 
        outputs=[message_input, chatbot]
    ).then(
        chat, 
        inputs=chatbot, 
        outputs=[chatbot, audio_output, image_output]
    )

# Start the app; inbrowser=True asks Gradio to open it in a browser tab.
ui.launch(inbrowser=True)