In [1]:
import openai
import os
import json
import gradio as gr
import base64
from io import BytesIO
from PIL import Image
from IPython.display import Audio, display

In [2]:
system_message = "You are a shady ex-con from the 19th century. You help rich patrons secure their homes. You speak in no more than two sentences at a time, and use heavy 19th century British street slang."
system_message += " You will only ever answer questions about items that you retrieve from tools. Never make up information about items or prices."
system_message += "  if a user asks for something, use the closest match from the item list to get info about it. For example, if a user asks about the lock-picking kit,"
system_message += "look up the price for the closest item that exists, which is lock_picking_kit. similarly, the user may ask for a padlock, if you make the tool call to get the item list,"
system_message += "you should see that 'padlock' is close enough to reinforced_padlock to return the correct answer. If user asks for all items, always list ALL items in a readable format."
system_message +=  "If a user expresses interest in purchasing an item, confirm the purchase and use the purchase_item tool to generate an image of their purchase."
system_message +=  "always start with a little introduction to set the mood"
system_message +=  "if asked who you are, give an interesting backstory"

gpt_model = 'gpt-4o-mini'
with open('week2\\items_for_sale.json','r') as file:
    items_for_sale = json.load(file)

In [3]:
def define_tool_property(arg_name: str, arg_type: str, description: str) -> dict:
    """
    Define a single property for a tool package.

    Parameters:
    - arg_name (str): Name of the argument.
    - arg_type (str): Type of the argument (e.g., "string", "integer").
    - description (str): Description of the argument.

    Returns:
    - dict: A dictionary defining the tool property.
    """
    return {arg_name: {"type": arg_type, "description": description}}


def create_tool_package(
    tool_name: str, tool_description: str, properties: dict
) -> dict:
    """
    Create a tool package for use with LLM integrations.

    Parameters:
    - tool_name (str): Name of the tool.
    - tool_description (str): Description of what the tool does.
    - properties (dict): Dictionary of properties describing the tool's parameters.

    Returns:
    - dict: A dictionary containing the tool package.
    """
    return {
        "name": tool_name,
        "description": tool_description,
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": list(properties.keys()),
            "additionalProperties": False,
        },
    }


In [4]:
def list_items():
    print("list_items tool call has been made")
    return list(items_for_sale.keys())

def get_item_info(item):
    item = item.lower()
    print("get_item_info tool call has been made")
    item_info = items_for_sale.get(item, {"description": "Unknown item.", "price": "Unknown"})
    description = item_info.get("description", "Description not found.")
    price = item_info.get("price", "Price not found.")
    return {"description": description, "price": price}

def purchase_item(item):
    print("purchase_item tool call has been made")
    return {"message": "Sold! Let's shake on it *spits in hand*", "item": item}

def create_item_image(item):
    print("image will be generated shortly")
    description = items_for_sale.get(item, {}).get("description", "unknown")
    image_response = openai.images.generate(
        model="dall-e-3",
        prompt=f"An image representing a secret item sold by an ex-thief. The item is called {item}, and its description is {description}",
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    image_base64 = image_response.data[0].b64_json
    image_data = base64.b64decode(image_base64)
    return Image.open(BytesIO(image_data))





In [5]:
def talker(message):
    response = openai.audio.speech.create(
        model="tts-1",
        voice="fable",
        input=message,
    )

    audio_stream = BytesIO(response.content)
    output_filename = "output_audio.mp3"
    with open(output_filename, "wb") as f:
        f.write(audio_stream.read())
    print("Audio generated and saved:", output_filename)

    # Return the audio file path
    return output_filename


In [6]:
# Define tool properties and packages
list_items_properties = define_tool_property("items_for_sale", "string", "a list of items that our beloved character has for sale.")
purchase_item_properties = define_tool_property("item", "string", "the item the customer wishes to purchase")
get_item_info_properties = define_tool_property("item", "string", "The special item our character has in his possession.")




list_items_package = create_tool_package(
    "list_items",
    "a function to list the items in our character's inventory. Call this whenever you need to know what items the ex-con has, for example if the user asks 'do you have anything for sale?'",
    list_items_properties
)

get_item_info_package = create_tool_package(
    "get_item_info",
    "A function to retrieve the description and price of an item in the character's inventory. Call this whenever the user asks about an item, for example 'What can you tell me about the (item name)?'",
    get_item_info_properties
)
purchase_package = create_tool_package(
    "purchase_item",
    "a function to generate an image of the purchased item. Call this whenever the user agrees to purchase a specific item.",
    purchase_item_properties
)




tools = [
    {"type": "function", "function": list_items_package},
    {"type": "function", "function": get_item_info_package},
    {"type": "function", "function": purchase_package}
]

In [7]:
# Function mapping
function_mapping = {
    "list_items": list_items,
    "get_item_info": get_item_info,
    "purchase_item": purchase_item,
}

In [8]:
def handle_tool_call(message):
    """
    Processes all tool calls in the message and returns a list of responses.
    """
    responses = []
    item = None

    for tool_call in message.tool_calls:
        func_name = tool_call.function.name
        arguments = json.loads(tool_call.function.arguments)

        print("The chosen function is:", func_name)
        print("The arguments are:", arguments)

        if func_name in function_mapping:
            if func_name == "list_items":
                func_response = function_mapping[func_name]()
            elif func_name == "purchase_item":
                func_response = function_mapping[func_name](**arguments)
                # Capture the item if the tool response includes it
                if isinstance(func_response, dict):
                    item = func_response.get("item")
            else:
                func_response = function_mapping[func_name](**arguments)
        else:
            raise ValueError(f"Function '{func_name}' is not defined in function_mapping.")

        print("func response: ", func_response)

        response = {
            "role": "tool",
            "content": json.dumps(func_response),
            "tool_call_id": tool_call.id
        }
        responses.append(response)

    # Return the responses and item if an item was processed
    return responses, item


In [9]:
def call_gpt(history):
    """
    Handles the conversation flow and processes tool calls when invoked.
    """
    # Add the user's message to the conversation
    messages = [{'role': 'system', 'content': system_message}] + history
    response = openai.chat.completions.create(model=gpt_model, messages=messages, tools=tools)
    image = None
    audio_file = None  # Placeholder for audio file path

    while response.choices[0].finish_reason == 'tool_calls':
        tool_message = response.choices[0].message  # Extract tool call request
        tool_responses, item = handle_tool_call(tool_message)  # Process tool calls and get responses
        print(f'Tool responses: {tool_responses}')
        if tool_responses[0] and 'Sold' in tool_responses[0]['content']:
            image = create_item_image(item)
        # Add all tool responses to the conversation history
        messages.append(tool_message)  # Add tool call request to history
        messages.extend(tool_responses)  # Add all tool responses to history
        # Re-query GPT with the updated conversation history
        response = openai.chat.completions.create(model=gpt_model, messages=messages)

    reply = response.choices[0].message.content
    history += [{"role": "assistant", "content": reply}]

    # Generate audio for the reply
    audio_file = talker(reply)

    # Return the final response, image (if any), and audio file
    return history, image, audio_file


In [10]:
# add ?__theme=dark to the end of the url to force dark mode


In [11]:
with gr.Blocks() as ui:
    # Initialize a state variable to store the image
    image_state = gr.State(value=r"week2\woman_thief.jpeg")  # Use raw string or forward slashes

    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")
        # Use image_state to initialize the default image
        image_output = gr.Image(height=500, value=image_state.value)
        audio_output = gr.Audio(label="Assistant's Voice")  # Add audio output component
    with gr.Row():
        entry = gr.Textbox(label="Chat with our AI Assistant:")
    with gr.Row():
        clear = gr.Button("Clear")

    def do_entry(message, history):
        if isinstance(history, str):  # Convert history to list if it's a string
            history = json.loads(history)
        history += [{"role": "user", "content": message}]
        return "", history

    def call_gpt_and_update(chatbot_history, image_state):
        if isinstance(chatbot_history, str):  # Convert chatbot_history to list if needed
            chatbot_history = json.loads(chatbot_history)

        chatbot_history, new_image, audio = call_gpt(chatbot_history)

        # If a new image is provided, update the image state; otherwise, keep the current image
        updated_image = new_image if new_image else image_state
        return chatbot_history, updated_image, audio

    entry.submit(do_entry, inputs=[entry, chatbot], outputs=[entry, chatbot]).then(
        call_gpt_and_update,
        inputs=[chatbot, image_state],
        outputs=[chatbot, image_output, audio_output]
    )
    clear.click(lambda: None, inputs=None, outputs=chatbot, queue=False)

ui.launch(inbrowser=True, share=True)


* Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




Audio generated and saved: output_audio.mp3
The chosen function is: list_items
The arguments are: {'items_for_sale': 'lock_picking_kit, reinforced_padlock, basic_door_lock, advanced_security_system, hidden_safe'}
list_items tool call has been made
func response:  ['phantom_lockpicks', 'unbreakable_shackle', 'spyglass_of_farseeing', 'howling_tripwire', 'shadow_coffer', "siren's_slip_oil", 'boots_of_ghostly_tread', "warden's_iron_chest", 'labyrinthine_door_mechanism', 'fists_of_fury_brass_knuckles', 'codex_of_veiled_defenses']
Tool responses: [{'role': 'tool', 'content': '["phantom_lockpicks", "unbreakable_shackle", "spyglass_of_farseeing", "howling_tripwire", "shadow_coffer", "siren\'s_slip_oil", "boots_of_ghostly_tread", "warden\'s_iron_chest", "labyrinthine_door_mechanism", "fists_of_fury_brass_knuckles", "codex_of_veiled_defenses"]', 'tool_call_id': 'call_eY2QvlrfzfwwwzUeYRzUcRCt'}]
Audio generated and saved: output_audio.mp3
The chosen function is: get_item_info
The arguments are: {