In [1]:
import base64
import io
import os
from pathlib import Path

import cv2
import requests
from dotenv import load_dotenv
from matplotlib import pyplot as plt
from openai import AzureOpenAI
from openai.types.beta import Thread, Assistant
from PIL import Image

# Read keys, endpoints and deployment names from a local .env file into the
# process environment before anything below looks them up.
load_dotenv()

# A single Azure OpenAI client; every setting comes from the environment and
# is passed through as-is (None when the variable is not set).
assistant_client = AzureOpenAI(
    azure_endpoint=os.environ.get("GPT4_AZURE_OPENAI_ENDPOINT"),
    api_version=os.environ.get("GPT4_AZURE_OPENAI_API_VERSION"),
    api_key=os.environ.get("GPT4_AZURE_OPENAI_KEY"),
)

# Names of the Azure deployments used for chat/assistants, image generation
# and image analysis respectively.
assistant_deployment_name = os.environ.get("GPT4_DEPLOYMENT_NAME")
dalle_deployment_name = os.environ.get("DALLE3_DEPLOYMENT_NAME")
vision_deployment_name = os.environ.get("GPT4VISION_DEPLOYMENT_NAME")


In [2]:
# Tool list for the DALL-E assistant: a single callable function,
# `generate_image`, taking one required string argument named "prompt".
_generate_image_parameters = {
    "type": "object",
    "properties": {
        "prompt": {
            "type": "string",
            "description": "The prompt to be used to create the image",
        }
    },
    "required": ["prompt"],
}

_generate_image_function = {
    "name": "generate_image",
    "description": "Creates and displays an image",
    "parameters": _generate_image_parameters,
}

tools_dalle = [{"type": "function", "function": _generate_image_function}]

# Tool list for the vision assistant: `analyze_image` is declared with an
# empty parameter object, so the schema asks for no arguments.
_analyze_image_function = {
    "name": "analyze_image",
    "description": "Analyzes and critiques an image",
    "parameters": {"type": "object", "properties": {}, "required": []},
}

tools_vision = [{"type": "function", "function": _analyze_image_function}]

# Tool list for the user-proxy assistant: the built-in code interpreter plus
# a `send_message` function that needs both a message and a target agent name.
_send_message_properties = {
    "query": {
        "type": "string",
        "description": "The message to be sent",
    },
    "agent_name": {
        "type": "string",
        "description": "The name of the agent to execute the task.",
    },
}

_send_message_function = {
    "name": "send_message",
    "description": "Send messages to other agents in this group chat.",
    "parameters": {
        "type": "object",
        "properties": _send_message_properties,
        "required": ["query", "agent_name"],
    },
}

tools_user_proxy = [
    {"type": "code_interpreter"},
    {"type": "function", "function": _send_message_function},
]


In [3]:
# System instructions for the image-generation assistant. This text is sent to
# the model verbatim, so edits here change the assistant's behavior.
instructions_dalle = """As a premier AI specializing in image generation, you possess the expertise to craft precise visuals based on given prompts. It is essential that you diligently generate the requested image, ensuring its accuracy and alignment with the user's specifications, prior to delivering a response."""

# System instructions for the image-critique assistant.
instructions_vision = """As a leading AI expert in image analysis, you excel at scrutinizing and offering critiques to refine and improve images. Your task is to thoroughly analyze an image, ensuring that all essential assessments are completed with precision before you provide feedback to the user. You have access to the local file system where the image is stored."""

# System instructions for the coordinating user-proxy assistant. The agent
# names listed inside the prompt are the ones it is told to pass as
# `agent_name` when calling the send_message tool.
instructions_user_proxy = """As a user proxy agent, your primary function is to streamline dialogue between the user and the specialized agents within this group chat. You are tasked with articulating user inquiries with clarity to the relevant agents and maintaining a steady flow of communication to guarantee the user's request is comprehensively addressed. Please withhold your response to the user until the task is completed, unless an issue is flagged by the respective agent or when you can provide a conclusive reply.

You have access to the local file system where files are stored. For example, you can access the image generated by the DALL-E assistant and send it to the Vision assistant for analysis.

You have access to the following agents to accomplish the task:
- dalle_assistant
- vision_assistant

If the agents above are not enough or are out of scope to complete the task, then run send_message with the name of the agent.

When outputting the agent names, use them as the basis of the agent_name in the send message function, even if the agent doesn't exist yet.
"""

def _create_assistant(name, instructions, tools):
    """Create one assistant on the shared chat deployment and return it."""
    return assistant_client.beta.assistants.create(
        name=name,
        instructions=instructions,
        model=assistant_deployment_name,
        tools=tools,
    )


# Create the three assistants; each call issues a create request, so
# re-running this cell creates new assistants rather than reusing old ones.
dalle_assistant = _create_assistant("dalle_assistant", instructions_dalle, tools_dalle)
vision_assistant = _create_assistant("vision_assistant", instructions_vision, tools_vision)
user_proxy = _create_assistant("user_proxy", instructions_user_proxy, tools_user_proxy)


APIConnectionError: Connection error.

In [None]:
# Routing table keyed by agent name. Each entry holds the assistant object and
# a thread slot that starts empty and is filled in by send_message on first use.
agents_threads = {
    name: {"agent": agent, "thread": None}
    for name, agent in (
        ("dalle_assistant", dalle_assistant),
        ("vision_assistant", vision_assistant),
    )
}

def send_message(query: str, agent_name: str) -> str:
    """Route a message to one of the registered agents.

    Args:
        query: The message text to hand to the agent.
        agent_name: Key of the target agent in ``agents_threads``.

    Returns:
        Whatever ``dispatch_message`` returns for the agent, or an explanatory
        string when no agent is registered under ``agent_name``.
    """
    try:
        entry = agents_threads[agent_name]
    except KeyError:
        return f"Agent '{agent_name}' does not exist."

    # Create the agent's thread on first use and cache it in the routing table.
    thread = entry["thread"] or assistant_client.beta.threads.create()
    entry["thread"] = thread

    return dispatch_message(query, entry["agent"], thread)

def dispatch_message(message: str, agent: Assistant, thread: Thread) -> str:
    """Run the first of the agent's function tools that exists in this module.

    Walks ``agent.tools`` in order; for the first tool that carries a
    ``function`` attribute whose name matches a module-level global, that
    global is called with ``message`` and its result returned. No request is
    made to the assistant itself here.

    Args:
        message: Text passed as the sole argument to the matched function.
        agent: Object whose ``tools`` are inspected.
        thread: Accepted for the caller's convenience; not used in this body.

    Returns:
        The matched function's return value, or ``"Placeholder response"``
        when no tool maps to a module-level name.
    """
    module_names = globals()

    for tool in agent.tools:
        # Tools without a function spec (e.g. built-in ones) cannot be run locally.
        if not hasattr(tool, "function"):
            continue

        handler_name = tool.function.name
        if handler_name not in module_names:
            continue

        return module_names[handler_name](message)

    return "Placeholder response"


In [None]:
def generate_image(prompt: str) -> str:
    """Generate an image from a prompt, save it as ``temp.jpg`` and display it.

    Args:
        prompt: Text description of the image to create.

    Returns:
        A status message containing the absolute path of the saved image.

    Raises:
        requests.HTTPError: If the generated image cannot be downloaded.
        requests.Timeout: If the download does not complete in time.
    """
    response = assistant_client.images.generate(
        model=dalle_deployment_name,
        prompt=prompt,
        size="1024x1024",
        quality="standard",
        n=1,
    )

    # Download the whole payload with a timeout and fail loudly on an HTTP
    # error instead of handing an error page to the image decoder.
    image_url = response.data[0].url
    download = requests.get(image_url, timeout=60)
    download.raise_for_status()

    # JPEG cannot store an alpha channel or palette, so normalize to RGB
    # before saving; saving an RGBA image as .jpg would raise.
    local_path = Path("temp.jpg")
    with Image.open(io.BytesIO(download.content)) as downloaded:
        rgb_image = downloaded.convert("RGB")
    rgb_image.save(local_path)

    # Show the in-memory image directly rather than re-reading the file.
    plt.imshow(rgb_image)
    plt.axis("off")
    plt.show()

    return f"Image generated successfully. Path: {local_path.absolute()}"

def analyze_image(message: str) -> str:
    """Send ``temp.jpg`` together with a text instruction to the vision model.

    Args:
        message: The instruction or question that accompanies the image.

    Returns:
        The model's reply text prefixed with ``"Vision Assistant: "``.
    """
    # Inline the image as a base64 data URL so it travels inside the request.
    encoded = base64.b64encode(Path("temp.jpg").read_bytes()).decode("utf-8")

    text_part = {"type": "text", "text": message}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
    }

    response = assistant_client.chat.completions.create(
        model=vision_deployment_name,
        messages=[{"role": "user", "content": [text_part, image_part]}],
        max_tokens=1000,
    )

    critique = response.choices[0].message.content
    return f"Vision Assistant: {critique}"


In [None]:
# Ask the user what to draw and hand the request to the DALL-E agent.
user_query = input("User Query: ")

response_dalle = send_message(user_query, "dalle_assistant")
# Separate the label from the reply; plain concatenation printed "DalleImage ...".
print(f"Dalle: {response_dalle}")

# Ask the vision agent to critique the image that was just generated.
critique_message = "Analyze and critique this image and generate a new enhanced prompt for DALL-E with the criticism and analysis."
response_vision = send_message(critique_message, "vision_assistant")
print(f"Vision: {response_vision}")
