In [1]:
import json
import os
import random
import time
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union

import matplotlib.pyplot as plt
import requests
from PIL import Image
from termcolor import colored

import autogen
from autogen import Agent, AssistantAgent, ConversableAgent, UserProxyAgent
from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent

In [9]:
%%capture --no-stderr
# Optional dependency install, left commented out so re-running the notebook
# does not reinstall packages. Uncomment it on a fresh environment.
# `%%capture --no-stderr` hides this cell's stdout but still lets stderr through,
# so installation errors remain visible.
# NOTE(review): this cell appears after the import cell and carries execution
# count 9; on a fresh kernel it has to run before the imports — consider moving
# it to the top of the notebook.
# NOTE(review): MultimodalConversableAgent may require the "lmm" extra
# (pyautogen[lmm]) — confirm against the installed pyautogen version.
# %pip install "pyautogen>=0.2.3"

In [2]:
# Endpoint configurations for the vision model. Only the entries of
# OAI_CONFIG_LIST whose "model" matches the filter are kept.
config_list_4v = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict=dict(model=["gpt-4-vision-preview"]),
)

# Endpoint configurations for the text-only GPT-4 variants; any entry whose
# "model" is one of the names below is accepted.
config_list_gpt4 = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict=dict(
        model=[
            "gpt-4-1106-preview",
            "gpt-4-0314",
            "gpt4",
            "gpt-4-32k",
            "gpt-4-32k-0314",
            "gpt-4-32k-v0314",
        ],
    ),
)

# LLM settings built from the GPT-4 config list, with a fixed cache seed.
gpt4_llm_config = dict(config_list=config_list_gpt4, cache_seed=42)

In [3]:
# Image handed to the explainers. A raw string keeps the Windows backslashes
# literal: in a normal string "\p" and "\m" are invalid escape sequences, which
# newer Python versions warn about. Change this path to analyse another image.
IMAGE_PATH = r"E:\prior_con\muted_grape_leaf_by_starwaltdesign_d8sz5bf-fullview.jpg"

# Upper bound on the number of rounds in the group chat.
MAX_ROUNDS = 10


def _make_image_explainer(name: str, system_message: str) -> MultimodalConversableAgent:
    """Build one vision-capable explainer agent with the shared settings.

    Args:
        name: Agent name; the system messages refer to agents by this name.
        system_message: Instructions describing the aspect this agent covers.

    Returns:
        A ``MultimodalConversableAgent`` backed by ``config_list_4v``.
    """
    return MultimodalConversableAgent(
        name=name,
        max_consecutive_auto_reply=10,
        # A fresh dict per agent, so no two agents share a mutable config object.
        llm_config={"config_list": config_list_4v, "temperature": 0.5, "max_tokens": 300},
        system_message=system_message,
    )


# Specialist 1: craftsmanship / handling of the medium.
agent1 = _make_image_explainer(
    "image-explainer-Technical-Execution",
    "Describe the image by assessing the skill level displayed in the use of medium "
    "(paint, pencil, digital, etc.). Look at elements like line quality, color blending, "
    "texture, and overall craftsmanship. Give your description to image-explainer-3. "
    "And let image-explainer-Composition to describe from their own aspect",
)

# Specialist 2: arrangement of elements.
agent2 = _make_image_explainer(
    "image-explainer-Composition",
    "Describe the image by evaluating how elements are arranged within the artwork. "
    "Consider balance, symmetry, use of negative space, and how the eye is guided through "
    "the piece. A strong composition can create a sense of harmony or intentional discord. "
    "Give your description to image-explainer-3. "
    "And let image-explainer-Color to describe from their own aspect",
)

# Specialist 3: colour palette and mood.
agent4 = _make_image_explainer(
    "image-explainer-Color",
    "Describe the image by analyzing the color palette, how colors interact, their "
    "emotional impact, and if they contribute to the overall mood or theme of the piece. "
    "Give your description to image-explainer-3.",
)

# Summarizer: merges the specialists' descriptions into one.
agent3 = _make_image_explainer(
    "image-explainer-3",
    "According to the description from other image-explainers, "
    "give a comprehensive description of the image.",
)

# Proxy for the human user; it never prompts for input (human_input_mode="NEVER").
user_proxy = autogen.UserProxyAgent(
    name="User_proxy",
    system_message="A human admin.",
    code_execution_config={"last_n_messages": 2, "work_dir": "groupchat"},
    human_input_mode="NEVER",
)

# The conversation is capped at MAX_ROUNDS rounds.
# NOTE(review): the next speaker is presumably chosen by the manager's LLM
# (gpt4_llm_config), so the hand-off order requested in the system messages is
# a hint rather than a guarantee — confirm against the GroupChat settings.
groupchat = autogen.GroupChat(
    agents=[agent1, agent2, agent3, agent4, user_proxy],
    messages=[],
    max_round=MAX_ROUNDS,
)
group_chat_manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=gpt4_llm_config)

# Start the chat. The message text (including its indentation) is unchanged;
# only the path is now interpolated from IMAGE_PATH.
user_proxy.initiate_chat(
    group_chat_manager,
    message=f"""Describe the image:
                        <img {IMAGE_PATH}>.""",
)

[33mUser_proxy[0m (to chat_manager):

Describe the image:
                        <img E:\prior_con\muted_grape_leaf_by_starwaltdesign_d8sz5bf-fullview.jpg>.

--------------------------------------------------------------------------------
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mimage-explainer-Technical-Execution[0m (to chat_manager):

The image is an abstract artwork that features a complex overlay of curvilinear lines and shapes. The color palette is subdued, consisting of various shades of gray, blue, and beige, creating a muted and harmonious effect. These colors appear to have been applied with a watercolor technique or a digital effect that mimics watercolor, as evidenced by the gradient transitions and the presence of what looks like paper texture.

The lines are thin and delicate, intertwining and overlapping to create a sense of depth and movement. Some lines form closed loops, while others meander across the composition. The image also contains a series of dots, some 