# Agent Chat with Multimodal Models

The implementation of the session follows [`babyagi`](../../camel/societies/babyagi_playing.py).

In [1]:
import os

# Placeholder credential: the variable is set to an empty string here.
# NOTE(review): presumably the OpenAI-backed agents below read this key, so
# fill in a real value before running the notebook -- confirm.
os.environ.update(OPENAI_API_KEY="")

In [34]:
import re
from PIL import Image

from camel.utils import print_text_animated
from camel.configs import FunctionCallingConfig
from camel.functions import T2I_FUNCS
from camel.responses import ChatAgentResponse
from camel.agents import ChatAgent
from camel.messages import BaseMessage
from camel.prompts import PromptTemplateGenerator
from camel.types import TaskType, RoleType, ModelType
from copy import deepcopy

from IPython.core.display import Markdown
from colorama import Fore

class MMChat:
    r"""A multimodal chat session pairing a drawing "artist" with a "critic".

    The artist agent is configured with the text-to-image functions in
    ``T2I_FUNCS``. The critic agent is shown the images referenced by the
    artist's reply and answers with a critique plus an updated prompt, which
    is then handed back to the artist for another round.
    """

    # Matches a parenthesised link target that ends in ``/<uuid>.jpg`` or
    # ``/<uuid>.png``. Group 1 is the UUID, group 2 the extension (with dot).
    _IMAGE_LINK_PATTERN = re.compile(
        r'\(.*?/([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})(\.jpg|\.png)\)'
    )

    def __init__(
        self,
    ) -> None:
        self.critic = None
        self.artist = None
        critic_sys = """You need to improve the prompt of the figures you saw.
How to create a figure that is better in terms of color, shape, text (clarity), and other things.
Reply with the following format:

CRITICS: the image needs to improve...
PROMPT: here is the updated prompt!
        """
        self.critic_sys_msg = BaseMessage.make_assistant_message(
            role_name='critic',
            content=critic_sys,
        )

        self.artist_sys_msg = BaseMessage.make_assistant_message(
            role_name="Artist",
            content=PromptTemplateGenerator().get_prompt_from_key(
                TaskType.MULTI_CONDITION_IMAGE_CRAFT, RoleType.ASSISTANT
            ),
        )

        self.init_agents()

    def init_agents(self):
        r"""Create the artist and critic agents and reset both.

        The artist receives the ``T2I_FUNCS`` function list together with a
        function-calling model config (temperature 0.0); the critic is a
        plain chat agent. Both use ``ModelType.GPT_4_TURBO_VISION``.
        """
        function_list = [*T2I_FUNCS]
        assistant_model_config = FunctionCallingConfig.from_openai_function_list(
            function_list=function_list,
            kwargs=dict(temperature=0.0),
        )

        self.artist = ChatAgent(
            system_message=self.artist_sys_msg,
            model_type=ModelType.GPT_4_TURBO_VISION,
            model_config=assistant_model_config,
            function_list=function_list,
        )
        self.artist.reset()

        self.critic = ChatAgent(
            system_message=self.critic_sys_msg,
            model_type=ModelType.GPT_4_TURBO_VISION,
        )
        self.critic.reset()

    @staticmethod
    def _localize_image_links(text: str) -> "tuple[str, list[str]]":
        r"""Rewrite image link targets in ``text`` to ``./img/<uuid><ext>``.

        Args:
            text (str): Text that may contain parenthesised link targets
                ending in ``/<uuid>.jpg`` or ``/<uuid>.png``.

        Returns:
            tuple: ``(rewritten_text, image_paths)`` where every matched
                target has been replaced by ``(./img/<uuid><ext>)`` and
                ``image_paths`` lists those local paths in order of
                appearance. Text without matches is returned unchanged with
                an empty list.
        """
        image_paths = []

        def _to_local(match):
            # Collect the path while substituting so one regex pass yields
            # both the rewritten text and the ordered path list.
            local_path = f"./img/{match.group(1)}{match.group(2)}"
            image_paths.append(local_path)
            return "(" + local_path + ")"

        rewritten = MMChat._IMAGE_LINK_PATTERN.sub(_to_local, text)
        return rewritten, image_paths

    def step(self, initialPrompt: str, iter_num: int = 2) -> "ChatAgentResponse":
        r"""Run the draw-then-criticise loop.

        The artist first answers ``initialPrompt``. Then, ``iter_num`` times,
        the critic is shown the images referenced by the artist's latest
        reply and the critic's answer is sent to the artist as the next
        prompt. Progress is printed and rendered as Markdown along the way.

        Args:
            initialPrompt (str): The first user prompt given to the artist.
            iter_num (int): Number of critic/artist refinement rounds.
                (default: :obj:`2`)

        Returns:
            ChatAgentResponse: The artist's response from the last round, or
                from the initial prompt when ``iter_num`` is 0.
        """
        artist_user_msg = BaseMessage.make_user_message(
            role_name="User",
            content=initialPrompt,
        )
        print(Fore.MAGENTA + "=" * 10 + "ARTIST SYS" + "=" * 10 + "\n" + self.artist_sys_msg.content)
        print()
        print(Fore.YELLOW + "=" * 10 + "ARTIST USR" + "=" * 10 + "\n" + artist_user_msg.content)
        print()

        response = self.artist.step(artist_user_msg)
        response_msg, image_paths = self._localize_image_links(response.msg.content)
        print_text_animated(Fore.BLUE + response_msg + Fore.RESET)
        display(Markdown(response_msg))

        for _ in range(iter_num):
            print("image_paths", image_paths)

            critic_user_msg = BaseMessage.make_user_message(
                role_name="User",
                content="image:",
                # TODO: only local paths are used for now; replace them with
                # URLs in the future.
                # NOTE(review): assumes the image files already exist under
                # ./img/ (presumably saved by the T2I function) -- confirm.
                image_list=[Image.open(image) for image in image_paths],
                image_detail="high",
            )
            print(Fore.GREEN + "=" * 10 + "CRITIC SYS" + "=" * 10 + "\n" + self.critic_sys_msg.content)
            print()
            print(Fore.RED + "=" * 10 + "CRITIC USR" + "=" * 10 + "\n" + critic_user_msg.content)
            prompt = self.critic.step(critic_user_msg).msg.content
            print()
            print_text_animated(Fore.CYAN + "=" * 10 + "CRITIC RES" + "=" * 10 + "\n" + prompt + Fore.RESET)
            print()

            # The critic's full reply becomes the artist's next prompt.
            artist_user_msg = BaseMessage.make_user_message(
                role_name="User",
                content=prompt,
            )
            response = self.artist.step(artist_user_msg)

            response_msg, image_paths = self._localize_image_links(response.msg.content)
            print()
            # Reset the colour afterwards, as the other animated prints do,
            # so yellow does not leak into subsequent output.
            print_text_animated(
                Fore.YELLOW + "=" * 10 + "ARTIST RES" + "=" * 10 + "\n" + response_msg + Fore.RESET
            )
            display(Markdown(response_msg))

        return response
    

In [35]:
# Start a fresh artist/critic session and run a single refinement round on
# the cat-sign prompt; `res` holds the value returned by `step`.
session = MMChat()
res = session.step(
    initialPrompt=(
        "Create an image with sky blue background, "
        "a happy cat is showing a sign with 'I Love Python'."
    ),
    iter_num=1,
)

You are tasked with creating an image based on the provided text and images conditions. Please use your 
        imagination and artistic capabilities to visualize and draw the images and explain what you are thinking 
        about.


Create an image with sky blue background, a happy cat is showing a sign with 'I Love Python'.

response ChatCompletion(id='chatcmpl-9EshscdackUYvk9d8R1Tf7LACaUAk', choices=[Choice(finish_reason='function_call', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=FunctionCall(arguments='{"model":"dall-e-3","prompt":"A happy cat holding a sign that says \'I Love Python\' against a sky blue background","size":"1024x1024","quality":"standard","n":1}', name='get_dalle_img'), tool_calls=None))], created=1713333912, model='gpt-4-turbo-2024-04-09', object='chat.completion', system_fingerprint='fp_76f018034d', usage=CompletionUsage(completion_tokens=57, prompt_tokens=312, total_tokens=369))
{"model":"dall-e-3","prom

Here is the image of a happy cat holding a sign that says "I Love Python" against a sky blue background:

![Happy Cat with Sign](./img/71a345cb-4014-4dcc-8812-5c109364960f.png)

image_paths ['./img/71a345cb-4014-4dcc-8812-5c109364960f.png']
You need to improve the prompt of the figures you saw.
How to create a figure that is better in terms of color, shape, text (clarity), and other things.
Reply with the following format:
CRITICS: the image needs to improve...
PROMPT: here is the updated prompt!
        


image:

image type png
response ChatCompletion(id='chatcmpl-9EsiGba4SeQCPr0w1gZVIs4QntOmO', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="CRITICS: The image needs to improve in color contrast to enhance readability and visual appeal. The shape of the elements is too uniform, making the figure look monotonous. Text clarity is compromised due to a poor choice of font size and style, which makes it hard to read. Additionally, the overall layout could be better organized to guide the viewer's eye through the information more effectively.\n\nPROMPT: Here is the updated prompt!\n- Use a high-contrast color pa

The updated image has been created based on your feedback for improved visual appeal and readability. Here it is:

![Happy Cat with Improved Sign](./img/483bc777-18ec-47f8-ad07-104e2bb63eb9.png)

This version features:
- A dark blue background with high-contrast white text for better visibility.
- A variety of colorful shapes and icons related to happiness and coding to add visual interest.
- Clear, large, sans-serif font to enhance readability.
- A strategically organized layout with the sign in the center and subtle guiding elements.

Please review the image and let me know if further adjustments are needed!