In [1]:
from autogen import ConversableAgent, AssistantAgent, GroupChatManager, GroupChat, UserProxyAgent
import os

### Function tools

##### Setup env

In [2]:
import os
import sys
import warnings

from dotenv import load_dotenv

# Pull variables from a local .env file (if one exists) into the environment.
load_dotenv()
PROJECT_PATH = os.environ.get('PROJECT_PATH')

# Add the project root path to sys.path so the `src.*` imports below resolve.
# os.environ.get returns None when the variable is unset; inserting None into
# sys.path is meaningless, so only a real path is inserted and a warning is
# emitted otherwise to explain the import failures that will follow.
if PROJECT_PATH is None:
    warnings.warn(
        "PROJECT_PATH is not set (checked the environment and .env); "
        "project-local imports and asset paths in this notebook will fail."
    )
elif PROJECT_PATH not in sys.path:
    sys.path.insert(0, PROJECT_PATH)

In [3]:
# from src.image_composition import compose_ad_frame
from src.pillow_utils import get_image_dimensions


In [4]:
from PIL import Image

def _fit_within(original_size, target_size):
    """Return the (width, height) that keeps the original aspect ratio and fits inside target_size."""
    width, height = original_size
    new_width, new_height = target_size
    if new_width <= 0 or new_height <= 0:
        raise ValueError(f"size must contain positive integers, got {tuple(target_size)!r}")

    aspect_ratio = width / height
    if new_width / new_height > aspect_ratio:
        # Target box is relatively wider than the image: height is the limiting side.
        new_width = int(new_height * aspect_ratio)
    else:
        # Target box is relatively taller (or equal): width is the limiting side.
        new_height = int(new_width / aspect_ratio)

    # Extreme aspect ratios can truncate a side to 0, which cannot be resized to.
    return max(1, new_width), max(1, new_height)


def compose_ad_frame(frame_width: int, frame_height: int, elements: list, output_path: str = 'composed_image_frame.jpg') -> str:
    """
    Composes an advertisement frame from multiple image elements and saves it to disk.

    Args:
        frame_width (int): Width of the desired frame.
        frame_height (int): Height of the desired frame.
        elements (list): List of dictionaries, each containing:
            - image_path (str): Path to the image file.
            - position (tuple or list): (x, y) coordinates of the top-left corner.
            - size (tuple or list, optional): (width, height) box to fit the image into
              (aspect ratio is maintained, so one side may end up smaller).
            - has_background (bool, optional): True (default) pastes the image as-is,
              False alpha-composites it so transparent pixels show the frame beneath.
        output_path (str): Where the composed frame is written (converted to RGB first).

    Returns:
        str: `output_path`, i.e. the location of the saved frame (not the image object).

    Raises:
        ValueError: If an element's `size` contains a non-positive value.
    """

    # Create a new blank RGBA image
    composed_frame = Image.new('RGBA', (frame_width, frame_height))

    for element in elements:
        # Tool calls arrive as JSON, where coordinates are lists rather than tuples.
        position = tuple(element['position'])

        # Open inside a context manager so the file handle is released promptly;
        # convert() returns an independent in-memory image that outlives the handle.
        with Image.open(element['image_path']) as source:
            img = source.convert('RGBA')

        # Resize only when a usable size is provided (missing or None means "keep as is")
        if element.get('size'):
            img = img.resize(_fit_within(img.size, element['size']))

        # Paste the image onto the frame
        if element.get('has_background', True):  # Default to True if not specified
            composed_frame.paste(img, position)
        else:
            composed_frame.alpha_composite(img, position)

    # Convert to RGB before saving
    new_img = composed_frame.convert('RGB')

    # Save the image
    new_img.save(output_path)

    return output_path


#### Test compose ad frame

In [5]:

# Sample game whose assets are used for the smoke test of compose_ad_frame.
game_id = "0a22f881b77f00220f2034c21a18b854"
assets_path = os.path.join(PROJECT_PATH, 'data', 'Assets', game_id)

# The thumbnail is "resized" to its own dimensions, so its path is needed twice.
thumbnail_path = f'{assets_path}/thumbnail.jpg'

# Elements are drawn in list order: header, then the transparent instruction
# overlay, then the thumbnail.
elements = [
    dict(image_path=f'{assets_path}/header.jpg', position=(0, 0), has_background=True),
    dict(image_path=f'{assets_path}/engagement_instruction_1.png', position=(40, 100), has_background=False),
    dict(
        image_path=thumbnail_path,
        position=(0, 200),
        size=get_image_dimensions(thumbnail_path),
        has_background=True,
    ),
]

# compose_ad_frame writes the frame to its default output path and returns that
# path, so the value printed here is a file name rather than an image object.
composed_frame = compose_ad_frame(600, 500, elements)
print(composed_frame)


composed_image_frame.jpg


#### Registering tools - once created, tools can be registered with the agents, either separately via `register_for_llm` / `register_for_execution`, or with both agents at once via `register_function`.

In [6]:
import os

from autogen import ConversableAgent

# Model endpoint list shared by the agent configurations in this notebook.
config_list = [{"model": "gpt-4o"}]


def _int_pair_schema(description):
    """Build the JSON-schema fragment for an array of exactly two integers."""
    return {
        "type": "array",
        "items": {"type": "integer"},
        "minItems": 2,
        "maxItems": 2,
        "description": description,
    }


# Schema of one entry of the `elements` argument of compose_ad_frame.
_element_schema = {
    "type": "object",
    "properties": {
        "image_path": {
            "type": "string",
            "description": "Path to the image file.",
        },
        "position": _int_pair_schema("(x, y) coordinates of the top-left corner."),
        "size": _int_pair_schema("(width, height) to resize to (maintaining aspect ratio), optional."),
        "has_background": {
            "type": "boolean",
            "description": "Whether the image has a background (True) or is transparent (False), optional.",
        },
    },
    "required": ["image_path", "position"],
}

# Function-calling description of compose_ad_frame offered to the model.
_compose_ad_frame_schema = {
    "name": "compose_ad_frame",
    "description": "Composes an advertisement frame using multiple image elements.",
    "parameters": {
        "type": "object",
        "properties": {
            "frame_width": {
                "type": "integer",
                "description": "Width of the desired frame.",
            },
            "frame_height": {
                "type": "integer",
                "description": "Height of the desired frame.",
            },
            "elements": {
                "type": "array",
                "items": _element_schema,
                "description": "List of dictionaries, each containing details about the image to be composed.",
            },
            "output_path": {
                "type": "string",
                "description": "Path to save the composed ad frame, optional.",
            },
        },
        "required": ["frame_width", "frame_height", "elements"],
    },
}

# LLM settings for the composing assistant: model, sampling temperature,
# the shared endpoint list and the single callable function.
llm_config_assistant = {
    "model": "gpt-4o",
    "temperature": 0.7,
    "config_list": config_list,
    "functions": [_compose_ad_frame_schema],
}
# The assistant agent only *suggests* compose_ad_frame calls (the function is
# advertised to it through llm_config_assistant); it does not execute them.
composition_system_message = (
    "You are a helpful AI assistant. "
    "You can compose ad assets to make an AD Frame for StoryBoard. "
    "Return 'TERMINATE' when the task is done."
)

image_composition_agent = AssistantAgent(
    llm_config=llm_config_assistant,
    system_message=composition_system_message,
    name="image_composition_agent",
)

# The user proxy agent is used for interacting with the assistant agent
# and executes tool calls.
# user_proxy = ConversableAgent(
#     name="User",
#     llm_config=False,
#     is_termination_msg=lambda msg: msg.get("content") is not None and "TERMINATE" in msg["content"],
#     human_input_mode="NEVER",
#     max_consecutive_auto_reply=3,
#     function_map={
#         "compose_ad_frame": compose_ad_frame  # Maps 'execute_rag_query' to the corresponding method
#     }
# )


##### Register tool separately

In [7]:

# # Register the tool signature with the assistant agent.
# assistant.register_for_llm(name="compose_ad_frame", description="An function to compose ad assets to make an AD Frame for StoryBoard")(compose_ad_frame)

# # Register the tool function with the user proxy agent.
# user_proxy.register_for_execution(name="compose_ad_frame")(compose_ad_frame)

#### Use the `autogen.register_function` function to register a tool with both agents at once.



In [8]:
# from autogen import register_function

# # Register the calculator function to the two agents.
# register_function(
#     compose_ad_frame,  # The function to register.
#     caller=assistant,  # The assistant agent can suggest calls to the calculator.
#     executor=user_proxy,  # The user proxy agent can execute the calculator calls.
#     name="compose_ad_frame",  # By default, the function name is used as the tool name.
#     description="An function to compose ad assets to make an AD Frame for StoryBoard",  # A description of the tool.
# )

### Using the tool

In [9]:
# Task prompt for the composition agent: frame size, the three assets with their
# placements, and where the assets live / where the result should be written.
# NOTE: the only use of this variable in this cell (the initiate_chat call
# right below) is commented out; the same text is assigned to `message` again
# in the "Initiate the conversation" cell.
# NOTE(review): the assets folder is a hard-coded absolute path on one machine.
message = """
Compose an AD Frame with the dimensions 600x500 for StoryBoard
Use the following assets:
* Header image: header.jpg
 - Should be placed at the top-left corner
* Instruction image: engagement_instruction_1.png
  - Should be placed at (40, 100)
  - Transparent
* Thumbnail image: thumbnail.jpg
 - Should be placed at (0, 200)

The path to the assets folder is /home/hillary_kipkemoi/Automated-Storyboard-Synthesis-Digital-Advertising/data/Assets/0a22f881b77f00220f2034c21a18b854/

Use the assets path and the image names to get the image path

The output path is the data folder can be derived from assets path with the name composed_image_frame.jpg

Return 'TERMINATE' when the task is done.


"""
# chat_result = user_proxy.initiate_chat(image_composition_agent, message=message, max_turns=2)

##### Critic agent

In [10]:
# def evaluate_compose_frame(frame_path: str, features_in_frame: dict) -> str:
#   message = """
#   The compose frame is good however these are the recomendation to make it better:
#   Header image: header.jpg
#   - Should be placed at the top-left corner
#   Instruction image: engagement_instruction_1.png
#   - placed at (50, 100)
#   Thumbnail image: thumbnail.jpg
#   - Should be placed at (0, 200)
  
#   The path to the assets folder is /home/hillary_kipkemoi/Automated-Storyboard-Synthesis-Digital-Advertising/data/Assets/0a22f881b77f00220f2034c21a18b854/

#   Use the assets path and the image names to get the image path

#   The output path is the data folder can be derived from assets path with the name composed_image_frame.jpg

#   Return 'TERMINATE' when the task is done.
#   """
#   return message

##### The right setup 

In [11]:
# def get_frame_features(frame_path: str, assets_path: str) -> dict:
#   frame_features = {
#     "frame_path": frame_path,
#     "ad_frame_dimensions": (600, 500),
#     'text_bounded_box_format': ("left", "top", "right", "bottom"),
#     "elements": {
#         "header": {
#           'image_path': f'{assets_path}/header.jpg', 
#           'position': (0, 0),
#           'size': (600, 200),
#           'text': {'Ovexus': (50, 27, 245, 60), 'L/CERTIFIED': (50, 77, 161, 90), 'BY': (174, 77, 198, 90), 'LEXUS': (211, 77, 283, 90)},
#           'has_background': True
#         },
#         "engagement_instruction": {
#           'image_path': f'{assets_path}/engagement_instruction_1.png', 
#           'position': (40, 100),
#           'size': (380, 63),
#           'text': {'TAP': (0, 1, 64, 28), 'THE': (75, 1, 140, 28), 'SCREEN': (152, 0, 293, 29), 'to': (0, 40, 21, 58), 'find': (27, 36, 65, 58), 'the': (73, 36, 106, 58), 'nearest': (114, 40, 192, 58), 'Lexus': (200, 36, 261, 58), 'dealership': (268, 36, 380, 63)},
#           'has_background': False
#         },
#         "thubmnail": {
#           'image_path': f'{assets_path}/thumbnail.jpg', 
#           'position': (0, 200), 
#           'size': (600, 300),
#           'text': {},
#           'has_background': True
#         }
#     }
#   }
  
#   return frame_features

##### The setup with suboptimal placements

In [41]:
def get_frame_features(frame_path: str, assets_path: str) -> dict:
    """
    Return a hard-coded description of the composed frame for the critic agent.

    No image is inspected: every value below is a fixed literal. `frame_path`
    is echoed back unchanged and `assets_path` is only used to build the
    element image paths. In this variant the engagement instruction is
    reported at position (0, 100).

    Args:
        frame_path (str): Path of the composed frame; copied into the result.
        assets_path (str): Folder containing the asset images.

    Returns:
        dict: {"frame_features": {...}} holding the frame path, frame
        dimensions, the bounding-box tuple format, per-word text bounding
        boxes and one entry per element.
    """
    # Word -> box, in the order given by 'text_bounded_box_format' below.
    word_boxes = {
        'Ovexus': (50, 27, 245, 60),
        'L/CERTIFIED': (50, 77, 161, 90),
        'BY': (174, 77, 198, 90),
        'LEXUS': (211, 77, 283, 90),
        'TAP': (1, 101, 65, 128),
        'THE': (76, 101, 141, 128),
        'SCREEN': (153, 100, 294, 129),
        'to': (1, 140, 22, 158),
        'find': (28, 136, 66, 158),
        'the': (74, 136, 107, 158),
        'nearest': (115, 140, 193, 158),
        'Lexus': (201, 136, 262, 158),
        'dealership': (269, 136, 381, 163),
    }

    header = {
        'image_path': f'{assets_path}/header.jpg',
        'position': (0, 0),
        'size': (600, 200),
        'text_in_image': ['Ovexus', 'L/CERTIFIED', 'BY', 'LEXUS'],
        'has_background': True,
    }

    engagement_instruction = {
        'image_path': f'{assets_path}/engagement_instruction_1.png',
        'position': (0, 100),
        'size': (380, 63),
        'text_in_image': ['TAP', 'THE', 'SCREEN', 'to', 'find', 'the', 'nearest', 'Lexus', 'dealership'],
        'has_background': False,
    }

    thumbnail = {
        'image_path': f'{assets_path}/thumbnail.jpg',
        'position': (0, 200),
        'size': (600, 300),
        'text_in_image': {},
        'has_background': True,
    }

    frame_features = {
        "frame_path": frame_path,
        "ad_frame_dimensions": (600, 500),
        'text_bounded_box_format': ("left", "top", "right", "bottom"),
        'text_bounding_boxes': word_boxes,
        "elements": {
            "header": header,
            "engagement_instruction": engagement_instruction,
            "thubmnail": thumbnail,
        },
    }

    return {"frame_features": frame_features}

## Define the critic agent

In [47]:
# Function-calling description of get_frame_features, the only tool offered to
# the critic. (An `evaluate_composed_frame` tool was previously listed here and
# has been disabled.)
get_frame_features_schema = {
    "name": "get_frame_features",
    "description": "Gets detailed features of the frame using computer vision.",
    "parameters": {
        "type": "object",
        "properties": {
            "frame_path": {
                "type": "string",
                "description": "Path to the image file.",
            },
            "assets_path": {
                "type": "string",
                "description": "Path to the assets folder containing the image elements to compose.",
            },
        },
        "required": ["frame_path", "assets_path"],
    },
}

# LLM settings for the critic: same model and endpoint list as the composing
# assistant, with a lower sampling temperature.
llm_config_critic = {
    "model": "gpt-4o",
    "temperature": 0.5,
    "config_list": config_list,
    "functions": [get_frame_features_schema],
}

# Instructions for the critic: request the frame features, review the layout and
# text for overlap/alignment, and answer 'All good' + 'TERMINATE' when satisfied.
critic_system_message = """
    You are a critic AI agent. 
    Your task is to evaluate the quality of the composed ad frames. 
    You will execute function to get frame features that uses Computer vision tools to get the features of the frame.
    Make sure to listen for the results in this format: {'frame_features': ... }
    Check on the design of the frame and the text in the frame, ensure not overlapping, e.t.c and use the best design principles in the critique.
    You can align the image and text in the frame to make it more appealing. i.e the logo, slogan and the engagement instruction can be made to start on the same vertical line.
    If the frame is good, just say 'All good' and return 'TERMINATE' when the evaluation is complete.
    """

critic_agent = AssistantAgent(
    llm_config=llm_config_critic,
    system_message=critic_system_message,
    name="image_critic_agent",
)


#### Set up configs for groupchat manager and user agent

In [48]:
# Endpoint configuration passed to the group chat manager.
llm_config = dict(config_list=[dict(model="gpt-4o")])

# Endpoint configuration passed to the user proxy agent.
llm_config_user = dict(config_list=[dict(model="gpt-3.5-turbo")])

## User proxy agent setup

In [49]:
# System prompt for the user proxy: it should run the functions the other
# agents request and relay the raw results without interpreting them.
system_message_user = """
"You are a the Human admin in the groupchat. 
You can interact with the image composition and the critic agents.
Execute their recommended functions and return the output as it is to the agents (Do not interpret the results).
"""

# The user proxy never asks for human input and does not execute code blocks;
# its job is to execute the function calls suggested by the two assistants.
user_proxy = UserProxyAgent(
    name="User",
    llm_config=llm_config_user,
    system_message=system_message_user,
    # Stop the conversation as soon as any message contains 'TERMINATE'.
    is_termination_msg=lambda msg: msg.get("content") is not None and "TERMINATE" in msg["content"],
    human_input_mode="NEVER",
    code_execution_config=False,
    # max_consecutive_auto_reply=3,
    # Only functions that are actually defined in this notebook are mapped.
    # "evaluate_composed_frame" used to be mapped to evaluate_compose_frame,
    # but that function's definition is commented out (and the critic no longer
    # advertises it), so referencing it raised NameError on a fresh kernel.
    function_map={
        "compose_ad_frame": compose_ad_frame,
        "get_frame_features": get_frame_features
    }
)

## Setup groupchat manager

In [52]:
from autogen.agentchat.groupchat import GroupChatManager

class CustomGroupChatManager(GroupChatManager):
    """GroupChatManager that hands the turn to the critic once frame features are available."""

    def _select_next_speaker(self, last_speaker, last_message, groupchat):
        """
        Choose the agent that speaks next.

        Args:
            last_speaker: Agent that produced `last_message` (its `.name` is inspected).
            last_message (dict): Last chat message; only its "content" entry is read.
            groupchat: Group chat providing `agent_names` and `agent_by_name`.

        Returns:
            The "image_critic_agent" agent when the "User" agent has just posted
            content containing "frame_features" and the critic is part of the
            chat; otherwise whatever the parent implementation selects.
        """
        # NOTE(review): confirm that the installed autogen version actually calls
        # `_select_next_speaker` on the manager (and defines it on the parent);
        # if it does not, this override is never reached.

        # "content" can be present with a None value, in which case the default of
        # `.get("content", {})` is not used and `in` raises TypeError; normalise
        # to an empty string so the membership test is always valid.
        content = last_message.get("content") or ""

        # Prioritize image_critic_agent when frame_features are ready
        if (last_speaker.name == "User" and
            "frame_features" in content and
            "image_critic_agent" in groupchat.agent_names):
            return groupchat.agent_by_name("image_critic_agent")

        # For all other cases, use default behavior (or your custom logic)
        return super()._select_next_speaker(last_speaker, last_message, groupchat)


In [53]:

# Participants: the executing user proxy plus the composing and critiquing
# assistants; the chat starts with an empty message history.
chat_participants = [user_proxy, image_composition_agent, critic_agent]
groupchat = GroupChat(messages=[], agents=chat_participants)

# Drive the chat with the custom manager defined above.
manager = CustomGroupChatManager(llm_config=llm_config, groupchat=groupchat)


## Initiate the conversation

In [54]:
# Task prompt for the group chat. The assets folder is interpolated from
# `assets_path` (built from PROJECT_PATH and the game id in the compose-frame
# test cell) instead of being a hard-coded absolute path on one machine, so the
# notebook works wherever PROJECT_PATH points. The trailing "/" is kept so the
# agents can append the image names directly.
message = f"""
Compose an AD Frame with the dimensions 600x500 for StoryBoard
Use the following assets:
* Header image: header.jpg
 - Should be placed at the top-left corner
* Instruction image: engagement_instruction_1.png
  - Should be placed at (40, 100)
  - Transparent
* Thumbnail image: thumbnail.jpg
 - Should be placed at (0, 200)

The path to the assets folder is {assets_path}/

Use the assets path and the image names to get the image path

The output path is the data folder can be derived from assets path with the name composed_image_frame.jpg

Return 'TERMINATE' when the task is done.


"""
# The user proxy opens the conversation with the manager; the exchange is
# capped at 30 turns.
chat_result = user_proxy.initiate_chat(manager, message=message, max_turns=30)

[33mUser[0m (to chat_manager):


Compose an AD Frame with the dimensions 600x500 for StoryBoard
Use the following assets:
* Header image: header.jpg
 - Should be placed at the top-left corner
* Instruction image: engagement_instruction_1.png
  - Should be placed at (40, 100)
  - Transparent
* Thumbnail image: thumbnail.jpg
 - Should be placed at (0, 200)

The path to the assets folder is /home/hillary_kipkemoi/Automated-Storyboard-Synthesis-Digital-Advertising/data/Assets/0a22f881b77f00220f2034c21a18b854/

Use the assets path and the image names to get the image path

The output path is the data folder can be derived from assets path with the name composed_image_frame.jpg

Return 'TERMINATE' when the task is done.




--------------------------------------------------------------------------------
[32m
Next speaker: image_composition_agent
[0m
[33mimage_composition_agent[0m (to chat_manager):

[32m***** Suggested function call: compose_ad_frame *****[0m
Arguments: 
{"frame_widt