In [7]:
from google.adk.agents import LlmAgent
from google.adk.tools import agent_tool
from dotenv import load_dotenv
import os

# Populate the process environment before any agent is built. load_dotenv() is
# called with no arguments, so it relies on python-dotenv's default lookup of a
# .env file (the file the warning below tells the user to edit).
load_dotenv()

# Soft check only: a missing or empty GOOGLE_API_KEY produces a two-line warning
# and execution continues, so the rest of the cell can still define the agents.
if os.environ.get("GOOGLE_API_KEY", "") == "":
    print(
        "WARNING: GOOGLE_API_KEY not found in environment variables!",
        "Please add GOOGLE_API_KEY to your .env file",
        sep="\n",
    )

# 1. A simple function tool for the core capability.
# This follows the best practice of separating actions from reasoning.
def generate_image(prompt: str) -> dict:
    """
    Generates an image based on a textual prompt.

    Args:
        prompt: A detailed description of the image to generate.

    Returns:
        A dictionary with the status and the generated image bytes.
    """
    # NOTE: the docstring above is left word-for-word as it was. ADK builds a
    # function tool's description from its docstring, so that text is part of
    # what the model is shown and must not drift during a restyle.

    # Trace every invocation so the notebook output shows which prompt arrived.
    print(f"TOOL: Generating image for prompt: '{prompt}'")

    # Stand-in for a real image generation API call: the payload is a fixed
    # placeholder and does not depend on `prompt`.
    # The raw bytes are handed back as-is; turning them into a Part is left to
    # the calling agent.
    return dict(
        status="success",
        image_bytes=b"mock_image_data_for_a_cat_wearing_a_hat",
        mime_type="image/png",
    )


# 2. Sub-agent dedicated to image generation.
# It is an LlmAgent whose only tool is `generate_image`; its instruction tells
# it to forward the incoming request verbatim as the tool's `prompt` argument.
image_generator_agent = LlmAgent(
    model="gemini-2.0-flash-exp",
    name="ImageGen",
    tools=[generate_image],
    description="Generates an image based on a detailed text prompt.",
    instruction=(
        "You are an image generation specialist. "
        "Your task is to take the user's request and use the `generate_image` tool "
        "to create the image. "
        "The user's entire request should be used as the 'prompt' argument for the tool. "
        "After the tool returns the image bytes, you MUST output the image."
    ),
)

# 3. Expose the sub-agent as a tool that another agent can call.
# No separate description is passed here; the wrapped agent's own `description`
# field is what describes the tool.
image_tool = agent_tool.AgentTool(agent=image_generator_agent)

# 4. Parent agent: invents the prompt, then delegates rendering to `ImageGen`
# through the AgentTool wrapper (the tool is referred to by the sub-agent's name).
artist_agent = LlmAgent(
    model="gemini-2.0-flash-exp",
    name="Artist",
    tools=[image_tool],
    instruction=(
        "You are a creative artist. "
        "First, invent a creative and descriptive prompt for an image. "
        "Then, use the `ImageGen` tool to generate the image using your prompt."
    ),
)

# --- How it works now ---
# 1. The `artist_agent` decides on a prompt, e.g., "A photorealistic cat wearing a tiny top hat."
# 2. It calls the tool: `ImageGen(request="A photorealistic cat wearing a tiny top hat.")`
#    (Note: when the wrapped agent declares no input schema, AgentTool exposes a single
#    string parameter named 'request' that carries the sub-agent's query.)
# 3. The `agent_tool` invokes `image_generator_agent` with the prompt as its input.
# 4. The `image_generator_agent` follows its instructions and calls `generate_image(prompt="...")`.
# 5. The function returns the image bytes.
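# 6. `image_generator_agent` finishes with its final response, which AgentTool hands back as the tool result.
# 7. `artist_agent` receives that result from the tool call (in the run recorded below it is text only; no image data comes back).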

# Reached only if everything above in this cell ran without raising, i.e. both
# LlmAgent objects and the AgentTool wrapper were constructed.
print("Agents defined successfully!")

Agents defined successfully!


In [8]:
# --- Execute the artist agent ---
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
from google.genai import types
import asyncio

# Define variables for session setup
# run_artist_agent passes these same values to create_session() and then to
# Runner / run_async(), so session creation and the run must keep agreeing on them.
APP_NAME = "image_artist_app"  # application name given to the session service and the Runner
USER_ID = "user_123"  # user the session is created for and the message is sent as
SESSION_ID = "session_001"  # fixed id of the single session created per run_artist_agent call

# Agent Interaction
async def run_artist_agent(query: str):
    """
    Send one user query to `artist_agent` and print its final response.

    A fresh in-memory session service, session and Runner are created on every
    call. The event stream is consumed to the end; for each event that is a
    final response with content, the text is printed between separator lines,
    followed by the MIME type and byte size of any inline image parts.

    Args:
        query: The user message to send to the artist agent.

    Returns:
        None. All results are written to stdout.
    """
    separator = "=" * 80

    # Session and runner are rebuilt per call, so the fixed SESSION_ID is always
    # created against a brand-new in-memory store.
    store = InMemorySessionService()
    await store.create_session(
        app_name=APP_NAME,
        user_id=USER_ID,
        session_id=SESSION_ID,
    )
    agent_runner = Runner(
        agent=artist_agent,
        app_name=APP_NAME,
        session_service=store,
    )

    user_message = types.Content(role='user', parts=[types.Part(text=query)])

    print(f"\n--- Running Artist Agent with query: '{query}' ---\n")

    event_stream = agent_runner.run_async(
        user_id=USER_ID,
        session_id=SESSION_ID,
        new_message=user_message,
    )
    async for event in event_stream:
        # Only final responses that actually carry content are reported.
        if not event.is_final_response():
            continue
        if not event.content:
            continue

        reply = event.content

        # Prefer a direct `text` attribute when the content object has a
        # non-empty one; otherwise stitch together the text of its parts.
        direct_text = getattr(reply, 'text', None)
        if direct_text:
            answer = direct_text
        elif reply.parts:
            answer = "".join(piece.text for piece in reply.parts if piece.text)
        else:
            answer = ""

        print(f"\n{separator}")
        print("Artist Agent Response:")
        print(separator)
        print(answer)

        # Report any binary image payloads attached to the response.
        for piece in reply.parts or ():
            blob = getattr(piece, 'inline_data', None)
            if blob:
                print(f"\nImage generated: {blob.mime_type}")
                print(f"Image size: {len(blob.data)} bytes")

        print(separator)

# Run the artist agent
# The coroutine is awaited directly at cell level (no asyncio.run() wrapper);
# the output recorded below shows this executing in the notebook kernel.
await run_artist_agent("Create an artistic image")


--- Running Artist Agent with query: 'Create an artistic image' ---





TOOL: Generating image for prompt: 'A bioluminescent jellyfish gracefully floats through a dreamlike underwater landscape filled with towering coral structures that resemble ancient temples. Schools of tiny, glowing fish dart around, creating swirling patterns of light. The water is a deep, ethereal blue, with rays of sunlight filtering through the surface, creating an otherworldly atmosphere. The overall style should be reminiscent of a watercolor painting with a touch of surrealism.'

Artist Agent Response:
I have generated the image based on your prompt. The image is ready.

