# "Like No Other" Video Agent

The "Like No Other" video agent is designed to recreate your favourite ad. I gave it tools and let it be the Creative Director, Producer and Post Production Producer of the videos.

In [None]:
!pip install openai-agents nest_asyncio

In [None]:
%pip install google-adk
%pip install google-genai

In [None]:
%pip install fal-client ffmpeg

In [None]:
%pip install openai

In [18]:
import os
import re
import json
import time
import requests
import asyncio
import fal_client
import base64
import subprocess

from agents import Agent as OpenAIAgent, Runner as OpenAIRunner, function_tool, WebSearchTool
from agents.run_context import RunContextWrapper

from google import genai
from google.genai import types
from typing import Any, Dict, Union
from google.adk.tools import LongRunningFunctionTool
from google.adk.agents import Agent as ADKAgent, SequentialAgent
from google.genai.types import GenerationConfig
from google.adk.runners import Runner as ADKRunner
from google.adk.sessions import InMemorySessionService
from google.genai.types import Content, Part
from google.adk.tools import google_search
from google.adk.tools.tool_context import ToolContext
from google.adk.models.lite_llm import LiteLlm
from google.adk.planners import BuiltInPlanner

from openai import OpenAI
from dotenv import load_dotenv
from fal_client import upload_file, subscribe, InProgress, submit, result

from pathlib import Path
from IPython.display import Audio as IPAudio
from IPython.display import display, Video, Image, HTML

from datetime import datetime

In [19]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Re-export FAL_KEY into the environment only when it is actually set:
# os.environ accepts strings only, so assigning a missing key (None) would
# raise TypeError and abort the whole cell before the check below can report it.
FAL_KEY = os.getenv("FAL_KEY")
if FAL_KEY:
    os.environ["FAL_KEY"] = FAL_KEY

PERPLEXITY_SONAR_API_KEY = os.getenv("PERPLEXITY_SONAR_API_KEY")
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# FAL_KEY is part of the check because every image/video/audio tool below
# goes through fal_client.
if FAL_KEY and GEMINI_API_KEY and OPENAI_API_KEY and PERPLEXITY_SONAR_API_KEY:
    print("API keys loaded!")
else:
    print("One or more API keys are not loaded!")

# LiteLLM-backed model handle (deterministic sampling, no retries, 60 s timeout).
llm = LiteLlm(
    model="openai/gpt-4o",
    temperature=0,
    num_retries=0,
    timeout=60,
)

# OpenAI client used by the gpt-image-1 tool; the long timeout leaves room for
# slow image generations.
client = OpenAI(api_key=OPENAI_API_KEY, max_retries=1, timeout=6000)

API keys loaded!


In [20]:
from agents import Agent, Runner, function_tool, WebSearchTool
from agents.run_context import RunContextWrapper

@function_tool
async def write_to_markdown(ctx: RunContextWrapper, content: str, filename: str = None) -> str:
    """Writes the given content to a markdown file, always named query_reply.md.

    Args:
        ctx: Run context passed in by the agent runtime; not used here.
        content: The markdown text to store. Any existing file is overwritten.
        filename: Ignored. Kept only so existing callers keep working; the
            output file is always query_reply.md.

    Returns:
        A confirmation message naming the file that was written.
    """
    # Explicit UTF-8: read_markdown() reads this file back as UTF-8, and the
    # platform default encoding is not guaranteed to match.
    with open("query_reply.md", "w", encoding="utf-8") as f:
        f.write(content)
    return "Data written to query_reply.md"


# Create the agent
# The research agent is given two tools: WebSearchTool for looking things up and
# write_to_markdown for saving its report. Its instructions ask it to fill in the
# Markdown template below (all four sections) and store the result as query_reply.md.
# NOTE(review): the numbered steps in the instructions jump from 3 to 5 — confirm
# whether a step 4 was dropped.
openai_agent = OpenAIAgent(
    name="WebSearchAgent",
    instructions="""
        You are a senior creative strategist and advertising researcher. Your job is to analyze ad campaigns by searching the web and structuring your findings into a rich, human-readable report that will guide an AI in recreating the ad experience.
        
        Given a user query about a specific ad campaign, follow these steps:
        
        1. Search across multiple sources (articles, forums, ad reviews, Wikipedia, YouTube comments, award sites).
        2. Synthesize the campaign information with vivid detail.
        3. Structure the findings using the following **Markdown template.** Every section must be included, even if you only have partial data.
        5. Save the full result using the write_to_markdown tool as query_reply.md.

        **Markdown template:**
        ---
        
        # The Ad in a Nutshell
        
        (A brief 3‚Äì5 sentence summary of the ad‚Äôs core idea, product, visual hook, and when/where it aired. Include at least two source links.)
        
        Example:  
        It‚Äôs a ~1-minute spot from Coca-Cola Life, originally aired in Argentina around 2013 [source](https://en.wikipedia.org).  
        Opens with a couple joyfully discovering they're expecting, followed by a montage of real-life parenthood: sleepless nights, messy meals, toys scattered everywhere ‚Äî beautifully honest and relatable [source](https://churchpop.com).  
        The dad takes a swig of Coke Life, then his wife shows another positive pregnancy test ‚Äî he looks shocked, then smiles. It ends with them embracing, toddler in tow.
        
        # The Scenes
        
        (A visually rich and **very detailed** breakdown of what happens ‚Äî in order ‚Äî including setting, props, characters, framing, colors, and motion. Aim for 3‚Äì5 bullet points. Link where possible.)
        
        - A cheerful outdoor cook-out with kids running around, corn on the cob, pizza, and Coca-Cola bottles in a big ice bucket.  
        - Diverse families chatting, smiling, and toasting ‚Äî showing that Coke brings everyone together.  
        - Subtle product placement of Sprite, Honest juice, and Coke Zero Sugar, hinting at a portfolio message [source](https://marketingdive.com).
        
        # Why It Resonated
        
        (Explain why the ad worked ‚Äî emotionally, culturally, psychologically. Include audience reactions, critiques, and quotes. 3‚Äì5 bullet points is ideal.)
        
        - **Authenticity over cutesiness**: It doesn‚Äôt sugarcoat parenting. It shows real mess, exhaustion, and love [source](https://fastcompany.com).  
        - **Emotional comedy**: It's ‚Äúright down to the uglier parenting realities‚Äù yet makes you ‚Äúsmile and weep‚Äù [source](https://fastcompany.com).  
        - **Deep relatability**: Viewers said ‚ÄúIf you're a parent, this will hit you hard‚Äù [source](https://x.com).
        
        # The Music
        
        (If applicable, name the artist, song title, and year. Describe how the music supports the tone. If no music, describe use of ambient audio.)
        
        The soundtrack was ‚ÄúHold My Hand‚Äù by English singer Jess Glynne (2015).  
        Its upbeat, house-pop rhythm underscored the warmth and unity of the family moment [source](https://etsy.com), [source](https://bustle.com).
        
        ---

        """,
    # Tools the agent may call while following the instructions above.
    tools=[
        WebSearchTool(),
        write_to_markdown,
    ]
)

In [21]:
async def ask_openai_agent(tool_context: ToolContext) -> str:
    """
    Runs the OpenAI research agent on the query stored in the tool context
    and returns the agent's final output.

    Args:
        tool_context (ToolContext): Context whose state holds the query under "user_query"

    """
    user_query = tool_context.state.get("user_query")

    # Guard clause: nothing to ask when the state carries no query.
    if not user_query:
        return "No user query found in tool context state"

    run_outcome = await OpenAIRunner.run(openai_agent, user_query)

    # Echo the answer in the notebook before handing it back to the caller.
    print(run_outcome.final_output)

    return run_outcome.final_output


async def read_markdown() -> str:
    """
    Returns the text of query_reply.md, or an error message when the file
    is missing or cannot be read.
    """
    try:
        with open("query_reply.md", "r", encoding="utf-8") as report:
            contents = report.read()
    except FileNotFoundError:
        return "‚ùå query_reply.md file not found."
    except Exception as e:
        return f"‚ùå Error reading file: {e}"
    else:
        return contents

In [22]:
def text_to_image_imagen4(tool_context: ToolContext, prompt: str) -> str:
    """
    Generates 1 image using Google's Imagen 4 model based on the provided prompt.
    Saves the image locally and stores the prompt and image path in the tool context.
    
    Args:
        prompt (str): The text prompt for image generation
    
    Returns:
        str: A status message with the saved image path, or an error message.
    
    Note: Imagen 4 is a text-to-image model, not image-to-image like Flux Pro.
    
    """
    # Same guard as the other image tools: do not spend an API call on an empty prompt.
    if not prompt:
        return "Error: No image prompt provided."

    print("üé® Using Google's Imagen 4 for image generation\n")

    try:
        print(f"\nüñºÔ∏è Generating image with Imagen 4:\n{prompt}\n")
        # Fixed file name: each call overwrites the previous Imagen 4 output.
        output_path = os.path.join(os.getcwd(), "imagen4_output.png")

        # Submit generation request to Imagen 4
        handler = fal_client.submit(
            "fal-ai/imagen4/preview",
            arguments={
                "prompt": prompt,
                "negative_prompt": "blurry, low quality, distorted, watermark, text overlay, poor lighting, oversaturated, undersaturated, grainy, artifacts",
                "aspect_ratio": "16:9",  # Options: "1:1", "16:9", "9:16", "3:4", "4:3"
                "num_images": 1,
                "seed": 123456  # For reproducible generation
            }
        )

        # Get result
        result_data = fal_client.result("fal-ai/imagen4/preview", handler.request_id)
        image_info = result_data["images"][0]
        generated_url = image_info.get("url")

        if not generated_url:
            return f"‚ùå No image URL returned for the image."

        # The "url" may be an inline data URI or a regular downloadable URL.
        if generated_url.startswith("data:image"):
            _, encoded = generated_url.split(",", 1)
            image_bytes = base64.b64decode(encoded)
        else:
            # Bounded wait so a stalled download cannot hang the tool call.
            response = requests.get(generated_url, timeout=60)
            response.raise_for_status()
            image_bytes = response.content

        with open(output_path, "wb") as f:
            f.write(image_bytes)

        # Display in notebook (best effort; the file is already saved)
        try:
            display(Image(output_path))
        except Exception:
            print("‚ö†Ô∏è Could not display image in notebook.")

        # Save prompt and image path in context
        tool_context.state["image_prompt"] = prompt
        tool_context.state["image_path"] = output_path

        success_message = f"‚úÖ Successfully generated and saved image using Google's Imagen 4. Image saved to: {output_path}"
        print(success_message)
        return success_message

    except Exception as e:
        # Report the failure as text so the calling agent can react to it.
        return f"‚ùå Error during Imagen 4 generation: {e}"

In [23]:
def text_to_image_gpt_image_1(tool_context: ToolContext, prompt: str) -> str:
    """
    Generates an image using OpenAI's gpt-image-1 and saves it locally.
    Stores the prompt and image path in the tool context.

    Args:
        prompt (str): The text prompt for image generation

    Returns:
        str: A status message with the saved image path, or an error message.
    
    """

    if not prompt:
        return "Error: No image prompt provided."

    try:
        # Generate the image
        response = client.images.generate(
            model="gpt-image-1",
            prompt=prompt,
            size="1536x1024", #Possible enum values: auto, 1024x1024, 1536x1024, 1024x1536
            quality="high",
            n=1
        )

        # Extract base64 image data
        image_b64 = response.data[0].b64_json
        image_bytes = base64.b64decode(image_b64)

        # Save to file (fixed name: each call overwrites the previous output)
        image_path = os.path.join(os.getcwd(), "generated_image.png")
        with open(image_path, 'wb') as f:
            f.write(image_bytes)

        # Display in notebook (best effort). Catch Exception rather than using a
        # bare except so KeyboardInterrupt/SystemExit are not swallowed.
        try:
            display(Image(image_path))
        except Exception:
            print("Note: Could not display image in notebook")

        tool_context.state["image_prompt"] = prompt
        tool_context.state["image_path"] = image_path

        print(f"‚úÖ Successfully generated and saved image using OpenAI's gpt-image-1. Image saved to: {image_path}")

        return f"Successfully generated and saved image using OpenAI's gpt-image-1. Image saved to: {image_path}"

    except Exception as e:
        # Report the failure as text so the calling agent can react to it.
        return f"Error generating image: {e}"


In [24]:
def text_to_image_flux_pro(tool_context: ToolContext, prompt: str) -> str:
    """
    Generates an image using Flux Pro's text-to-image model and saves it locally.
    Stores the prompt and image path in the tool context.

    Args:
        prompt (str): The text prompt for image generation

    Returns:
        str: A status message with the saved image path, or an error message.
    
    """

    # The guard must test the actual parameter; the previous name (image_prompt)
    # was undefined and raised NameError on every call.
    if not prompt:
        return "Error: No image prompt provided."

    print(f"\nüé® Generating image with Flux Pro:\n{prompt}\n")

    try:
        handler = submit(
            "fal-ai/flux-pro/kontext/text-to-image",
            arguments={
                "prompt": prompt,
                "guidance_scale": 3.5,
                "num_images": 1,
                "safety_tolerance": "2",
                "output_format": "png",
                "aspect_ratio": "16:9", #Possible enum values: 21:9, 16:9, 4:3, 3:2, 1:1, 2:3, 3:4, 9:16, 9:21
                "sync_mode": True
            }
        )

        # Wait for the result and get image URL
        result_data = result("fal-ai/flux-pro/kontext/text-to-image", handler.request_id)
        image_url = result_data["images"][0]["url"]

        # Decode base64 image from data URI
        if image_url.startswith("data:image"):
            _, encoded = image_url.split(",", 1)
            image_bytes = base64.b64decode(encoded)
        else:
            # Fallback in case a regular URL is returned. Check the HTTP status so
            # an error page is never saved as a PNG, and bound the wait.
            download = requests.get(image_url, timeout=60)
            download.raise_for_status()
            image_bytes = download.content

        # Save image to file (fixed name: each call overwrites the previous output)
        image_path = os.path.join(os.getcwd(), "flux_pro_generated_image.png")
        with open(image_path, "wb") as f:
            f.write(image_bytes)

        # Display in notebook (best effort). Catch Exception rather than using a
        # bare except so KeyboardInterrupt/SystemExit are not swallowed.
        try:
            display(Image(image_path))
        except Exception:
            print("Note: Could not display image in notebook")

        tool_context.state["image_prompt"] = prompt
        tool_context.state["image_path"] = image_path

        print(f"‚úÖ Successfully generated and saved image using Flux Pro's kontext/text-to-image. Image saved to: {image_path}")

        return f"Successfully generated and saved image using Flux Pro's kontext/text-to-image. Image saved to: {image_path}"

    except Exception as e:
        # Report the failure as text so the calling agent can react to it.
        return f"‚ùå Error generating image with Flux Pro: {e}"

In [25]:
def display_video_player(video_path):
    """Show a heading, an embedded 600px-wide player and the file path for a video."""
    heading = HTML("<h3 style='text-align: left;'>üìπ Generated Video</h3>")
    display(heading)

    player = Video(video_path, width=600, embed=True)
    display(player)

    caption = HTML(f"<p style='text-align: left;'><strong>Video Path:</strong> {video_path}</p>")
    display(caption)

In [26]:
def text_to_video_veo3(tool_context: ToolContext, video_prompt: str) -> str:
    """
    Generates 1 video using Google's Veo3 text-to-video model. Video length is 8 seconds!
    Stores the resulting video path in the tool_context.
    
    Args:
        video_prompt (str): Text prompt for video generation

    Returns:
        str: A status message with the saved video path and its state key,
        or an error message if generation or download failed.
    """
    try:
        if video_prompt:
            print(video_prompt)

        def on_queue_update(update):
            # Relay the job's progress logs while it is running.
            if isinstance(update, InProgress):
                for log in update.logs:
                    print(log["message"])

        print(f"üöÄ Submitting video request to Veo3...")
        generation = subscribe(
            "fal-ai/veo3",
            arguments={
                "prompt": video_prompt,
                "aspect_ratio": "16:9",
                "duration": "8s",
                "negative_prompt": "blurry, distorted, unrealistic hands, warped faces, glitchy movement, pixelated, low quality, jerky camera, unmentioned characters, unnatural lighting",
                "enhance_prompt": True,
                "generate_audio": False,
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )

        video_url = generation.get("video", {}).get("url")
        if not video_url:
            return f"‚ùå No video URL returned."

        # Bounded wait so a stalled download cannot hang the tool call.
        response = requests.get(video_url, timeout=300)
        response.raise_for_status()

        # Second-level resolution: with minutes only, two clips finished within the
        # same minute would share a file name and state key and overwrite each other.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        video_path = os.path.join(os.getcwd(), f"generated_video_{timestamp}.mp4")

        with open(video_path, "wb") as f:
            f.write(response.content)

        # Display in notebook (best effort; the file is already saved)
        try:
            display(Video(video_path, width=600, embed=True))
        except Exception:
            print(f"‚ö†Ô∏è Could not display video in notebook.")

        # Create unique key based on timestamp
        unique_key = f"video_path_{timestamp}"
        tool_context.state[unique_key] = video_path

        # Also maintain a list of all video paths for easy access. The list is
        # re-assigned rather than appended to in place so the update goes through
        # the state's item assignment.
        # NOTE(review): ADK appears to record state changes on assignment only —
        # confirm against the ADK state documentation.
        recorded_paths = list(tool_context.state.get("all_video_paths") or [])
        recorded_paths.append(video_path)
        tool_context.state["all_video_paths"] = recorded_paths

        print(f"‚úÖ Successfully generated and saved video from Veo3") 
        print(f"üìÅ Video saved to: {video_path}")
        print(f"üîë Context key: {unique_key}")

        return f"‚úÖ Successfully generated and saved video from Veo3. Video saved to: {video_path} (key: {unique_key})"

    except Exception as e:
        # Report the failure as text so the calling agent can react to it.
        return f"‚ùå Error during Veo3 video generation: {e}"

In [27]:
def text_to_video_seedance_pro(tool_context: ToolContext, video_prompt: str) -> str:
    """
    Generates 1 video using Bytedance's Seedance 1.0 Pro text-to-video model. Video length is 10 seconds!
    Stores the resulting video path in the tool_context.
    
    Args:
        video_prompt (str): Text prompt for video generation

    Returns:
        str: A status message with the saved video path and its state key,
        or an error message if generation or download failed.
    """
    try:
        if video_prompt:
            print(video_prompt)

        def on_queue_update(update):
            # Relay the job's progress logs while it is running.
            if isinstance(update, InProgress):
                for log in update.logs:
                    print(log["message"])

        print(f"üöÄ Submitting video request to Seedance 1.0 Pro...")
        generation = subscribe(
            "fal-ai/bytedance/seedance/v1/pro/text-to-video",
            arguments={
                "prompt": video_prompt,
                "aspect_ratio": "16:9",
                "resolution": "1080p",
                "duration": "10",
                "seed": 42,
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )

        video_url = generation.get("video", {}).get("url")
        if not video_url:
            return f"‚ùå No video URL returned."

        # Bounded wait so a stalled download cannot hang the tool call.
        response = requests.get(video_url, timeout=300)
        response.raise_for_status()

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        video_path = os.path.join(os.getcwd(), f"generated_video_{timestamp}.mp4")

        with open(video_path, "wb") as f:
            f.write(response.content)

        # Display in notebook (best effort; the file is already saved)
        try:
            display(Video(video_path, width=600, embed=True))
        except Exception:
            print(f"‚ö†Ô∏è Could not display video in notebook.")

        # Create unique key based on timestamp
        unique_key = f"video_path_{timestamp}"
        tool_context.state[unique_key] = video_path

        # Also maintain a list of all video paths for easy access. The list is
        # re-assigned rather than appended to in place so the update goes through
        # the state's item assignment.
        # NOTE(review): ADK appears to record state changes on assignment only —
        # confirm against the ADK state documentation.
        recorded_paths = list(tool_context.state.get("all_video_paths") or [])
        recorded_paths.append(video_path)
        tool_context.state["all_video_paths"] = recorded_paths

        print(f"‚úÖ Successfully generated and saved video from Seedance 1.0 Pro")
        print(f"üìÅ Video saved to: {video_path}")
        print(f"üîë Context key: {unique_key}")

        return f"‚úÖ Successfully generated and saved video from Seedance 1.0 Pro. Video saved to: {video_path} (key: {unique_key})"

    except Exception as e:
        # Report the failure as text so the calling agent can react to it.
        return f"‚ùå Error during Seedance 1.0 Pro video generation: {e}"

In [28]:
def image_to_video_kling2_1_master(tool_context: ToolContext, image_path: str, video_prompt: str) -> str:
    """
    Generates 1 video using Kling 2.1 based on the provided image path. Video length is 10 seconds!
    Stores the resulting video path in the tool_context.
    
    Args:
        image_path (str): Path to the image file to convert to video
        video_prompt (str): Text prompt for video generation

    Returns:
        str: A status message with the saved video path and its state key,
        or an error message if the input, generation or download failed.
    """
    try:
        if not image_path or not os.path.isfile(image_path):
            return f"‚ùå Invalid image_path provided: {image_path}"

        if video_prompt:
            print(video_prompt)

        print(f"üé• Uploading image to FAL...")
        uploaded_url = upload_file(image_path)

        def on_queue_update(update):
            # Relay the job's progress logs while it is running.
            if isinstance(update, InProgress):
                for log in update.logs:
                    print(log["message"])

        print(f"üöÄ Submitting video request to Kling 2.1...")
        generation = subscribe(
            "fal-ai/kling-video/v2.1/master/image-to-video",
            arguments={
                "prompt": video_prompt,
                "image_url": uploaded_url,
                # The request asks for a 10-second clip; the docstring states the same.
                "duration": "10",
                "aspect_ratio": "16:9",
                "negative_prompt": "blurry, distorted, unrealistic hands, warped faces, glitchy movement, pixelated, low quality, jerky camera, unmentioned characters, unnatural lighting",
                "cfg_scale": 0.9,
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )

        video_url = generation.get("video", {}).get("url")
        if not video_url:
            return f"‚ùå No video URL returned."

        # Bounded wait so a stalled download cannot hang the tool call.
        response = requests.get(video_url, timeout=300)
        response.raise_for_status()

        # Second-level resolution: with minutes only, two clips finished within the
        # same minute would share a file name and state key and overwrite each other.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        video_path = os.path.join(os.getcwd(), f"generated_video_{timestamp}.mp4")

        with open(video_path, "wb") as f:
            f.write(response.content)

        # Display in notebook (best effort; the file is already saved)
        try:
            display(Video(video_path, width=600, embed=True))
        except Exception:
            print(f"‚ö†Ô∏è Could not display video in notebook.")

        # Create unique key based on timestamp
        unique_key = f"video_path_{timestamp}"
        tool_context.state[unique_key] = video_path

        # Also maintain a list of all video paths for easy access. The list is
        # re-assigned rather than appended to in place so the update goes through
        # the state's item assignment.
        # NOTE(review): ADK appears to record state changes on assignment only —
        # confirm against the ADK state documentation.
        recorded_paths = list(tool_context.state.get("all_video_paths") or [])
        recorded_paths.append(video_path)
        tool_context.state["all_video_paths"] = recorded_paths

        print(f"‚úÖ Successfully generated and saved video from Kling 2.1 Master") 
        print(f"üìÅ Video saved to: {video_path}")
        print(f"üîë Context key: {unique_key}")

        return f"‚úÖ Successfully generated and saved video from Kling 2.1 Master. Video saved to: {video_path} (key: {unique_key})"

    except Exception as e:
        # Report the failure as text so the calling agent can react to it.
        return f"‚ùå Error during Kling 2.1 video generation: {e}"

In [29]:
def image_to_video_seedance_pro(tool_context: ToolContext, image_path: str, video_prompt: str) -> str:
    """
    Generates 1 video using Seedance 1.0 Pro based on the provided image path. Video length is 10 seconds!
    Stores the resulting video path in the tool_context.
    
    Args:
        image_path (str): Path to the image file to convert to video
        video_prompt (str): Text prompt for video generation

    Returns:
        str: A status message with the saved video path and its state key,
        or an error message if the input, generation or download failed.
    """
    try:
        if not image_path or not os.path.isfile(image_path):
            return f"‚ùå Invalid image_path provided: {image_path}"
        if video_prompt:
            print(video_prompt)

        print(f"üé• Uploading image to FAL...")
        uploaded_url = upload_file(image_path)

        def on_queue_update(update):
            # Relay the job's progress logs while it is running.
            if isinstance(update, InProgress):
                for log in update.logs:
                    print(log["message"])

        print(f"üöÄ Submitting video request to Seedance 1.0 Pro...")
        generation = subscribe(
            "fal-ai/bytedance/seedance/v1/pro/image-to-video",
            arguments={
                "prompt": video_prompt,
                "image_url": uploaded_url,
                # The request asks for a 10-second clip; the docstring states the same.
                "duration": "10",
                "resolution": "1080p",
                "seed": 42,
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )

        video_url = generation.get("video", {}).get("url")
        if not video_url:
            return f"‚ùå No video URL returned."

        # Bounded wait so a stalled download cannot hang the tool call.
        response = requests.get(video_url, timeout=300)
        response.raise_for_status()

        # Second-level resolution: with minutes only, two clips finished within the
        # same minute would share a file name and state key and overwrite each other.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        video_path = os.path.join(os.getcwd(), f"generated_video_{timestamp}.mp4")

        with open(video_path, "wb") as f:
            f.write(response.content)

        # Display in notebook (best effort; the file is already saved)
        try:
            display(Video(video_path, width=600, embed=True))
        except Exception:
            print(f"‚ö†Ô∏è Could not display video in notebook.")

        # Create unique key based on timestamp
        unique_key = f"video_path_{timestamp}"
        tool_context.state[unique_key] = video_path

        # Also maintain a list of all video paths for easy access. The list is
        # re-assigned rather than appended to in place so the update goes through
        # the state's item assignment.
        # NOTE(review): ADK appears to record state changes on assignment only —
        # confirm against the ADK state documentation.
        recorded_paths = list(tool_context.state.get("all_video_paths") or [])
        recorded_paths.append(video_path)
        tool_context.state["all_video_paths"] = recorded_paths

        print(f"‚úÖ Successfully generated and saved video from Seedance 1.0 Pro.")
        print(f"üìÅ Video saved to: {video_path}")
        print(f"üîë Context key: {unique_key}")

        return f"‚úÖ Successfully generated and saved video from Seedance 1.0 Pro. Video saved to: {video_path} (key: {unique_key})"

    except Exception as e:
        # Report the failure as text so the calling agent can react to it.
        return f"‚ùå Error during Seedance 1.0 Pro video generation: {e}"

In [30]:
def get_video_resolution(video_path):
    """Return (width, height) of the first video stream via ffprobe, or (None, None) on any failure."""
    probe_cmd = [
        "ffprobe", "-v", "quiet", "-print_format", "json",
        "-show_streams", "-select_streams", "v:0", video_path,
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
        # Only the first video stream is selected, so it is the first entry in "streams".
        first_stream = json.loads(probe.stdout)['streams'][0]
        return int(first_stream['width']), int(first_stream['height'])
    except Exception as e:
        print(f"‚ùå Error getting resolution for {video_path}: {e}")
        return None, None

def concatenate_videos(tool_context: ToolContext) -> str:
    """
    Smart concatenation that detects resolutions and scales to match the first video.

    Reads the clip paths from "all_video_paths" in the tool context state, joins
    them (video only, no audio) in list order with ffmpeg, and stores the output
    path under "concatenated_video_path".

    Returns:
        str: A status message with the output path and resolution, or an error message.
    """

    # Get video paths from the all_video_paths list
    all_video_paths = tool_context.state.get("all_video_paths", [])

    if not all_video_paths:
        return "‚ùå No video paths found in tool context."

    if len(all_video_paths) < 2:
        return f"‚ùå Only {len(all_video_paths)} video found. Need at least 2 videos to concatenate."

    # Use the paths in the order they were added (chronological)
    video_paths = list(all_video_paths)

    for path in video_paths:
        if not Path(path).is_file():
            return f"‚ùå File not found: {path}"

    try:
        # Get resolutions of all videos
        print("üîç Detecting video resolutions...")
        resolutions = []
        for path in video_paths:
            w, h = get_video_resolution(path)
            if w is None or h is None:
                return f"‚ùå Failed to detect video resolutions"
            resolutions.append((w, h))

        # Use first video's resolution as target
        target_width, target_height = resolutions[0]
        print(f"Target resolution: {target_width}x{target_height}")

        segment_count = len(video_paths)
        concat_stage = f"concat=n={segment_count}:v=1:a=0[outv]"

        if all(res == resolutions[0] for res in resolutions):
            print("‚úÖ All videos have same resolution - simple concat")
            video_inputs = "".join(f"[{i}:v]" for i in range(segment_count))
            filter_complex = f"{video_inputs}{concat_stage}"
        else:
            print("üîÑ Different resolutions detected - scaling to match first video")
            # Scale every clip into the target frame, pad the remainder, and
            # normalize the sample aspect ratio: the concat filter needs matching
            # stream parameters, and scaling can leave clips with differing SAR.
            scale_filters = [
                f"[{i}:v]scale={target_width}:{target_height}:force_original_aspect_ratio=decrease,"
                f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2,setsar=1[v{i}]"
                for i in range(segment_count)
            ]
            concat_inputs = "".join(f"[v{i}]" for i in range(segment_count))
            filter_complex = ";".join(scale_filters) + ";" + concat_inputs + concat_stage

        # Run ffmpeg with dynamic filter
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(os.getcwd(), f"concatenated_video_{timestamp}.mp4")

        # Build ffmpeg command: one -i per clip, then the filter graph and output options
        ffmpeg_cmd = ["ffmpeg"]
        for path in video_paths:
            ffmpeg_cmd.extend(["-i", path])

        ffmpeg_cmd.extend([
            "-filter_complex", filter_complex,
            "-map", "[outv]",
            "-preset", "veryfast",
            "-y", output_path
        ])

        print("üé¨ Running ffmpeg concatenation...")
        subprocess.run(ffmpeg_cmd, check=True)

        if not Path(output_path).is_file():
            return "‚ùå Failed to create concatenated video."

        tool_context.state["concatenated_video_path"] = output_path
        return f"‚úÖ Successfully concatenated video at {output_path} (resolution: {target_width}x{target_height})"

    except subprocess.CalledProcessError as e:
        return f"‚ùå FFmpeg error: {e}"
    except Exception as e:
        return f"‚ùå Unexpected error: {e}"

In [31]:
def text_to_audio_lyria(tool_context: ToolContext, audio_prompt: str) -> str:
    """
    Generates 30s of music from the combined video prompts using Lyria 2.
    Saves the audio to `lyria_output.wav` and updates `audio_path` in the tool_context.

    Args:
        audio_prompt (str): Text prompt for audio generation by Lyria.

    Returns:
        str: A status message with the saved audio path, or an error message.
    """

    print("\nüéº Generating music with Lyria 2...\nPrompt:\n", audio_prompt)

    try:
        def on_queue_update(update):
            # Relay the job's progress logs while it is running.
            if isinstance(update, InProgress):
                for log in update.logs:
                    print(log["message"])

        generation = subscribe(
            "fal-ai/lyria2",
            arguments={
                "prompt": audio_prompt,
                "negative_prompt": "vocals, dissonance, low quality, harsh noise"
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )

        audio_url = generation.get("audio", {}).get("url")
        if not audio_url:
            return "‚ùå Failed to retrieve audio URL from Lyria."

        # Bounded wait so a stalled download cannot hang the tool call.
        response = requests.get(audio_url, timeout=120)
        response.raise_for_status()

        # Fixed file name: each call overwrites the previous track.
        audio_path = os.path.join(os.getcwd(), "lyria_output.wav")
        with open(audio_path, "wb") as f:
            f.write(response.content)

        # Constructing the Audio object alone renders nothing from inside a
        # function; it has to be passed to display() to show the player.
        try:
            display(IPAudio(audio_path))
        except Exception:
            print("‚ö†Ô∏è Could not display audio in notebook.")

        tool_context.state["audio_prompt"] = audio_prompt
        tool_context.state["audio_path"] = audio_path

        return f"‚úÖ Music successfully generated and saved at {audio_path}"

    except Exception as e:
        # Report the failure as text so the calling agent can react to it.
        return f"‚ùå Error generating music with Lyria 2: {e}"


def merge_video_and_audio(_: str, tool_context: ToolContext) -> str:
    """
    Uses FFmpeg to layer the generated Lyria music over the concatenated video.
    Saves to `final_video_with_audio.mp4` and updates `tool_context.state`.

    Reads `concatenated_video_path` and `audio_path` from `tool_context.state`
    and writes the result path to `final_video_with_audio`.

    Args:
        _ (str): Unused placeholder argument.

    Returns:
        str: A status message with the output path on success, or a
        description of what went wrong (missing state, missing file,
        FFmpeg failure). The function never raises.
    """

    video_path = tool_context.state.get("concatenated_video_path")
    audio_path = tool_context.state.get("audio_path")

    if not video_path or not audio_path:
        return "‚ùå Missing video or audio path."

    if not Path(video_path).is_file():
        return f"‚ùå Video not found: {video_path}"
    if not Path(audio_path).is_file():
        return f"‚ùå Audio not found: {audio_path}"

    output_path = os.path.join(os.getcwd(), "final_video_with_audio.mp4")

    ffmpeg_cmd = [
        "ffmpeg",
        "-i", video_path,
        "-i", audio_path,
        # Explicit stream mapping: picture from the video file, sound from the
        # music file. Without it FFmpeg's automatic selection may keep an audio
        # track already embedded in the video instead of the generated music.
        "-map", "0:v:0",
        "-map", "1:a:0",
        # Stop at the end of the shorter stream so video and music end together.
        "-shortest",
        # Video is copied as-is (no re-encode); audio is encoded to AAC.
        "-c:v", "copy",
        "-c:a", "aac",
        "-b:a", "192k",
        "-y", output_path,
    ]

    try:
        subprocess.run(ffmpeg_cmd, check=True)

        if not Path(output_path).is_file():
            return "‚ùå Failed to create final video with audio."

        tool_context.state["final_video_with_audio"] = output_path
        return f"‚úÖ Final video with audio is created and saved at {output_path}"

    except subprocess.CalledProcessError as e:
        return f"‚ùå FFmpeg error: {e}"
    except Exception as e:
        return f"‚ùå Unexpected error: {e}"


In [32]:
# Root ADK agent for the notebook: it researches the requested ad, plans the
# scenes, generates images/videos/music through the tools listed below and
# assembles the final clip. Its behaviour is driven almost entirely by the
# `instruction` prompt, so edit that text with care.
like_no_other_agent = ADKAgent(
    name="like_no_other_agent",
    # Other model ids that were tried: gemini-2.5-flash-preview-05-20, gemini-2.0-flash, gemini-2.5-pro
    model="gemini-2.5-pro", #gemini-2.5-flash-preview-05-20, gemini-2.0-flash, gemini-2.5-pro
    description="Recreates your favourite ad in an unexpected way",
    # System prompt: persona, workflow, prompt-engineering rules, per-tool
    # limits (clip length / prompt size) and hard constraints such as the
    # 30-second cap and the requirement to always deliver video with audio.
    instruction="""
        **Persona:** You are a world-class creative director and brand strategist, known for your innovative, daring, and slightly rebellious approach to advertising. Your goal is to deconstruct a famous ad campaign and rebuild it with the tools you havem creating a stunning tribute to the original piece of art - ad campaign.

        **Core Mission:** Given a user's request about a famous advertisement, you will analyze its core message, style, and cultural impact. 
        You will then try to recreate the ad based on the information you've gathered, delivered as a short video with matching audio.


        **Workflow:**

        1.  **Deconstruct the Original:**
            * Use the `ask_openai_agent` tool to get a comprehensive description of the original ad campaign mentioned by the user.
            * Analyze this information to pinpoint the absolute core message, the scenes, and the emotional response it was designed to evoke.

        2.  **Create a Storyboard and Shot List:**
            * Create a plan for the video. This will be a sequence of scenes. Decide if you need to generate still images first and then animate them, or if you can generate video clips directly.
            * It is truly important to create one cohesive video and not just random scenes.

        3. You have the following tools available to assemble the final video:
            * `concatenate_videos` - Use this if more than one video has been generated and needs to be stitched together.
            * `text_to_audio_lyria` - Generates a matching audio track based on the ad research. **Do not mention artist names to avoid content policy violations!**
            * `merge_video_and_audio` - Combines the selected video and audio into the final output.

        **Masterful Prompt Engineering for Visuals:**
            * For each visual asset (image or video), you will craft an expert-level prompt. **Do not** use simple prompts. Use very elaborate and precise prompts instead. Your prompts must include details on:
                * **Subject & Composition:** What is the main focus? Use terms like "close-up," "medium shot," "wide shot," "establishing shot."
                * **Action & Emotion:** What are the characters (or objects) doing and feeling? Be vivid, be thorough, be explicit!
                * **Cinematography:** Specify camera angles ("low-angle shot," "dutch angle"), lighting ("dramatic lighting," "soft morning light," "neon glow"), and lens effects ("shallow depth of field," "lens flare").
                * **Art Style:** Be highly descriptive. Examples: "Photorealistic, cinematic, 8k," "1990s grainy VHS style," "Japanese anime aesthetic, Studio Ghibli inspired," "surrealism, Salvador Dal√≠ influence."
                * **Example Prompt:** *Instead of "a man drinking a soda," write: "Extreme close-up on a man's face, cinematic lighting, he's sweating after a long run. He opens a can of soda, a look of intense satisfaction on his face. The shot is photorealistic, with a shallow depth of field, capturing the condensation on the can. 8k."*


        **IMPORTANT:** Tools use
            * Use tools randomly ‚Äî do not show preference for one model over another.
                Once you‚Äôve selected your image/video tools during planning, do not change them during the video production process.

            * CRITICAL to follow: If you choose to first do text-to-image and then image-to-video, you MUST generate the image immediately before generating the video.
                The image is NOT stored in the context ‚Äî every new image generation will overwrite the previous one.
                Only videos are stored in tool_context, so you **must not** generate multiple images first and then animate them later using a video tool.

            * Image tools:
                * text_to_image_imagen4
                * text_to_image_gpt_image_1
                * text_to_image_flux_pro
            
            * Video tools specification:
                * `text_to_video_veo3` - producses 8s videos & the "max prompt size" for Veo3 is dictated not by a character count but by the 8-second runtime of the output.
                * `image_to_video_kling2_1_master` tool produces 10s videos; maximum prompt size is 2500 characters 
                * `image_to_video_seedance_pro` tool produces 10s videos & maximum prompt size is 500 characters
                * `concatenate_videos` tool enables you to concatenate videos to achieve longer video lenght than the one supported by each video model/tool.
                
        
        **IMPORTANT CONSTRAINTS:**
            * You must always begin by using `ask_openai_agent` to understand the original ad.
            * You must use step-by-step thinking to outline your campaing recreation plan before calling any creative tools. You must try to create the campaign as close as possible to the original based on the information you've gathered!
            * When duration is not provided by the user, video **MUST NOT** exceed 30 seconds, otherwise observe user provided duration. You are allowed to plan the duration if not provided by the user and generate e.g. 24s videos or other video duration under 30 seconds!
            * The final video should be a cohesive narrative, not just a collection of random clips.
            * You must always produce the final video with audio.
        """,
    # Tool functions exposed to the agent (defined in other cells of this
    # notebook). NOTE(review): `read_markdown` is registered here but is not
    # mentioned anywhere in the instruction text above - confirm it is needed.
    tools=[
        ask_openai_agent,
        read_markdown,
        text_to_image_imagen4,
        text_to_image_gpt_image_1,
        text_to_image_flux_pro,
        # text_to_video_seedance_pro,  (currently disabled)
        text_to_video_veo3,
        image_to_video_seedance_pro, 
        image_to_video_kling2_1_master,
        concatenate_videos,
        text_to_audio_lyria,
        merge_video_and_audio,
    ],
    # Planner configuration: thoughts are included in the emitted events and
    # the model gets a fixed token budget for planning.
    planner=BuiltInPlanner(
          thinking_config=types.ThinkingConfig(
              include_thoughts=True,      # capture intermediate reasoning
              thinking_budget=2048        # tokens allocated for planning 2048, 3072
          )
    ),
)

In [None]:
import json

async def main():
    session_service = InMemorySessionService()
    runner = ADKRunner(agent=like_no_other_agent, app_name="ad_app", session_service=session_service)
    
    # user_prompt = """Tell me what you know about the color like no other ad campaign done for sony bravia. It involves bouncing balls released on the streets of San Francisco."""
    # user_prompt = """Tell me what you know about the epic split ad campaign done for Volvo by Jean-Claude Van Damme. I want 30s video."""
    # user_prompt = """This is a test, generate just one image, doesn't mater what, using the text_to_image_imagen4 tool"""
    # user_prompt = """Recreate the Argentinian Coca‚ÄëCola Parents commercial also called Coca Cola Family. 
    #  It is about about young family getting a kid. You must use these tools: text_to_image_gpt_image_1 and image_to_video_seedance_pro. 
    #  I want 30s video"""
    user_prompt = """Recreate Lindsey Vonn - wait for it it ad for Head ski.  You must use these tools: text_to_image_gpt_image_1 and image_to_video_kling2_1_master. 
     I want 30s video"""
    

    
    print(f"üöÄ Starting campaign with query: {user_prompt[:50]}...")
    
    user_message = types.Content(
        role="user",
        parts=[types.Part(text=user_prompt)]
    )
    
    session = await session_service.create_session(
        app_name="ad_app", 
        user_id="jeny", 
        session_id="session_001",
        state={
            "user_query": user_prompt
        }
    )
    
    print(f"‚úÖ Session created with state keys: {list(session.state.keys())}")
    
    events = []
    
    # Run the agent with detailed logging
    async for event in runner.run_async(user_id="jeny", session_id=session.id, new_message=user_message):
        events.append(event)
        print("\n‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ RAW EVENT ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ")
        print(json.dumps(event.model_dump(), indent=2, default=str))
        print(
            f"[{type(event).__name__:<25}] "
            f"author={event.author:<15} "
            f"final={event.is_final_response()}"
        )
        # Print content parts (LLM output)
        if event.content and event.content.parts:
            for part in event.content.parts:
                if part.text:
                    print("üí¨ text ‚Üí", part.text.strip())
        # Tool calls
        for call in event.get_function_calls():
            print(f"üîß tool‚Äëcall ‚Üí {call.name}({json.dumps(call.args, indent=2)})")
        # Tool results
        for resp in event.get_function_responses():
            print(f"üì§ tool‚Äëresult ‚Üí {resp.name} ‚Üí {json.dumps(resp.response, indent=2)}")
        # State delta
        if event.actions and event.actions.state_delta:
            print("üß† state Œî ‚Üí", json.dumps(event.actions.state_delta, indent=2))
        if event.actions and event.actions.artifact_delta:
            print("üì¶ artifact Œî ‚Üí", json.dumps(event.actions.artifact_delta, indent=2))
    
    # Final output
    final_text = (
        events[-1].content.parts[0].text
        if events and events[-1].content and events[-1].content.parts else ""
    )
    print("\n===== ‚úÖ FINAL ANSWER =====")
    print(final_text or "(no text)")

# Execute the main function
await main()

> **Note:** Uncomment and run the cell below instead if you only want the agent's final response rather than the full event log.

In [None]:
# async def main():
#     session_service = InMemorySessionService()
#     runner = ADKRunner(agent=like_no_other_agent, app_name="ad_app", session_service=session_service)
    
#     user_prompt = """Tell me what you know about the like no other ad campaign done for sony bravia. there were bouncing balls let on the streets of San Francisco. I want 8s video."""
    
#     print(f"üöÄ Starting campaign with query: {user_prompt[:50]}...")
    
#     user_message = types.Content(
#         role="user",
#         parts=[types.Part(text=user_prompt)]
#     )
    
#     session = await session_service.create_session(
#         app_name="ad_app", 
#         user_id="jeny", 
#         session_id="session_001",
#         state={
#             "user_query": user_prompt
#         }
#     )
    
#     print(f"‚úÖ Session created with state keys: {list(session.state.keys())}")
    
#     # Run the agent
#     async for event in runner.run_async(user_id="jeny", session_id=session.id, new_message=user_message):
#         if event.is_final_response():
#             if event.content and event.content.parts:
#                 print(event.content.parts[0].text)
#             else:
#                 print("No final text response was returned by the agent.")

# # Execute the main function
# await main()