In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Setup
----

### Install Google Gen AI SDK for Python

In [None]:
# Install (or upgrade) the Google Gen AI SDK; %pip targets the running kernel's environment.
%pip install --upgrade --quiet google-genai

### Set up ffmpeg

In [None]:
# Refresh the apt package index, then install ffmpeg (used later in this notebook to concatenate the clips).
!apt-get update -qq && apt-get install -y ffmpeg -qq

### Authenticate your notebook environment (Colab only)
If you are running this notebook on Google Colab, run the following cell to authenticate your environment.

In [None]:
import sys

# Only run the Colab authentication flow when the `google.colab` module is
# already loaded in this kernel; elsewhere this cell is a no-op.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### Import libraries

In [None]:
import time

from IPython.display import Markdown, Video, display
from google import genai
from google.genai import types

import json
import numpy as np
import pandas as pd

import concurrent.futures

### Set Google Cloud project information and create client

In [None]:
import os

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    # Fall back to the environment. Deliberately not wrapped in str(): a missing
    # variable would otherwise become the literal string "None" and be passed
    # to the client as if it were a real project id.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

if not PROJECT_ID:
    raise ValueError(
        "No Google Cloud project configured: set PROJECT_ID in this cell or "
        "define the GOOGLE_CLOUD_PROJECT environment variable."
    )

# Region for the requests; defaults to us-central1 when the variable is unset.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

# Shared Gen AI client used by every model call in this notebook.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

### Define a helper function to display media

In [None]:
def show_video(video):
    """Display a video inline in the notebook.

    Args:
        video: One of
            * a ``gs://`` URI string - the object is copied into the working
              directory with ``gsutil cp`` and the local copy is shown;
            * any other string - treated as a path to a local file and shown
              as-is (no copy is attempted);
            * a bytes-like object - written to ``sample.mp4`` and shown.

    Raises:
        subprocess.CalledProcessError: If ``gsutil cp`` exits with an error.
    """
    if isinstance(video, str):
        if video.startswith("gs://"):
            # Local import keeps this helper self-contained.
            import subprocess

            file_name = video.split("/")[-1]
            # Argument list (no shell) so unusual characters in the URI cannot
            # be interpreted by a shell; check=True surfaces copy failures
            # instead of trying to display a file that was never downloaded.
            subprocess.run(["gsutil", "cp", video, file_name], check=True)
        else:
            # Already a local file: copying it onto itself is pointless.
            file_name = video
        display(Video(file_name, embed=True, width=600))
    else:
        with open("sample.mp4", "wb") as out_file:
            out_file.write(video)
        display(Video("sample.mp4", embed=True, width=600))

### Set up Gemini and video generation models

In [None]:
# Video model passed to `client.models.generate_videos` when rendering each clip.
video_model = "veo-3.0-generate-001" #@param{type:"string"} ["veo-3.0-generate-001", "veo-3.0-fast-generate-001"]

# Gemini model passed to `client.models.generate_content` for list generation and prompt writing.
gemini_model = "gemini-2.5-flash" #@param{type:"string"} ["gemini-2.5-flash", "gemini-2.5-pro", "gemini-2.5-flash-lite"]

## Set up elements of desired videos

In [None]:
# Themes handed to Gemini to produce the ranked subject list and the ranked scene list.
subject_theme = "ice cream flavors" #@param {type:"string"}
scene_theme = "fun summertime outdoor environments" #@param {type:"string"}
# Visual style inserted into the prompt-writing request for every video.
style = "Photorealistic"  # @param ["None", "Photorealistic", "Cinematic", "Vintage", "Japanese anime", "Claymation", "Stop-motion animation", "Van Gogh", "Surrealist painting", "Monochromatic black and white", "Vibrant and saturated", "Film noir style", "High-key lighting", "Low-key lighting", "Golden hour glow", "Volumetric lighting", "Backlighting to create a silhouette"]

# Number of items requested for each list (and therefore the number of subject/scene pairs).
num_items = 10 #@param {type:"integer", min:1, max:10}

# Generate Veo Prompts Using User-Provided Parameters
---

### Get lists of items that fit subject and scene themes

In [None]:
def get_list_of_items_that_fit_theme(num_items, theme):
    """Ask Gemini for a ranked list of items that fit a theme.

    Args:
        num_items: How many items to request. Also bounds the array length in
            the response schema and defines the allowed ``rank`` values
            ("1" .. str(num_items)).
        theme: Free-text theme the items must fit.

    Returns:
        The raw response text. The request asks for ``application/json``
        constrained to an array of objects with the string fields ``item``,
        ``visual_description`` and ``rank``.

    Raises:
        ValueError: If the model response contains no text.
    """
    list_generation_system_instruction = types.Part.from_text(text="""
    You are an expert list creator, who can create lists of various sizes that
    fit a specific theme.

    Make sure that each item returned is real and fits within the theme, and
    references the subject theme in the result - e.g. "vanilla ice cream"
    instead of just "vanilla."

    Make sure the visual description captures essential elements of how an item
    of this theme would show up visually (e.g. ice cream in a bowl or cone).

    Rank the items based on the prevalence, popularity, quality, or otherwise
    superiority of the items (with criteria depending on the list theme).

    Only return the list of items in ranked order along a visual description of
    each item, no other text.
    """)

    ranked_list_response_schema = {
        "type": "ARRAY",
        # Pin the array length: later cells pair the lists row by row and
        # expect exactly `num_items` entries in each.
        "minItems": num_items,
        "maxItems": num_items,
        "items": {
            "type": "OBJECT",
            "properties": {
                "item": {"type": "STRING"},
                "visual_description": {"type": "STRING"},
                "rank": {
                    "type": "STRING",
                    "enum": [str(i) for i in range(1, num_items + 1)],
                },
            },
            "required": ["item", "visual_description", "rank"],
        },
    }

    list_generation_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        max_output_tokens=8192,
        system_instruction=[list_generation_system_instruction],
        response_modalities=["TEXT"],
        response_mime_type="application/json",
        response_schema=ranked_list_response_schema,
    )

    list_generation_prompt = types.Part.from_text(
        text=f"Create a list of {num_items} that fit the theme of {theme}."
    )

    list_generation_response = client.models.generate_content(
        model=gemini_model,
        contents=list_generation_prompt,
        config=list_generation_config,
    )

    list_generation_response_text = list_generation_response.text

    # Fail here with a clear message rather than letting a later json.loads()
    # choke on a missing response.
    if not list_generation_response_text:
        raise ValueError(
            f"Gemini returned no text for theme {theme!r}; cannot build the item list."
        )

    return list_generation_response_text

# Ranked subject list first; print the raw JSON text returned by the model.
gemini_subject_items_list = get_list_of_items_that_fit_theme(
    num_items=num_items, theme=subject_theme
)
print(gemini_subject_items_list)

# Visual gap between the two printed lists.
print("\n\n")

# Ranked scene list, requested and printed the same way.
gemini_scene_items_list = get_list_of_items_that_fit_theme(
    num_items=num_items, theme=scene_theme
)
print(gemini_scene_items_list)

### Get prompts for Veo that combine subjects, scenes, and style

In [None]:
def generate_video_prompt(subject, subject_visual_description, scene,
  scene_visual_description, style):
    """Ask Gemini to write a single video-generation prompt for one subject/scene pair.

    Args:
        subject: Name of the subject to showcase.
        subject_visual_description: How the subject should look.
        scene: Name of the scene to place the subject in.
        scene_visual_description: How the scene should look.
        style: Visual style to request. A falsy value or the literal string
            "None" (one of the choices offered for the notebook's style
            setting) means "no particular style"; the style clause is then
            left out of the request.

    Returns:
        The text of the model response.
    """
    # Without this guard the request would literally read "using a None style".
    if style and style != "None":
        style_clause = f", using a {style} style"
    else:
        style_clause = ""

    gemini_video_prompt = f"""
    You are an expert video prompt engineer for Google's Veo model.
    Your task is to construct the most effective and optimal prompt string
    that showcases the subject {subject}, as described by
    {subject_visual_description}, in the scene {scene}, as described by
    {scene_visual_description}{style_clause}.

    Synthesize these pieces into a single, cohesive, and cinematic instruction.
    Make sure that the size of the subject is appropriate relative to the scene.
    Add in instructions for relevant audio, whether that be human speech,
    background noise, music, or other sound effects.

    Do not add any other new concepts. Output ONLY the final prompt string,
    without any introduction or explanation.
    """

    gemini_video_prompt_response = client.models.generate_content(
        model=gemini_model,
        contents=gemini_video_prompt,
    )

    return gemini_video_prompt_response.text

# Parse the two JSON lists returned by Gemini into data frames.
subject_items = pd.DataFrame(json.loads(gemini_subject_items_list))
scene_items = pd.DataFrame(json.loads(gemini_scene_items_list))

# Size the pairing from what was actually returned: assigning a permutation of
# length `num_items` raises if either list came back shorter or longer.
num_pairs = min(len(subject_items), len(scene_items))

# Give every row on each side an independent random key and join on it, which
# pairs each subject with exactly one randomly chosen scene.
items_and_scenes_combined = pd.merge(
    subject_items.head(num_pairs).assign(random=np.random.permutation(num_pairs)),
    scene_items.head(num_pairs).assign(random=np.random.permutation(num_pairs)),
    on="random",
    suffixes=("_subject", "_scene"),
).drop(columns=["random"])

def apply_generate_video_prompt_to_row(row):
    """Build the video prompt for one subject/scene record.

    `row` is a mapping with the keys `item_subject`,
    `visual_description_subject`, `item_scene` and `visual_description_scene`.
    The notebook-level `style` value is passed along unchanged.
    """
    subject_name = row['item_subject']
    subject_look = row['visual_description_subject']
    scene_name = row['item_scene']
    scene_look = row['visual_description_scene']
    return generate_video_prompt(
        subject_name, subject_look, scene_name, scene_look, style
    )

# Run the per-row prompt requests on a thread pool; Executor.map yields the
# results in the same order as the input records.
with concurrent.futures.ThreadPoolExecutor() as executor:
    video_prompts = list(
        executor.map(
            apply_generate_video_prompt_to_row,
            items_and_scenes_combined.to_dict(orient="records"),
        )
    )

# Attach each generated prompt to its subject/scene row.
items_and_scenes_combined["video_prompt"] = video_prompts

display(items_and_scenes_combined)

# Generate Videos for Each Prompt
----

In [None]:
def generate_video_from_prompt(video_prompt):
    """Generate one video for a prompt and wait for the operation to finish.

    Submits the request, then polls the long-running operation every 15
    seconds until it reports done.

    Args:
        video_prompt: Text prompt sent to the video model.

    Returns:
        The `video` attribute of the first generated video, or None when the
        operation reports an error or finishes without any generated video.
        Callers in this notebook filter out None results afterwards.
    """
    video_gen_operation = client.models.generate_videos(
        model=video_model,
        prompt=video_prompt,
        config=types.GenerateVideosConfig(
            aspect_ratio="16:9",
            number_of_videos=1,
            duration_seconds=8,
            resolution="1080p",
            person_generation="allow_adult",
            enhance_prompt=True,
            generate_audio=True,
        ),
    )

    print(f"Starting video generation with prompt:\n {video_prompt}\n")

    while not video_gen_operation.done:
        time.sleep(15)
        video_gen_operation = client.operations.get(video_gen_operation)
        print("Video generation in progress")

    # Report a failed operation instead of silently returning nothing.
    operation_error = getattr(video_gen_operation, "error", None)
    if operation_error:
        print(f"Video generation failed: {operation_error}")
        return None

    response = video_gen_operation.response
    generated_videos = getattr(response, "generated_videos", None) if response else None

    # A finished operation can carry an empty/absent video list; indexing [0]
    # would raise and take down every other request in the thread pool.
    if not generated_videos:
        print("Video generation finished without returning a video")
        return None

    return generated_videos[0].video

def generate_video_for_row(row):
    """Generate the video for one record, using its `video_prompt` entry."""
    prompt_text = row['video_prompt']
    return generate_video_from_prompt(prompt_text)

# Submit every row's video request on a thread pool; Executor.map returns the
# results in the same order as the input records.
with concurrent.futures.ThreadPoolExecutor() as executor:
    videos = list(
        executor.map(
            generate_video_for_row,
            items_and_scenes_combined.to_dict(orient="records"),
        )
    )

# Store each result next to the prompt that produced it.
items_and_scenes_combined["video"] = videos

display(items_and_scenes_combined)

# Putting Things Together and Outputting
---

### Put all prompts together into 1 text string

In [None]:
# Keep only the rows for which a video was actually generated, renumbering
# them from 0 so the "Video N" labels below are consecutive.
items_and_scenes_with_video = (
    items_and_scenes_combined[items_and_scenes_combined['video'].notna()]
    .reset_index(drop=True)
)

# One text entry per remaining row, concatenated into a single string.
all_items_scenes_prompts = "".join(
    f"Video {index + 1}\n"
    f"Subject: {row['item_subject']}\n"
    f"Scene: {row['item_scene']}\n"
    f"Prompt: {row['video_prompt']}\n\n\n"
    for index, row in items_and_scenes_with_video.iterrows()
)

print(all_items_scenes_prompts)

# Explicit UTF-8: model-written prompts may contain non-ASCII characters, and
# the platform default encoding is not guaranteed to be able to encode them.
with open('all_items_scenes_prompts.txt', 'w', encoding='utf-8') as f:
    f.write(all_items_scenes_prompts)

In [None]:
videos_list = items_and_scenes_with_video['video'].tolist()

# Create a directory to store the individual video files
!mkdir -p /videos

for i, video in enumerate(videos_list):
  with open(f"/videos/video_{i+1}.mp4", "wb") as f:
    f.write(video.video_bytes)

# Create a file containing the list of video files
with open("/videos/video_list.txt", "w") as f:
  for i in range(len(videos_list)):
    f.write(f"file '/videos/video_{i+1}.mp4'\n")

# Concatenate the videos using ffmpeg
!ffmpeg -f concat -safe 0 -i /videos/video_list.txt -c copy combined_video.mp4

### Download Output Files

In [None]:
import sys

# `google.colab` only exists on Colab; guard the import the same way the
# authentication cell does so the notebook also runs to completion elsewhere.
if "google.colab" in sys.modules:
    from google.colab import files

    files.download('all_items_scenes_prompts.txt')
    files.download('combined_video.mp4')
else:
    print(
        "Not running on Colab - skipping browser download. Output files: "
        "all_items_scenes_prompts.txt, combined_video.mp4"
    )

In [None]:
# Display the combined video inline, passing its file name as a string to the
# `show_video` helper defined earlier in this notebook.
show_video("combined_video.mp4")