# Cosmos Predict2 Full Pipeline on A100

This notebook runs both T5 encoding and Cosmos Predict2 inference on a single A100 GPU.

**Requirements:**
- Google Colab with A100 runtime
- 40GB GPU memory

**Note:** Make sure to select `Runtime > Change runtime type > A100 GPU` before running.

## 1. Installation Setup

Choose installation method: GitHub source (latest features) or PyPI (stable release).

In [None]:
%%capture
# Install other required dependencies
# NOTE: `%%capture` has to stay on the first line of the cell. It hides all
# output produced by this cell, including the confirmation print at the end.
# Model loading / quantization libraries
!pip install -q transformers accelerate bitsandbytes
# Video decoding, tensor rearrangement and video writing
!pip install -q decord einops imageio[ffmpeg]
# Image handling
!pip install -q opencv-python-headless pillow

print("Installation complete!")

### Install from PyPI (runs when `USE_GITHUB = False`)

In [None]:
%%capture
# PyPI installation path: the body only runs when USE_GITHUB is False.
# USE_GITHUB is not assigned in this cell, so the cell that sets it has to be
# executed first.
if not USE_GITHUB:
    # Install PyTorch with CUDA support
    # NOTE(review): these are cu121 wheels, while the cosmos-predict2 extra
    # below targets cu126 / torch 2.6.0 - confirm the mismatch is intended.
    !pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
    
    # Install Cosmos Predict2 from PyPI
    !pip install -q "cosmos-predict2[cu126]" --extra-index-url https://nvidia-cosmos.github.io/cosmos-dependencies/cu126_torch260/simple
    
    # Not visible while `%%capture` is active for this cell
    print("✅ Installed from PyPI")

### Install from GitHub Source (runs when `USE_GITHUB = True`)

In [None]:
%%capture
# GitHub installation path: the body only runs when USE_GITHUB is True.
# USE_GITHUB is not assigned in this cell, so the cell that sets it has to be
# executed first.
if USE_GITHUB:
    # Clone the repository
    !git clone https://github.com/NVIDIA/Cosmos-Predict2.git /content/cosmos-predict2
    
    # Change to the repo directory
    # (the editable install below uses the relative spec ".[cu126]")
    import os
    os.chdir('/content/cosmos-predict2')
    
    # Install PyTorch with CUDA support
    # NOTE(review): these are cu121 wheels, while the editable install below
    # targets cu126 / torch 2.6.0 - confirm the mismatch is intended.
    !pip install -q --upgrade pip
    !pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
    
    # Install cosmos-predict2 from source with CUDA support
    !pip install -q -e ".[cu126]" --extra-index-url https://nvidia-cosmos.github.io/cosmos-dependencies/cu126_torch260/simple
    
    # Add to Python path
    import sys
    sys.path.insert(0, '/content/cosmos-predict2')
    
    # Not visible while `%%capture` is active for this cell
    print("✅ Installed from GitHub source")

### Choose Installation Method (sets `USE_GITHUB`)

In [None]:
# Set installation method.
# The install cells read this flag:
#   True  -> clone and install from GitHub source (latest features)
#   False -> install the stable release from PyPI
USE_GITHUB = True

# This cell deliberately has no `%%capture`: its only output is the message
# below, which would otherwise never be shown.
if USE_GITHUB:
    print("Installing Cosmos Predict2 from GitHub source...")
else:
    print("Installing Cosmos Predict2 from PyPI...")

In [None]:
# Verify installations and setup paths
import importlib.metadata  # stdlib replacement for the deprecated pkg_resources
import os
import sys

# Add cosmos-predict2 to path if using GitHub installation
if os.path.exists('/content/cosmos-predict2'):
    sys.path.insert(0, '/content/cosmos-predict2')
    COSMOS_PATH = '/content/cosmos-predict2'
    print(f"✅ Using Cosmos Predict2 from GitHub: {COSMOS_PATH}")
else:
    # Later cells treat a falsy COSMOS_PATH as "installed via pip"
    COSMOS_PATH = None
    print("Using Cosmos Predict2 from pip installation")

# Verify key packages
packages = ['cosmos-predict2', 'transformers', 'torch', 'decord']
for package in packages:
    try:
        version = importlib.metadata.version(package)
        print(f"✅ {package}: {version}")
    except importlib.metadata.PackageNotFoundError:
        # Only "distribution not found" is handled here; anything else is a
        # real error and should surface.
        # For GitHub install, cosmos-predict2 might not show up in the metadata
        if package == 'cosmos-predict2' and COSMOS_PATH:
            print(f"✅ cosmos-predict2: installed from source at {COSMOS_PATH}")
        else:
            print(f"❌ {package} not found")

# Test import
try:
    from cosmos_predict2.inference import Video2WorldPipeline
    print("\n✅ Cosmos Predict2 imports working correctly")
except ImportError as e:
    print(f"\n❌ Import error: {e}")
    print("Trying alternative import...")
    try:
        # Add imaginaire to path as well
        if COSMOS_PATH:
            sys.path.insert(0, os.path.join(COSMOS_PATH, 'imaginaire'))
        from imaginaire.constants import get_cosmos_predict2_video2world_checkpoint
        print("✅ Alternative import successful")
    except ImportError as e2:
        # Report instead of raising so the remaining cells can still be
        # inspected; they will fail on their own imports if this matters.
        print(f"❌ Alternative import also failed: {e2}")

In [None]:
# Optional Google Drive persistence for everything this session generates.
# RECOMMENDED: leave enabled so results survive a runtime disconnect.
mount_drive = True  # False skips mounting and disables the Drive backups

if not mount_drive:
    print("⚠️ WARNING: Google Drive not mounted - outputs may be lost if runtime disconnects!")
    print("   Set mount_drive=True to enable automatic saving")
    # Falsy value tells later cells that no backup location exists
    drive_output_dir = None
else:
    from google.colab import drive
    drive.mount('/content/drive')
    print("✅ Google Drive mounted at /content/drive")

    import os
    from datetime import datetime

    # One output folder per session, named after the mount time
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    drive_output_dir = "/content/drive/MyDrive/cosmos_outputs_" + timestamp
    os.makedirs(drive_output_dir, exist_ok=True)
    print(f"📁 Output directory created: {drive_output_dir}")
    print("⚠️ All outputs will be automatically saved to Google Drive")

## 2. Download Model Checkpoints

In [None]:
# Download Model Checkpoints
MODEL_SIZE = "2B"  # Options: "2B", "5B", "14B"

print(f"Downloading Cosmos Predict2-{MODEL_SIZE} checkpoint...")
print("This may take a few minutes...")

from huggingface_hub import snapshot_download

# For GitHub installation, save checkpoints within the repo
if COSMOS_PATH:
    checkpoint_base_dir = os.path.join(COSMOS_PATH, "checkpoints")
    os.makedirs(checkpoint_base_dir, exist_ok=True)
else:
    checkpoint_base_dir = "/content/cosmos_checkpoints"

# `resume_download` is intentionally not passed: it is deprecated in
# huggingface_hub, which resumes interrupted downloads on its own.
checkpoint_dir = snapshot_download(
    repo_id=f"nvidia/Cosmos-Predict2-{MODEL_SIZE}-Video2World",
    cache_dir=checkpoint_base_dir,
)

print(f"✅ Checkpoint downloaded to: {checkpoint_dir}")

# Create symlinks for GitHub installation to match expected paths
if COSMOS_PATH:
    nvidia_dir = os.path.join(COSMOS_PATH, "checkpoints", "nvidia")
    os.makedirs(nvidia_dir, exist_ok=True)

    link_path = os.path.join(nvidia_dir, f"Cosmos-Predict2-{MODEL_SIZE}-Video2World")

    # A dangling link (target gone) makes os.path.exists() return False while
    # os.symlink() still fails with FileExistsError, so drop it first.
    if os.path.islink(link_path) and not os.path.exists(link_path):
        os.remove(link_path)

    # lexists() also sees symlinks themselves, not only their targets
    if not os.path.lexists(link_path):
        os.symlink(checkpoint_dir, link_path)
        print(f"✅ Created symlink: {link_path}")

    # Also check for tokenizer
    tokenizer_path = os.path.join(checkpoint_dir, "tokenizer", "tokenizer.pth")
    if not os.path.exists(tokenizer_path):
        print("⚠️ Tokenizer not found in checkpoint, will download separately if needed")

In [None]:
# Text prompts for the paper manipulation task (same as original notebook)
prompts = [
    "A robotic arm picks up white paper and places it into a red square target area on the table.",
    "High-definition video of SO-101 robot manipulating paper with precise movements.",
    "Robot gripper grasps paper and moves it to designated red square zone.",
    "Automated paper handling: robot transfers white sheet to red target area.",
]

# Run every prompt through the T5 encoder once and keep only the entry
# stored under "encoder_hidden_states", keyed by the prompt text.
print("Encoding prompts...")
encoded_prompts = {}

for prompt in prompts:
    encoded = t5_encoder.encode(prompt)
    hidden_states = encoded["encoder_hidden_states"]
    encoded_prompts[prompt] = hidden_states
    print(f"✅ Encoded: '{prompt[:40]}...' Shape: {hidden_states.shape}")

allocated_gb = torch.cuda.memory_allocated() / 1024**3
print(f"\n💾 Current GPU memory: {allocated_gb:.2f} GB")

## 3. Mount Google Drive (Optional)

Mount your Google Drive if you have videos or want to save outputs there.

In [None]:
from cosmos_predict2.inference import (
    Video2WorldPipeline,
    get_cosmos_predict2_video2world_pipeline,
)

# Load the Cosmos Predict2 pipeline onto the GPU.
# Reads MODEL_SIZE, COSMOS_PATH and checkpoint_dir, which are not assigned in
# this cell, and leaves the ready pipeline in the global `cosmos_pipe`.
print(f"Loading Cosmos Predict2-{MODEL_SIZE} pipeline...")

# Create pipeline configuration
config = get_cosmos_predict2_video2world_pipeline(model_size=MODEL_SIZE)

# Update config to use our downloaded checkpoint
if COSMOS_PATH:
    # GitHub installation - use local checkpoint paths
    checkpoint_path = os.path.join(
        COSMOS_PATH,
        "checkpoints",
        "nvidia",
        f"Cosmos-Predict2-{MODEL_SIZE}-Video2World"
    )
    
    # Check which model file exists
    model_16fps = os.path.join(checkpoint_path, "model-720p-16fps.pt")
    model_10fps = os.path.join(checkpoint_path, "model-720p-10fps.pt")
    
    # Preference order: 16fps weights, then 10fps weights
    if os.path.exists(model_16fps):
        config['dit_checkpoint_path'] = model_16fps
        print(f"Using 16fps model: {model_16fps}")
    elif os.path.exists(model_10fps):
        config['dit_checkpoint_path'] = model_10fps
        print(f"Using 10fps model: {model_10fps}")
    else:
        # Fallback to checkpoint_dir from HuggingFace
        # (this path is not checked for existence here)
        config['dit_checkpoint_path'] = os.path.join(checkpoint_dir, "model-720p-16fps.pt")
        print(f"Using HF checkpoint: {config['dit_checkpoint_path']}")
else:
    # PyPI installation - use downloaded checkpoint
    config['dit_checkpoint_path'] = os.path.join(
        checkpoint_dir,
        "model-720p-16fps.pt"  # or "model-720p-10fps.pt" for 10fps
    )

# Initialize pipeline
try:
    cosmos_pipe = Video2WorldPipeline.from_config(config)
    cosmos_pipe = cosmos_pipe.to("cuda")
    cosmos_pipe.eval()
    
    print(f"✅ Cosmos pipeline loaded successfully")
    print(f"💾 Current GPU memory: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
    
except Exception as e:
    # Any failure of the primary path triggers the second loading attempt
    print(f"❌ Error loading pipeline: {e}")
    print("\nTrying alternative loading approach...")
    
    # Alternative approach matching the original notebook
    # NOTE(review): the chdir presumably lets repo-relative checkpoint paths
    # resolve - confirm against the cosmos-predict2 sources.
    if COSMOS_PATH:
        os.chdir(COSMOS_PATH)
        
    try:
        from imaginaire.constants import get_cosmos_predict2_video2world_checkpoint
        dit_path = get_cosmos_predict2_video2world_checkpoint(model_size=MODEL_SIZE)
        
        # Uses a fresh default config plus an explicit dit_path instead of the
        # config that was edited above
        cosmos_pipe = Video2WorldPipeline.from_config(
            config=get_cosmos_predict2_video2world_pipeline(model_size=MODEL_SIZE),
            dit_path=dit_path,
        )
        cosmos_pipe = cosmos_pipe.to("cuda")
        cosmos_pipe.eval()
        
        print(f"✅ Pipeline loaded using alternative method")
        
    except Exception as e2:
        print(f"❌ Alternative loading also failed: {e2}")
        # Bare raise re-raises e2 so the cell fails visibly
        raise

import decord
from einops import rearrange
import time

def generate_video_cosmos(input_path, prompt_embedding, num_frames=16, fps=16):
    """Generate video using Cosmos Predict2 with same parameters as original notebook.

    A single frame is read from ``input_path`` and handed, together with the
    prompt embedding, to the global ``cosmos_pipe``.

    Args:
        input_path: Path to an image (``.jpg``/``.jpeg``/``.png``, any letter
            case) or to a video file that decord can open. For videos only
            the first frame is used.
        prompt_embedding: Pre-computed text embedding, passed through to
            ``cosmos_pipe`` unchanged.
        num_frames: Forwarded to ``cosmos_pipe`` as ``num_frames``.
        fps: Forwarded to ``cosmos_pipe`` as ``fps``.

    Returns:
        Whatever ``cosmos_pipe`` returns for this call.
    """
    # Local import: this cell can run before the cell that imports numpy.
    import numpy as np

    # Load input frame
    if input_path.lower().endswith(('.jpg', '.jpeg', '.png')):
        # Input is an image
        from PIL import Image
        with Image.open(input_path) as img:
            # Force 3 channels so RGBA / grayscale / palette images produce
            # the (h, w, 3) layout the rearrange below relies on.
            frames = np.array(img.convert("RGB"))[np.newaxis, ...]  # Add time dimension
    else:
        # Input is a video
        vr = decord.VideoReader(input_path)
        frames = vr[:1].asnumpy()  # Get first frame

    # Prepare input tensor: scale to [0, 1] and reorder to (1, c, t, h, w)
    frames_tensor = torch.from_numpy(frames).float() / 255.0
    frames_tensor = rearrange(frames_tensor, "t h w c -> 1 c t h w")
    frames_tensor = frames_tensor.to("cuda")

    print(f"Input shape: {frames_tensor.shape}")
    print(f"Generating {num_frames} frames at {fps} FPS...")

    start_time = time.time()

    # torch.autocast(device_type="cuda") replaces the deprecated
    # torch.cuda.amp.autocast() with the same defaults.
    with torch.no_grad(), torch.autocast(device_type="cuda"):
        output = cosmos_pipe(
            frames_tensor,
            prompt_embedding,
            num_frames=num_frames,
            fps=fps,
            seed=42  # fixed seed is passed on every call
        )

    generation_time = time.time() - start_time
    print(f"✅ Generation complete in {generation_time:.2f} seconds")
    # max() guards the division on clocks with coarse resolution
    print(f"   Speed: {num_frames/max(generation_time, 1e-9):.2f} frames/second")

    return output

# Generation parameters matching original notebook.
# Conservative defaults (8 frames, can increase to 16) for GPUs other than A100.
base_params = {"num_frames": 8, "fps": 16}

# Pick the parameter set from the name of the first CUDA device
on_a100 = torch.cuda.is_available() and 'A100' in torch.cuda.get_device_name(0)
if not on_a100:
    print("Using conservative settings for non-A100 GPU:")
    generation_params = base_params
else:
    print("🚀 A100 detected - Using optimized settings:")
    # 16 frames matches the original notebook
    generation_params = {"num_frames": 16, "fps": 16}

print(f"Generation parameters: num_frames={generation_params['num_frames']}, fps={generation_params['fps']}")

# The first prompt is the one rendered here (matching original notebook)
selected_prompt = prompts[0]
print(f"\nGenerating video for: '{selected_prompt[:50]}...'")

# Look up the embedding that was computed for it earlier
prompt_embedding = encoded_prompts[selected_prompt]

# Generate video.
# NOTE(review): input_image_path is not assigned in this cell; confirm which
# cell is expected to define it before this one runs.
output_video = generate_video_cosmos(
    input_image_path,
    prompt_embedding,
    **generation_params
)

In [None]:
import imageio
import shutil

def save_video(tensor, output_path="output_video.mp4", fps=16, auto_backup=True):
    """Save tensor as video file with automatic Google Drive backup.

    Args:
        tensor: ``torch.Tensor`` or array-like holding the frames. Accepted
            layouts are (B, C, T, H, W), (C, T, H, W) with C == 3, or
            (T, H, W, C). Only the first batch element is written.
        output_path: Local destination of the video file.
        fps: Frame rate handed to the imageio writer.
        auto_backup: When true and the global ``drive_output_dir`` is set,
            the file is copied there and a ``*_metadata.txt`` file is written
            next to the copy.

    Returns:
        ``output_path``.
    """
    # Local imports: this cell can run before the cells that import these.
    import numpy as np
    from datetime import datetime

    # Convert tensor to numpy
    if isinstance(tensor, torch.Tensor):
        # detach() / float(): tensors that track gradients or use bfloat16
        # cannot be converted by .numpy() directly.
        video = tensor.detach().float().cpu().numpy()
    else:
        video = np.asarray(tensor)

    # Rearrange dimensions if needed
    if video.ndim == 5:  # B C T H W
        video = video[0]  # Remove batch
    # Channels-first only when the last axis does not already look like a
    # channel axis; otherwise a 3-frame T H W C clip would be scrambled.
    if video.ndim == 4 and video.shape[0] == 3 and video.shape[-1] not in (1, 3, 4):
        video = np.transpose(video, (1, 2, 3, 0))  # T H W C

    # Normalize to 0-255 uint8
    if video.dtype != np.uint8:
        if video.size and video.max() <= 1.0:
            # NOTE(review): assumes [0, 1] data; if the pipeline emits [-1, 1]
            # the negative half is clipped to black - confirm the range.
            video = video * 255
        video = np.clip(video, 0, 255).astype(np.uint8)

    # Save video locally first; close the writer even if a frame fails
    writer = imageio.get_writer(output_path, fps=fps)
    try:
        for frame in video:
            writer.append_data(frame)
    finally:
        writer.close()

    print(f"✅ Saved video locally: {output_path}")

    # Auto-backup to Google Drive
    if auto_backup and drive_output_dir:
        drive_path = os.path.join(drive_output_dir, os.path.basename(output_path))
        shutil.copy2(output_path, drive_path)
        print(f"☁️ Backed up to Drive: {drive_path}")

        # Also save metadata. splitext() touches only the extension, so a
        # name without ".mp4" can no longer make this path equal the video's.
        metadata_path = os.path.splitext(drive_path)[0] + '_metadata.txt'
        with open(metadata_path, 'w', encoding='utf-8') as f:
            # Values come from notebook globals, not from the arguments
            f.write(f"Prompt: {selected_prompt}\n")
            f.write(f"Frames: {generation_params['num_frames']}\n")
            f.write(f"FPS: {generation_params['fps']}\n")
            f.write(f"Timestamp: {datetime.now().isoformat()}\n")
        print(f"📝 Metadata saved: {metadata_path}")

    return output_path

# Store the generated clip under a time-stamped name (Drive backup enabled)
clock_stamp = datetime.now().strftime('%H%M%S')
output_filename = f"cosmos_output_{clock_stamp}.mp4"
output_path = save_video(output_video, output_filename, fps=16, auto_backup=True)

# Inline preview of the file that was just written
print("\nGenerated video:")
display_video(output_path)

# Browser download is offered on top of the Drive backup
download_locally = input("\nDownload to your computer too? (y/n): ")
wants_download = download_locally.lower() == 'y'
if wants_download:
    from google.colab import files
    files.download(output_path)

# Render every prompt in turn, mirroring each result to Drive when available
batch_process = True  # Set to True to process all prompts

if batch_process:
    results = {}
    total = len(prompts)

    print(f"🎬 Batch processing {total} prompts...")
    if drive_output_dir:
        print(f"📁 All outputs will be saved to: {drive_output_dir}")

    for i, prompt in enumerate(prompts):
        print(f"\n[{i+1}/{total}] Processing: {prompt[:50]}...")

        # A failure on one prompt is reported and logged; the loop then
        # simply moves on to the next prompt.
        try:
            output = generate_video_cosmos(
                input_image_path,
                encoded_prompts[prompt],
                num_frames=generation_params['num_frames'],
                fps=generation_params['fps']
            )

            # File name carries the prompt index and the time of day
            output_file = f"output_{i:02d}_{datetime.now().strftime('%H%M%S')}.mp4"
            save_video(output, output_file, fps=16, auto_backup=True)
            results[prompt] = output_file

            # Append one progress line per finished video
            if drive_output_dir:
                progress_file = os.path.join(drive_output_dir, "batch_progress.txt")
                with open(progress_file, 'a') as f:
                    f.write(f"[{i+1}/{total}] {prompt[:80]} -> {output_file}\n")

            # Release cached GPU blocks before the next generation
            torch.cuda.empty_cache()

        except Exception as e:
            print(f"  ❌ Failed: {e}")
            # Append the error to a log file on Drive
            if drive_output_dir:
                error_file = os.path.join(drive_output_dir, "errors.txt")
                with open(error_file, 'a') as f:
                    f.write(f"Failed [{i+1}]: {prompt[:80]} - Error: {e}\n")

    print("\n✅ Batch processing complete!")
    print(f"Successfully generated {len(results)}/{total} videos")

    # Write a summary of the whole run to Drive
    if drive_output_dir:
        summary_file = os.path.join(drive_output_dir, "summary.txt")
        with open(summary_file, 'w') as f:
            f.write("Batch Processing Summary\n")
            f.write("========================\n")
            f.write(f"Total prompts: {total}\n")
            f.write(f"Successful: {len(results)}\n")
            f.write(f"Failed: {total - len(results)}\n\n")
            for prompt, file in results.items():
                f.write(f"{prompt[:80]}...\n  -> {file}\n\n")
        print(f"📊 Summary saved to: {summary_file}")

    for prompt, file in results.items():
        print(f"  - {prompt[:40]}... -> {file}")

In [None]:
# Memory usage and session management
# Prints GPU memory figures, writes a recovery script next to the Drive
# outputs (when drive_output_dir is set) and optionally frees the models.
print("Session Status:")
print(f"GPU allocated: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
print(f"GPU reserved: {torch.cuda.memory_reserved()/1024**3:.2f} GB")
# "free" here is total device memory minus memory allocated by this process
print(f"GPU free: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated())/1024**3:.2f} GB")

if drive_output_dir:
    print(f"\n✅ Outputs saved to Google Drive:")
    print(f"   {drive_output_dir}")
    print("\n⚠️ Your outputs are safe even if the session disconnects!")
    
    # Create recovery script for next session
    # The template is an f-string: {drive_output_dir} is filled in now, while
    # doubled braces and the escaped "\\n" stay literal in the generated file.
    recovery_script = f"""# Recovery Script
# Run this in a new session to continue from where you left off

import os
from google.colab import drive

# Mount Drive
drive.mount('/content/drive')

# Previous output directory
output_dir = "{drive_output_dir}"

# List generated videos
import glob
videos = glob.glob(os.path.join(output_dir, "*.mp4"))
print(f"Found {{len(videos)}} generated videos:")
for v in videos:
    print(f"  - {{os.path.basename(v)}}")

# Read progress
if os.path.exists(os.path.join(output_dir, "batch_progress.txt")):
    with open(os.path.join(output_dir, "batch_progress.txt"), 'r') as f:
        print("\\nBatch Progress:")
        print(f.read())
"""
    
    recovery_path = os.path.join(drive_output_dir, "recovery_script.py")
    with open(recovery_path, 'w') as f:
        f.write(recovery_script)
    print(f"📄 Recovery script saved: {recovery_path}")
else:
    print("\n⚠️ No Google Drive backup - outputs will be lost if session disconnects!")

# Optional: Free memory
cleanup = False  # Set to True to free all memory

if cleanup:
    print("\nCleaning up...")
    
    # Unload T5
    # At cell top level locals() is the notebook's global namespace, so this
    # checks whether the variable has been defined by an earlier cell.
    if 't5_encoder' in locals():
        t5_encoder.unload()
    
    # Unload Cosmos
    if 'cosmos_pipe' in locals():
        del cosmos_pipe
    
    # Clear cache
    # gc is imported here, i.e. only when cleanup is enabled
    import gc
    gc.collect()
    torch.cuda.empty_cache()
    
    print(f"✅ Cleanup complete")
    print(f"GPU allocated: {torch.cuda.memory_allocated()/1024**3:.2f} GB")

In [None]:
# Choose T5 model based on available memory
gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3  # GiB

# The driver reports less than the nominal card size (a "40GB" A100 shows up
# as roughly 39.5 GiB, a "16GB" T4 as roughly 15 GiB), so comparing against
# 40 / 16 would push both cards one tier down. Thresholds therefore sit a
# little below the nominal sizes.
A100_MIN_GIB = 38
T4_MIN_GIB = 14

if gpu_memory >= A100_MIN_GIB:  # A100
    # Can use T5-11B for best quality
    t5_model = "google-t5/t5-11b"  # 22GB in FP16
    print(f"Using T5-11B (best quality) on A100")
elif gpu_memory >= T4_MIN_GIB:  # T4 or similar
    # Use smaller model
    t5_model = "google/flan-t5-xl"  # 3GB
    print(f"Using Flan-T5-XL (efficient) on limited GPU")
else:
    t5_model = "google/flan-t5-base"  # <1GB
    print(f"Using Flan-T5-Base (minimal) on very limited GPU")

# Initialize and load T5 encoder (FP16 weights, no 8-bit quantization)
t5_encoder = OptimizedT5Encoder(model_name=t5_model)
t5_encoder.load(use_fp16=True, use_8bit=False)

## 6. Encode Text Prompts

In [None]:
# Text prompts for the paper manipulation task
prompts = [
    "The robot picks up a piece of paper from the table",
    "The robot folds the paper in half",
    "The robot places the folded paper back on the table",
    "The robot arm reaches for a sheet of paper",
    "The robot grasps the paper with its gripper",
]

# Run every prompt through the T5 encoder once and keep only the entry
# stored under "encoder_hidden_states", keyed by the prompt text.
print("Encoding prompts...")
encoded_prompts = {}

for prompt in prompts:
    encoded = t5_encoder.encode(prompt)
    hidden_states = encoded["encoder_hidden_states"]
    encoded_prompts[prompt] = hidden_states
    print(f"✅ Encoded: '{prompt[:40]}...' Shape: {hidden_states.shape}")

allocated_gb = torch.cuda.memory_allocated() / 1024**3
print(f"\n💾 Current GPU memory: {allocated_gb:.2f} GB")

## 7. Load Cosmos Predict2 Pipeline

In [None]:
from cosmos_predict2.inference import (
    Video2WorldPipeline,
    get_cosmos_predict2_video2world_pipeline,
)

print(f"Loading Cosmos Predict2-{MODEL_SIZE} pipeline...")

# Start from the stock configuration for the selected model size
config = get_cosmos_predict2_video2world_pipeline(model_size=MODEL_SIZE)

# Point the config at the downloaded weights
# (use "model-720p-10fps.pt" instead for the 10fps variant)
dit_weights = os.path.join(checkpoint_dir, "model-720p-16fps.pt")
config['dit_checkpoint_path'] = dit_weights

# Build the pipeline, move it to the GPU, then call eval() on it
cosmos_pipe = Video2WorldPipeline.from_config(config).to("cuda")
cosmos_pipe.eval()

print("✅ Cosmos pipeline loaded")
allocated_gb = torch.cuda.memory_allocated() / 1024**3
print(f"💾 Current GPU memory: {allocated_gb:.2f} GB")

## 8. Create or Load Input Video

You can either upload a video or create a simple test video.

In [None]:
import numpy as np
import cv2
from IPython.display import HTML, display
import base64

def create_test_video(output_path="test_input.mp4", width=1280, height=720, fps=16, duration=1):
    """Create a simple test video with a moving object.

    Every frame shows a vertical gradient background, a white circle that
    moves from left to right over the clip, and a "Test Input" label.

    Args:
        output_path: Destination file, written with the 'mp4v' fourcc.
        width: Frame width in pixels.
        height: Frame height in pixels.
        fps: Frame rate of the written file.
        duration: Clip length in seconds; ``int(fps * duration)`` frames are
            written.

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: If OpenCV cannot open the writer for ``output_path``.
    """
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        # Without this check a failed open would still report success below
        raise RuntimeError(f"Could not open video writer for: {output_path}")

    num_frames = int(fps * duration)

    # The gradient is identical for every frame, so build it once with numpy
    # instead of filling it row by row for each frame.
    ramp = (255 * np.arange(height) // height).astype(np.uint8)
    background = np.empty((height, width, 3), dtype=np.uint8)
    background[..., 0] = ramp[:, np.newaxis]
    background[..., 1] = 100
    background[..., 2] = 150

    try:
        for i in range(num_frames):
            frame = background.copy()

            # Add moving circle (simulating object): x sweeps from 20% of
            # the width towards 80% across the clip
            x = int(width * (0.2 + 0.6 * i / num_frames))
            y = height // 2
            cv2.circle(frame, (x, y), 50, (255, 255, 255), -1)

            # Add text (drawn after the circle, so it stays on top)
            cv2.putText(frame, "Test Input", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            out.write(frame)
    finally:
        # Release the writer even when a frame fails, so the file is closed
        out.release()

    print(f"Created test video: {output_path}")
    return output_path

def display_video(video_path):
    """Display video in notebook.

    The file is read completely, base64-encoded and embedded in an HTML
    ``<video>`` element as a data URI.

    Args:
        video_path: Path of the video file to show.
    """
    # Context manager closes the handle right after reading
    with open(video_path, 'rb') as video_file:
        video = video_file.read()
    encoded = base64.b64encode(video).decode('ascii')
    display(HTML(f'''
    <video width="640" height="360" controls>
        <source src="data:video/mp4;base64,{encoded}" type="video/mp4">
    </video>
    '''))

# Input source: synthetic test clip (default) or a user upload
use_test_video = True  # Set to False if you want to upload your own

if not use_test_video:
    from google.colab import files
    print("Please upload a video file:")
    uploaded = files.upload()
    # The first uploaded file name becomes the input
    input_video_path = list(uploaded)[0]
    print(f"Uploaded: {input_video_path}")
else:
    input_video_path = create_test_video()
    display_video(input_video_path)

## 9. Generate Video with Cosmos Predict2

In [None]:
import decord
from einops import rearrange
import time

def generate_video_cosmos(input_path, prompt_embedding, num_frames=121, fps=16):
    """Generate video using Cosmos Predict2.

    A single frame is read from ``input_path`` and handed, together with the
    prompt embedding, to the global ``cosmos_pipe``.

    Args:
        input_path: Path to a video file that decord can open (only the first
            frame is used) or to an image (``.jpg``/``.jpeg``/``.png``, any
            letter case), matching the other definition of this function in
            the notebook.
        prompt_embedding: Pre-computed text embedding, passed through to
            ``cosmos_pipe`` unchanged.
        num_frames: Forwarded to ``cosmos_pipe`` as ``num_frames``.
        fps: Forwarded to ``cosmos_pipe`` as ``fps``.

    Returns:
        Whatever ``cosmos_pipe`` returns for this call.
    """
    # Load input frame
    if input_path.lower().endswith(('.jpg', '.jpeg', '.png')):
        # Still image: local imports keep this branch self-contained
        import numpy as np
        from PIL import Image
        with Image.open(input_path) as img:
            # Force 3 channels and add a leading time axis
            frames = np.array(img.convert("RGB"))[np.newaxis, ...]
    else:
        # Load input video
        vr = decord.VideoReader(input_path)
        frames = vr[:1].asnumpy()  # Get first frame

    # Prepare input tensor: scale to [0, 1] and reorder to (1, c, t, h, w)
    frames_tensor = torch.from_numpy(frames).float() / 255.0
    frames_tensor = rearrange(frames_tensor, "t h w c -> 1 c t h w")
    frames_tensor = frames_tensor.to("cuda")

    print(f"Input shape: {frames_tensor.shape}")
    print(f"Generating {num_frames} frames at {fps} FPS...")

    start_time = time.time()

    # torch.autocast(device_type="cuda") replaces the deprecated
    # torch.cuda.amp.autocast() with the same defaults.
    with torch.no_grad(), torch.autocast(device_type="cuda"):
        output = cosmos_pipe(
            frames_tensor,
            prompt_embedding,
            num_frames=num_frames,
            fps=fps,
            seed=42  # fixed seed is passed on every call
        )

    generation_time = time.time() - start_time
    print(f"✅ Generation complete in {generation_time:.2f} seconds")
    # max() guards the division on clocks with coarse resolution
    print(f"   Speed: {num_frames/max(generation_time, 1e-9):.2f} frames/second")

    return output

# The first prompt in the list is the one rendered here
selected_prompt = prompts[0]
print(f"\nGenerating video for: '{selected_prompt}'")

# Look up the embedding that was computed for it earlier
prompt_embedding = encoded_prompts[selected_prompt]

# 121 frames at 16 fps is roughly 7.5 seconds of video
output_video = generate_video_cosmos(input_video_path, prompt_embedding, num_frames=121, fps=16)

## 10. Save and Display Results

In [None]:
import imageio

def save_video(tensor, output_path="output_video.mp4", fps=16):
    """Save tensor as video file.

    Args:
        tensor: ``torch.Tensor`` or array-like holding the frames. Accepted
            layouts are (B, C, T, H, W), (C, T, H, W) with C == 3, or
            (T, H, W, C). Only the first batch element is written.
        output_path: Destination of the video file.
        fps: Frame rate handed to the imageio writer.

    Returns:
        ``output_path``.
    """
    # Convert tensor to numpy
    if isinstance(tensor, torch.Tensor):
        # detach() / float(): tensors that track gradients or use bfloat16
        # cannot be converted by .numpy() directly.
        video = tensor.detach().float().cpu().numpy()
    else:
        video = np.asarray(tensor)

    # Rearrange dimensions if needed
    if video.ndim == 5:  # B C T H W
        video = video[0]  # Remove batch
    # Channels-first only when the last axis does not already look like a
    # channel axis; otherwise a 3-frame T H W C clip would be scrambled.
    if video.ndim == 4 and video.shape[0] == 3 and video.shape[-1] not in (1, 3, 4):
        video = np.transpose(video, (1, 2, 3, 0))  # T H W C

    # Normalize to 0-255 uint8
    if video.dtype != np.uint8:
        if video.size and video.max() <= 1.0:
            # NOTE(review): assumes [0, 1] data; if the pipeline emits [-1, 1]
            # the negative half is clipped to black - confirm the range.
            video = video * 255
        video = np.clip(video, 0, 255).astype(np.uint8)

    # Save video; close the writer even if a frame fails
    writer = imageio.get_writer(output_path, fps=fps)
    try:
        for frame in video:
            writer.append_data(frame)
    finally:
        writer.close()

    print(f"Saved video to: {output_path}")
    return output_path

# Write the generated clip to disk under a fixed name
output_path = save_video(output_video, "cosmos_output.mp4", fps=16)

# Inline preview of the file that was just written
print("\nGenerated video:")
display_video(output_path)

# Offer a browser download of the same file
from google.colab import files
download = input("Download the video? (y/n): ")
wants_download = download.lower() == 'y'
if wants_download:
    files.download(output_path)

## 11. Batch Processing (Optional)

Process multiple prompts efficiently.

In [None]:
# Render every prompt in turn (disabled by default)
batch_process = False  # Set to True to process all prompts

if batch_process:
    results = {}
    total = len(prompts)

    for i, prompt in enumerate(prompts):
        print(f"\n[{i+1}/{total}] Processing: {prompt[:50]}...")

        # 61 frames: shorter clips than the single-prompt run
        output = generate_video_cosmos(input_video_path, encoded_prompts[prompt], num_frames=61, fps=16)

        # One file per prompt, numbered by position in the list
        output_file = f"output_{i:02d}.mp4"
        save_video(output, output_file)
        results[prompt] = output_file

        # Release cached GPU blocks before the next generation
        torch.cuda.empty_cache()

    print("\n✅ Batch processing complete!")
    for prompt, file in results.items():
        print(f"  - {prompt[:40]}... -> {file}")

## 12. Memory Management and Cleanup

In [None]:
# Memory usage summary
print("Memory Usage Summary:")
print(f"GPU allocated: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
print(f"GPU reserved: {torch.cuda.memory_reserved()/1024**3:.2f} GB")
# "free" here is total device memory minus memory allocated by this process
print(f"GPU free: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated())/1024**3:.2f} GB")

# Optional: Free memory
cleanup = False  # Set to True to free all memory

if cleanup:
    # Imported here because nothing else in this cell brings gc into scope;
    # without it, enabling cleanup fails with NameError at gc.collect().
    import gc

    print("\nCleaning up...")

    # Unload T5
    # At cell top level locals() is the notebook's global namespace, so this
    # checks whether the variable has been defined by an earlier cell.
    if 't5_encoder' in locals():
        t5_encoder.unload()

    # Unload Cosmos
    if 'cosmos_pipe' in locals():
        del cosmos_pipe

    # Clear cache: collect dropped Python objects first, then release the
    # cached CUDA blocks
    gc.collect()
    torch.cuda.empty_cache()

    print(f"✅ Cleanup complete")
    print(f"GPU allocated: {torch.cuda.memory_allocated()/1024**3:.2f} GB")

## Tips and Troubleshooting

### Memory Optimization:
- **A100 (40GB)**: Can run T5-11B + Cosmos-14B
- **T4 (16GB)**: Use Flan-T5-XL + Cosmos-2B
- **Low memory**: Use 8-bit quantization or unload T5 after encoding

### Performance Tips:
- Enable TF32 on A100 for 2-3x speedup
- Use FP16 (half precision) for memory efficiency
- Batch encode prompts before generation

### Common Issues:
1. **OOM Error**: Reduce batch size or use smaller models
2. **Slow generation**: Check GPU type, use smaller num_frames
3. **Import errors**: Restart runtime after installing packages