<a href="https://colab.research.google.com/github/jenochs/video-generation-book/blob/main/notebooks/hunyuan_colab_a100.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# HunyuanVideo on Google Colab A100

**Generate high-quality videos using Tencent's HunyuanVideo model on Google Colab A100 GPU**

🚀 **What you'll learn:**
- Run the 13B-parameter HunyuanVideo model on a Colab A100 (40GB)
- Optimize memory usage for large-scale video generation
- Generate videos up to 15 seconds with advanced prompting
- Export and download high-quality video results

⚡ **Requirements:**
- Google Colab Pro+ with A100 GPU access
- ~20-30 minutes for complete setup
- Google Drive for video storage (optional)

📚 **From the Book:** *Hands-On Video Generation with AI* - Chapter 3: Advanced Model Implementation

## 🔧 1. Environment Setup & GPU Verification

First, let's verify we have an A100 GPU and configure the environment for optimal performance.

In [None]:
# Check GPU availability and specifications
!nvidia-smi

# Verify we have A100 access
import subprocess
result = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader,nounits'], 
                       capture_output=True, text=True)
print("\n🖥️ GPU Information:")
gpu_info = result.stdout.strip().split(', ')
if len(gpu_info) >= 2:
    gpu_name, gpu_memory = gpu_info[0], int(gpu_info[1])
    print(f"   GPU: {gpu_name}")
    print(f"   Memory: {gpu_memory:,} MB ({gpu_memory/1024:.1f} GB)")
    
    if "A100" in gpu_name and gpu_memory >= 40000:
        print("   ✅ Perfect! A100 40GB detected - optimal for HunyuanVideo")
    elif "A100" in gpu_name:
        print("   ⚠️ A100 detected but check memory - may need optimization")
    else:
        print("   ❌ Warning: A100 GPU recommended for best performance")
        print("   💡 Consider upgrading to Colab Pro+ for A100 access")
else:
    print("   ❌ Unable to detect GPU information")

In [None]:
# Configure environment for maximum memory efficiency
import os

# Essential memory optimizations for A100 40GB.
# These are set BEFORE importing torch so they are already in the process
# environment ahead of any CUDA/allocator initialization.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
os.environ['CUDA_LAUNCH_BLOCKING'] = '0'  # Async for better performance
os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # Avoid warnings

import torch

# Enable optimized math operations (TF32 matmul/cuDNN and cuDNN autotuning)
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.benchmark = True

print("🔧 Environment configured for A100 optimization")
print(f"   PyTorch version: {torch.__version__}")
print(f"   CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"   CUDA version: {torch.version.cuda}")
    print(f"   GPU count: {torch.cuda.device_count()}")

## 📦 2. Install Dependencies

Install the latest versions of the libraries that HunyuanVideo requires.

In [ ]:
# Install PyTorch 2.7+ (Colab has 2.1, we need latest for best video generation)
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 --upgrade

In [ ]:
# Install only what we need (some packages already in Colab)
!pip install diffusers>=0.33.1 --upgrade  # Not in Colab, required for HunyuanVideo
!pip install transformers>=4.52.4 --upgrade  # Upgrade from Colab's 4.37.2
!pip install xformers --upgrade  # Critical for memory efficiency, may not be in Colab
!pip install imageio-ffmpeg  # For video processing, may not be in Colab

# These are usually pre-installed in Colab, but upgrade if needed
!pip install accelerate safetensors --upgrade

In [ ]:
# Install the videogenbook package
!pip install git+https://github.com/jenochs/video-generation-book.git

# Verify installation and check versions
import videogenbook
import torch
import diffusers
import transformers

print(f"✅ videogenbook v{videogenbook.__version__} installed successfully")
print(f"🔧 PyTorch: {torch.__version__}")
print(f"🤖 Diffusers: {diffusers.__version__}")  
print(f"📝 Transformers: {transformers.__version__}")
print(f"🎯 CUDA available: {torch.cuda.is_available()}")

## 🧠 3. Memory Monitoring & Optimization

Set up memory monitoring and configure HunyuanVideo for A100 40GB constraints.

In [None]:
import torch
import gc
from typing import Dict, Any

def get_gpu_memory() -> Dict[str, float]:
    """Get current GPU memory usage in GB.

    Returns:
        A dict with the keys ``total``, ``allocated``, ``cached`` and ``free``.
        ``total`` comes from the properties of CUDA device 0, ``allocated``
        from ``torch.cuda.memory_allocated()``, ``cached`` from
        ``torch.cuda.memory_reserved()``, and ``free`` is ``total - cached``.
        When CUDA is unavailable every value is ``0.0``; the key set is the
        same in both cases so callers can index it unconditionally.
    """
    if not torch.cuda.is_available():
        return {"total": 0.0, "allocated": 0.0, "cached": 0.0, "free": 0.0}

    bytes_per_gb = 1024**3
    total = torch.cuda.get_device_properties(0).total_memory / bytes_per_gb
    allocated = torch.cuda.memory_allocated() / bytes_per_gb
    cached = torch.cuda.memory_reserved() / bytes_per_gb
    # "free" is measured against memory reserved by the allocator, not just
    # the memory currently occupied by live tensors.
    free = total - cached

    return {
        "total": total,
        "allocated": allocated,
        "cached": cached,
        "free": free
    }

def print_memory_status(stage: str = "") -> None:
    """Print current memory status.

    Args:
        stage: Free-form label appended to the header line, e.g. "(Initial)".

    Reads the figures from ``get_gpu_memory()``. Keys missing from that dict
    are shown as 0.0 instead of raising ``KeyError``, so the report still
    prints on a runtime without a CUDA GPU.
    """
    mem = get_gpu_memory()
    total = mem.get('total', 0.0)
    allocated = mem.get('allocated', 0.0)
    cached = mem.get('cached', 0.0)
    free = mem.get('free', 0.0)

    print(f"🧠 GPU Memory {stage}:")
    print(f"   Total: {total:.1f} GB")
    print(f"   Allocated: {allocated:.1f} GB")
    print(f"   Cached: {cached:.1f} GB")
    print(f"   Free: {free:.1f} GB")

    # Memory warnings (thresholds in GB of free memory)
    if free < 10:
        print("   ⚠️ Low memory - consider reducing resolution/frames")
    elif free < 20:
        print("   ✅ Sufficient memory for standard generation")
    else:
        print("   🚀 Excellent memory - can use higher quality settings")

def cleanup_memory():
    """Run Python garbage collection, then empty the CUDA cache if a GPU is present."""
    gc.collect()
    if not torch.cuda.is_available():
        return
    # Empty the allocator cache first, then wait for pending GPU work to finish
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

# Initial memory check: print a baseline reading labelled "(Initial)"
print_memory_status("(Initial)")

## 🤖 4. Load HunyuanVideo Model

Load the HunyuanVideo model with A100-optimized settings for the 40GB memory constraint.

In [None]:
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
import torch
import time

print("🔄 Loading HunyuanVideo model (this may take 5-10 minutes)...")
print("📥 Downloading ~26GB of model weights...")

start_time = time.time()

# Community diffusers-format checkpoint (it ships no fp16 variant files)
model_id = "hunyuanvideo-community/HunyuanVideo"

try:
    # The diffusers HunyuanVideo documentation recommends bfloat16 for the
    # transformer and float16 for the text encoders and VAE, so the transformer
    # is loaded separately with its own dtype and handed to the pipeline.
    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
        model_id,
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
    )

    # Load with aggressive memory optimization for A100 40GB
    pipe = HunyuanVideoPipeline.from_pretrained(
        model_id,
        transformer=transformer,
        torch_dtype=torch.float16,  # FP16 for the remaining components
        use_safetensors=True,
        low_cpu_mem_usage=True,     # Minimize CPU memory during loading
    )

    print("\n🔧 Applying A100 optimizations...")

    # Essential memory optimizations for 40GB constraint
    pipe.enable_sequential_cpu_offload()  # Most aggressive memory optimization
    pipe.vae.enable_tiling()              # Reduce VAE memory usage
    pipe.vae.enable_slicing()             # Further VAE optimization

    # Enable memory-efficient attention if available
    # NOTE(review): confirm xFormers attention is compatible with the
    # HunyuanVideo transformer's attention processor before relying on it.
    try:
        pipe.enable_xformers_memory_efficient_attention()
        print("   ✅ xFormers memory-efficient attention enabled")
    except ImportError:
        print("   ⚠️ xFormers not available - using default attention")
    except Exception as e:
        print(f"   ⚠️ xFormers setup issue: {e}")

    # Configure scheduler for memory efficiency (only if it exposes the flag)
    if hasattr(pipe.scheduler, 'enable_low_mem_usage'):
        pipe.scheduler.enable_low_mem_usage = True

    load_time = time.time() - start_time
    print(f"\n✅ HunyuanVideo loaded successfully in {load_time:.1f}s")
    print("🎬 Ready for video generation!")

    print_memory_status("(After model loading)")

except Exception as e:
    # Print troubleshooting hints, then re-raise so the cell still fails visibly
    print(f"❌ Failed to load HunyuanVideo: {str(e)}")
    print("\n🔍 Troubleshooting steps:")
    print("1. Ensure you have A100 GPU access")
    print("2. Check available disk space (need ~30GB)")
    print("3. Restart runtime and try again")
    raise

from IPython.display import Video, display
import imageio
import numpy as np
import os

# Make sure the folder that receives rendered clips exists
os.makedirs("/content/videos", exist_ok=True)

# Smoke-test generation using settings sized for the A100 40GB
print("🎬 Generating test video...")
prompt = "A majestic golden eagle soaring over snow-capped mountains at sunset, cinematic camera movement"

try:
    # Sampling settings are gathered in one place, then passed to the pipeline
    generation_settings = dict(
        height=544,              # Optimized for A100 memory
        width=960,               # 16:9 aspect ratio
        num_frames=32,           # Shorter for memory efficiency
        guidance_scale=6.0,      # Good quality/memory balance
        num_inference_steps=25,  # Faster generation
    )
    seeded_generator = torch.Generator(device="cuda").manual_seed(42)
    pipeline_output = pipe(prompt=prompt, generator=seeded_generator, **generation_settings)
    video_frames = pipeline_output.frames[0]

    # Write the frames to an MP4 file
    output_path = "/content/videos/test_video.mp4"
    print("💾 Saving video...")

    with imageio.get_writer(output_path, fps=8, codec='h264') as writer:
        for current_frame in video_frames:
            # Anything with .convert() is treated as a PIL image; everything
            # else is written unchanged.
            is_pil_image = hasattr(current_frame, 'convert')
            writer.append_data(
                np.array(current_frame.convert('RGB')) if is_pil_image else current_frame
            )

    print(f"✅ Video saved to: {output_path}")

    # Show the result inline
    display(Video(output_path, width=600))

    print_memory_status("(After generation)")

except Exception as e:
    print(f"❌ Generation failed: {str(e)}")
    print("🔍 Debug info:")
    print(f"   Frame type: {type(video_frames[0]) if 'video_frames' in locals() else 'Not generated'}")
    cleanup_memory()
    raise

In [ ]:
from IPython.display import Video, display
import imageio
import numpy as np
import os
from datetime import datetime

# Create output directory (no error if it already exists)
os.makedirs("/content/videos", exist_ok=True)

## 🎬 5. Simple Video Generation Function

def generate_video_colab(prompt, height=544, width=960, num_frames=32, steps=25, seed=42):
    """Simple video generation function for Colab.

    Runs the notebook-level ``pipe`` on ``prompt`` and writes the frames to a
    timestamped MP4 under ``/content/videos``. Relies on names created by
    earlier cells: ``pipe``, ``torch``, ``imageio``, ``np``, ``datetime`` and
    ``cleanup_memory``.

    Args:
        prompt: Text description of the video to generate.
        height: Frame height in pixels.
        width: Frame width in pixels.
        num_frames: Number of frames requested from the pipeline.
            NOTE(review): the diffusers HunyuanVideo docs recommend values of
            the form 4*k + 1 (e.g. 33); the default stays at 32 so existing
            callers are unaffected.
        steps: Number of inference steps.
        seed: Seed for the CUDA random generator.

    Returns:
        Path of the saved MP4 file.

    Raises:
        Exception: Any error from generation or saving is printed,
            ``cleanup_memory()`` is called, and the error is re-raised.
    """
    print(f"🎬 Generating: {prompt[:60]}...")
    print(f"🎯 Settings: {width}x{height}, {num_frames} frames, {steps} steps")

    try:
        # Generate video
        result = pipe(
            prompt=prompt,
            height=height,
            width=width,
            num_frames=num_frames,
            guidance_scale=6.0,
            num_inference_steps=steps,
            generator=torch.Generator(device="cuda").manual_seed(seed)
        )

        # Extract the frames of the first (only) generated video
        video_frames = result.frames[0]

        # Save video under a time-of-day stamped name
        timestamp = datetime.now().strftime("%H%M%S")
        output_path = f"/content/videos/video_{timestamp}.mp4"

        print("💾 Saving video...")
        with imageio.get_writer(output_path, fps=8, codec='h264') as writer:
            for frame in video_frames:
                # PIL images are converted to RGB arrays; frames that are not
                # PIL images are written unchanged, as in the test cell.
                if hasattr(frame, 'convert'):
                    frame_array = np.array(frame.convert('RGB'))
                else:
                    frame_array = frame
                writer.append_data(frame_array)

        print(f"✅ Saved: {output_path}")
        return output_path

    except Exception as e:
        print(f"❌ Error: {e}")
        cleanup_memory()
        raise

# Tell the user the helper is defined and how to call it
print("🎬 Simple generation function ready!")
print("📝 Usage: video_path = generate_video_colab('your prompt here')")

# Generate and display a test video
video_path = generate_video_colab(
    "A majestic golden eagle soaring over snow-capped mountains at sunset, cinematic camera movement"
)

# Display the video
display(Video(video_path, width=600))
print_memory_status("(After generation)")