In [1]:
# Depth Anything V2 Video Processing
# This notebook processes video files and generates depth map videos using Depth-Anything-V2

import torch
import cv2
import numpy as np
from PIL import Image
from transformers import pipeline
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Set device for processing
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Initialize the depth estimation pipeline
print("Loading Depth-Anything-V2 model...")
pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Small-hf", device=device)
print("Model loaded successfully!")

Using device: cuda
Loading Depth-Anything-V2 model...


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda


Model loaded successfully!


In [2]:
# Install required packages
# Run this cell if you haven't installed these packages yet

# !pip install opencv-python
# !pip install matplotlib
# !pip install tqdm

# Configuration
INPUT_VIDEO_PATH = "input_video.mp4"
OUTPUT_VIDEO_PATH = "output_depth_video.mp4"
TEMP_FRAMES_DIR = "temp_frames"

# Create temporary directory for frames if it doesn't exist
if not os.path.exists(TEMP_FRAMES_DIR):
    os.makedirs(TEMP_FRAMES_DIR)
    
print("Configuration set up complete!")


Configuration set up complete!


In [3]:
# Video Processing Functions

def process_frame(frame):
    """
    Process a single frame to generate depth map
    
    Args:
        frame: OpenCV frame (BGR format)
    
    Returns:
        depth_frame: Processed depth map as an image
    """
    # Convert BGR to RGB for PIL
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Convert to PIL Image
    pil_image = Image.fromarray(frame_rgb)
    
    # Generate depth map
    depth_result = pipe(pil_image)
    depth_map = depth_result["depth"]
    
    return depth_map

def depth_to_colormap(depth_map):
    """
    Convert depth map to a colorized representation
    
    Args:
        depth_map: PIL Image depth map
    
    Returns:
        colorized_frame: OpenCV frame with colorized depth map
    """
    # Convert depth map to numpy array
    depth_array = np.array(depth_map)
    
    # Normalize depth values to 0-255 range
    depth_normalized = ((depth_array - depth_array.min()) / 
                       (depth_array.max() - depth_array.min()) * 255).astype(np.uint8)
    
    # Apply colormap (using matplotlib's plasma colormap)
    colormap = cm.get_cmap('plasma')
    depth_colored = colormap(depth_normalized / 255.0)
    
    # Convert to 0-255 range and remove alpha channel
    depth_colored = (depth_colored[:, :, :3] * 255).astype(np.uint8)
    
    # Convert RGB to BGR for OpenCV
    depth_frame = cv2.cvtColor(depth_colored, cv2.COLOR_RGB2BGR)
    
    return depth_frame

def get_video_properties(video_path):
    """
    Get video properties like fps, width, height, and frame count
    
    Args:
        video_path: Path to input video
    
    Returns:
        Dictionary with video properties
    """
    cap = cv2.VideoCapture(video_path)
    
    properties = {
        'fps': int(cap.get(cv2.CAP_PROP_FPS)),
        'width': int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        'height': int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        'frame_count': int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    }
    
    cap.release()
    return properties

print("Video processing functions defined!")


Video processing functions defined!


In [4]:
# Main Video Processing Function

def process_video(input_path, output_path, show_progress=True):
    """
    Process entire video to generate depth map video
    
    Args:
        input_path: Path to input video file
        output_path: Path to output depth map video
        show_progress: Whether to show progress bar
    
    Returns:
        success: Boolean indicating success
    """
    # Check if input video exists
    if not os.path.exists(input_path):
        print(f"Error: Input video '{input_path}' not found!")
        return False
    
    # Get video properties
    print(f"Processing video: {input_path}")
    video_props = get_video_properties(input_path)
    print(f"Video properties: {video_props}")
    
    # Open input video
    cap = cv2.VideoCapture(input_path)
    
    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, video_props['fps'], 
                         (video_props['width'], video_props['height']))
    
    # Initialize progress bar
    frame_count = 0
    total_frames = video_props['frame_count']
    
    if show_progress:
        pbar = tqdm(total=total_frames, desc="Processing frames")
    
    try:
        while True:
            # Read frame
            ret, frame = cap.read()
            if not ret:
                break
            
            # Process frame to get depth map
            depth_map = process_frame(frame)
            
            # Convert depth map to colorized representation
            depth_frame = depth_to_colormap(depth_map)
            
            # Write frame to output video
            out.write(depth_frame)
            
            frame_count += 1
            if show_progress:
                pbar.update(1)
        
        print(f"\\nProcessing complete! Processed {frame_count} frames.")
        print(f"Output video saved to: {output_path}")
        
    except Exception as e:
        print(f"Error during processing: {str(e)}")
        return False
    
    finally:
        # Release everything
        cap.release()
        out.release()
        if show_progress:
            pbar.close()
    
    return True

print("Main video processing function defined!")


Main video processing function defined!


In [5]:
# Optional: Visualization and Testing Functions

def test_single_frame(video_path, frame_number=0):
    """
    Test depth processing on a single frame for visualization
    
    Args:
        video_path: Path to input video
        frame_number: Frame number to test (default: 0)
    """
    if not os.path.exists(video_path):
        print(f"Error: Video '{video_path}' not found!")
        return
    
    # Open video and seek to specific frame
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
    
    ret, frame = cap.read()
    if not ret:
        print(f"Error: Could not read frame {frame_number}")
        cap.release()
        return
    
    # Process the frame
    print(f"Processing frame {frame_number}...")
    depth_map = process_frame(frame)
    depth_frame = depth_to_colormap(depth_map)
    
    # Create side-by-side comparison
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    
    # Original frame (convert BGR to RGB for matplotlib)
    original_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    axes[0].imshow(original_rgb)
    axes[0].set_title('Original Frame')
    axes[0].axis('off')
    
    # Depth map (convert BGR to RGB for matplotlib)
    depth_rgb = cv2.cvtColor(depth_frame, cv2.COLOR_BGR2RGB)
    axes[1].imshow(depth_rgb)
    axes[1].set_title('Depth Map')
    axes[1].axis('off')
    
    plt.tight_layout()
    plt.show()
    
    cap.release()
    print("Single frame test completed!")

def display_video_info(video_path):
    """
    Display information about the input video
    
    Args:
        video_path: Path to video file
    """
    if not os.path.exists(video_path):
        print(f"Error: Video '{video_path}' not found!")
        return
    
    props = get_video_properties(video_path)
    print(f"Video Information for: {video_path}")
    print(f"  Resolution: {props['width']}x{props['height']}")
    print(f"  FPS: {props['fps']}")
    print(f"  Total Frames: {props['frame_count']}")
    print(f"  Duration: {props['frame_count'] / props['fps']:.2f} seconds")

print("Visualization and testing functions defined!")


Visualization and testing functions defined!


In [6]:
# MAIN EXECUTION CELL
# Run this cell to process the video

# Step 1: Display video information
print("=== VIDEO PROCESSING PIPELINE ===\\n")
print("Step 1: Checking input video...")
display_video_info(INPUT_VIDEO_PATH)

# Step 2: Optional - Test single frame (uncomment to test)
# print("\\nStep 2: Testing single frame...")
# test_single_frame(INPUT_VIDEO_PATH, frame_number=0)

# Step 3: Process the entire video
print("\\nStep 3: Processing entire video...")
print("This may take a while depending on video length and hardware...")
success = process_video(INPUT_VIDEO_PATH, OUTPUT_VIDEO_PATH, show_progress=True)

if success:
    print("\\n✅ SUCCESS! Video processing completed successfully!")
    print(f"Input video: {INPUT_VIDEO_PATH}")
    print(f"Output video: {OUTPUT_VIDEO_PATH}")
    print("\\nThe output video contains depth maps where:")
    print("- Closer objects appear in warm colors (red, orange, yellow)")
    print("- Farther objects appear in cool colors (blue, purple)")
else:
    print("\\n❌ FAILED! Video processing encountered an error.")
    print("Please check the error messages above and ensure:")
    print("1. The input video file exists")
    print("2. The input video format is supported")
    print("3. You have sufficient disk space")
    print("4. All required packages are installed")


=== VIDEO PROCESSING PIPELINE ===\n
Step 1: Checking input video...
Video Information for: input_video.mp4
  Resolution: 1620x1080
  FPS: 30
  Total Frames: 317
  Duration: 10.57 seconds
\nStep 3: Processing entire video...
This may take a while depending on video length and hardware...
Processing video: input_video.mp4
Video properties: {'fps': 30, 'width': 1620, 'height': 1080, 'frame_count': 317}


  colormap = cm.get_cmap('plasma')
Processing frames:   3%|▎         | 10/317 [00:03<00:53,  5.70it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Processing frames: 100%|██████████| 317/317 [00:48<00:00,  6.49it/s]

\nProcessing complete! Processed 317 frames.
Output video saved to: output_depth_video.mp4
\n✅ SUCCESS! Video processing completed successfully!
Input video: input_video.mp4
Output video: output_depth_video.mp4
\nThe output video contains depth maps where:
- Closer objects appear in warm colors (red, orange, yellow)
- Farther objects appear in cool colors (blue, purple)





In [7]:
# CLEANUP AND UTILITIES

def cleanup_temp_files():
    """
    Clean up temporary files and directories
    """
    import shutil
    
    if os.path.exists(TEMP_FRAMES_DIR):
        shutil.rmtree(TEMP_FRAMES_DIR)
        print(f"Removed temporary directory: {TEMP_FRAMES_DIR}")
    else:
        print("No temporary files to clean up.")

def batch_process_videos(video_list, output_prefix="depth_"):
    """
    Process multiple videos in batch
    
    Args:
        video_list: List of video file paths
        output_prefix: Prefix for output files
    """
    results = []
    
    for i, video_path in enumerate(video_list):
        print(f"\\n=== Processing video {i+1}/{len(video_list)}: {video_path} ===")
        
        # Generate output filename
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        output_path = f"{output_prefix}{video_name}.mp4"
        
        # Process video
        success = process_video(video_path, output_path)
        results.append((video_path, output_path, success))
    
    # Print summary
    print("\\n=== BATCH PROCESSING SUMMARY ===")
    for video_path, output_path, success in results:
        status = "✅ SUCCESS" if success else "❌ FAILED"
        print(f"{status}: {video_path} -> {output_path}")
    
    return results

# Optional: Clean up temporary files
# cleanup_temp_files()

print("Cleanup and utility functions defined!")


Cleanup and utility functions defined!


# Depth Anything V2 Video Processing - Documentation

## Overview
This notebook processes video files to generate depth map visualizations using the Depth-Anything-V2 model. The output video shows depth information where warm colors (red, orange, yellow) represent closer objects and cool colors (blue, purple) represent farther objects.

## Key Features
- **Video Processing**: Processes entire video files frame by frame
- **Depth Estimation**: Uses Depth-Anything-V2-Small-hf model for accurate depth estimation
- **Colorized Output**: Converts depth maps to intuitive color representations
- **Progress Tracking**: Shows processing progress with tqdm progress bars
- **Batch Processing**: Support for processing multiple videos
- **Error Handling**: Comprehensive error handling and informative messages

## Required Dependencies
```bash
pip install torch transformers opencv-python matplotlib tqdm pillow
```

## Usage Steps
1. **Install Dependencies**: Run the installation commands in cell 2
2. **Configure Paths**: Modify `INPUT_VIDEO_PATH` and `OUTPUT_VIDEO_PATH` in cell 2
3. **Run Processing**: Execute the main execution cell (cell 6)
4. **Optional Testing**: Uncomment the single frame test to preview results

## Technical Implementation

### Core Components
- **`process_frame()`**: Processes individual frames using the depth estimation pipeline
- **`depth_to_colormap()`**: Converts depth maps to colorized visualizations using matplotlib's plasma colormap
- **`process_video()`**: Main function that handles the complete video processing pipeline
- **`get_video_properties()`**: Extracts video metadata (resolution, fps, frame count)

### Processing Pipeline
1. Load video and extract properties
2. Initialize depth estimation model
3. Process each frame:
   - Convert BGR to RGB for PIL compatibility
   - Generate depth map using Depth-Anything-V2
   - Convert depth map to colorized representation
   - Write processed frame to output video
4. Save final video with original fps and resolution

### Performance Considerations
- Uses GPU acceleration if available (CUDA)
- Processes videos frame by frame to manage memory usage
- Progress tracking for long videos
- Temporary file cleanup utilities

## Output Format
- **File Format**: MP4 video
- **Resolution**: Same as input video
- **Frame Rate**: Same as input video
- **Color Scheme**: Plasma colormap (purple = far, yellow = close)

## Troubleshooting
- Ensure input video exists and is in a supported format
- Check that all dependencies are installed
- Verify sufficient disk space for output video
- For large videos, ensure adequate RAM/VRAM

## Advanced Usage
- **Batch Processing**: Use `batch_process_videos()` for multiple files
- **Custom Colormaps**: Modify `depth_to_colormap()` to use different color schemes
- **Frame Testing**: Use `test_single_frame()` to preview results before full processing
