# =================================================
# CLEAR NATURE
# Detect, Segment and Inpaint
# =================================================


In [1]:
# ============================================================================
# 1. INSTALLATIONS
# ============================================================================

import os
import sys

print("Installing dependencies compatible with Colab's Python 3.12...")

# Check current PyTorch version (Colab usually has it pre-installed)
try:
    import torch
    print(f"Existing PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
except:
    # Install PyTorch if not present (use latest stable for Python 3.12)
    print("Installing PyTorch...")
    !pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121

# Install Ultralytics YOLO (YOLOv8) for instance segmentation
# Use latest version compatible with current PyTorch
!pip install ultralytics

# OpenCV is usually pre-installed in Colab, but ensure we have it
!pip install opencv-python opencv-python-headless

# Install core dependencies (use versions compatible with Python 3.12)
!pip install Pillow scikit-image imageio imageio-ffmpeg

# Clone E2FGVI repository
if not os.path.exists('E2FGVI'):
    !git clone https://github.com/MCG-NKU/E2FGVI.git
    print("✓ E2FGVI repository cloned")
else:
    print("✓ E2FGVI repository already exists")

%cd E2FGVI

# Install mmcv-full (critical for E2FGVI - requires CUDA ops)
# E2FGVI needs mmcv._ext with ModulatedDeformConv2d (deformable convolution)
print("\nInstalling mmcv-full with CUDA extensions...")

# Get current PyTorch and CUDA versions
import torch
torch_version = '.'.join(torch.__version__.split('.')[:2])  # e.g., "2.5"
cuda_version = torch.version.cuda.replace('.', '')[:4]  # e.g., "121" from "12.1"

print(f"PyTorch version: {torch_version}")
print(f"CUDA version: {cuda_version}")

# Uninstall any existing mmcv first
!pip uninstall -y mmcv mmcv-full 2>/dev/null || true

# Install mmcv-full from OpenMMLab pre-built wheels
# This avoids compilation and should work with Python 3.12
mmcv_url = f"https://download.openmmlab.com/mmcv/dist/cu{cuda_version}/torch{torch_version}/index.html"
print(f"Installing from: {mmcv_url}")

!pip install mmcv-full==1.7.2 -f {mmcv_url}

# Verify installation
print("\nVerifying mmcv-full installation...")
try:
    import mmcv
    from mmcv.ops import ModulatedDeformConv2d
    print(f"✓ mmcv-full {mmcv.__version__} installed successfully")
    print("✓ mmcv.ops.ModulatedDeformConv2d available (required by E2FGVI)")
except ImportError as e:
    print(f"✗ Import failed: {e}")
    print("\n⚠️  mmcv-full installation unsuccessful.")
    print("Trying alternative: building from source (this will take 10-15 minutes)...")

    # Last resort: build from source
    !pip install mmcv-full==1.7.2 --no-cache-dir

    try:
        import mmcv
        from mmcv.ops import ModulatedDeformConv2d
        print(f"✓ mmcv-full {mmcv.__version__} built successfully")
    except ImportError as e2:
        print(f"✗ Still failed: {e2}")
        print("\n" + "="*80)
        print("CRITICAL ERROR: Unable to install mmcv-full")
        print("="*80)
        print("E2FGVI requires mmcv-full with CUDA extensions.")
        print("This is a known compatibility issue with newer Python/PyTorch versions.")
        print("\nPossible solutions:")
        print("1. Use Python 3.10 or earlier (recommended)")
        print("2. Try a different Colab runtime")
        print("3. Use a local environment with compatible versions")
        print("="*80)

# Additional E2FGVI dependencies
!pip install tensorboard tqdm pyyaml addict yapf

# Check if E2FGVI needs patching for mmcv 2.x compatibility
print("\nChecking E2FGVI compatibility with mmcv 2.x...")
import_check = """
import sys
sys.path.insert(0, '/content/E2FGVI')
try:
    from model.modules.flow_comp import SPyNet
    print("✓ E2FGVI imports successfully")
except ImportError as e:
    print(f"✗ Import error: {e}")
    if "mmcv.cnn" in str(e):
        print("  E2FGVI needs mmcv 1.x (mmcv-full). Applying compatibility fix...")
"""

try:
    exec(import_check)
except:
    print("  Running compatibility patch...")
    # Create a simple compatibility shim if needed
    pass

# Download E2FGVI-HQ pretrained model from correct URL
!mkdir -p release_model

# Check if model already exists
if not os.path.exists('release_model/E2FGVI-HQ-CVPR22.pth'):
    print("\nDownloading E2FGVI-HQ model...")
    # Use alternative download methods
    try:
        # Try Google Drive link
        !gdown --fuzzy "https://drive.google.com/file/d/10wGdKSUOie0XmCr8SQ2A2FeDe-mfn5w3/view?usp=sharing" -O release_model/E2FGVI-HQ-CVPR22.pth
    except:
        # If gdown fails, try wget with direct link
        !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=10wGdKSUOie0XmCr8SQ2A2FeDe-mfn5w3' -O release_model/E2FGVI-HQ-CVPR22.pth
else:
    print("✓ E2FGVI-HQ model already exists")

# download smp
!pip install -q --no-cache-dir kagglehub segmentation-models-pytorch albumentations

# Verify model downloaded correctly
import os
if os.path.exists('release_model/E2FGVI-HQ-CVPR22.pth'):
    model_size = os.path.getsize('release_model/E2FGVI-HQ-CVPR22.pth') / (1024*1024)
    print(f"✓ E2FGVI-HQ model downloaded successfully ({model_size:.1f} MB)")
else:
    print("✗ Model download failed! You may need to manually download from:")
    print("   https://drive.google.com/file/d/10wGdKSUOie0XmCr8SQ2A2FeDe-mfn5w3/view")

%cd ..

print("\n" + "="*80)
print("✓ ALL INSTALLATIONS COMPLETED!")
print("="*80)

# Verify critical dependencies
print("\nVerifying installations...")
errors = []

try:
    import torch
    print(f"✓ PyTorch {torch.__version__} (CUDA: {torch.cuda.is_available()})")
except ImportError:
    errors.append("PyTorch not installed")

try:
    import mmcv
    print(f"✓ mmcv {mmcv.__version__}")
except ImportError:
    errors.append("mmcv not installed - E2FGVI will NOT work!")

try:
    from ultralytics import YOLO
    print(f"✓ Ultralytics YOLO installed")
except ImportError:
    errors.append("Ultralytics not installed")

try:
    import cv2
    print(f"✓ OpenCV {cv2.__version__}")
except ImportError:
    errors.append("OpenCV not installed")

try:
  import segmentation_models_pytorch as smp
  print(f"✓ segmentation_models_pytorch installed")
except ImportError:
    errors.append("segmentation_models_pytorch not installed")

if errors:
    print("\n" + "="*80)
    print("⚠️  INSTALLATION ERRORS DETECTED:")
    print("="*80)
    for error in errors:
        print(f"  ✗ {error}")
    print("\nPlease fix these errors before proceeding.")
    print("For mmcv issues, try: !pip install mmcv-full")
else:
    print("\n✓ All critical dependencies verified successfully!")


Installing dependencies compatible with Colab's Python 3.12...
Existing PyTorch version: 2.9.0+cu126
CUDA available: True
Collecting ultralytics
  Downloading ultralytics-8.3.231-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.231-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.231 ultralytics-thop-2.0.18
Cloning into 'E2FGVI'...
remote: Enumerating objects: 345, done.[K
remote: Counting objects: 100% (80/80), done.[K
remote: Compressing objects: 100% (50/50), done.[K
remote: Total 345 (delta 51), reused 30 (delta 30), pack-reused 265 (from 1)[K
Receiving objects: 100% (345/345), 36.75 



✓ mmcv-full 1.7.2 installed successfully
✓ mmcv.ops.ModulatedDeformConv2d available (required by E2FGVI)

Checking E2FGVI compatibility with mmcv 2.x...
✓ E2FGVI imports successfully

Downloading E2FGVI-HQ model...
Downloading...
From (original): https://drive.google.com/uc?id=10wGdKSUOie0XmCr8SQ2A2FeDe-mfn5w3
From (redirected): https://drive.google.com/uc?id=10wGdKSUOie0XmCr8SQ2A2FeDe-mfn5w3&confirm=t&uuid=838b5304-977c-4757-af2a-8b1c9dfa6c1d
To: /content/E2FGVI/release_model/E2FGVI-HQ-CVPR22.pth
100% 165M/165M [00:03<00:00, 47.8MB/s]
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.8/154.8 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25h✓ E2FGVI-HQ model downloaded successfully (156.9 MB)
/content

✓ ALL INSTALLATIONS COMPLETED!

Verifying installations...
✓ PyTorch 2.9.0+cu126 (CUDA: True)
✓ mmcv 1.7.2
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings w

In [9]:
# ============================================================================
# 2. IMPORTS
# ============================================================================

import cv2
import numpy as np
from pathlib import Path
import shutil
from typing import Tuple, List, Optional, Dict
from PIL import Image
import torch
from ultralytics import YOLO
import glob
import subprocess
from datetime import datetime
import segmentation_models_pytorch as smp
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pathlib import Path
from tqdm import tqdm

# Add E2FGVI to path
sys.path.append('E2FGVI')

print(f"\nEnvironment Information:")
print(f"  PyTorch version: {torch.__version__}")
print(f"  CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"  CUDA version: {torch.version.cuda}")
    print(f"  GPU: {torch.cuda.get_device_name(0)}")
print(f"  OpenCV version: {cv2.__version__}")



Environment Information:
  PyTorch version: 2.9.0+cu126
  CUDA available: True
  CUDA version: 12.6
  GPU: Tesla T4
  OpenCV version: 4.12.0


In [10]:
# ============================================================================
# 3. FUNCTIONS FOR CREATING FRAMES AND MASKS
# ============================================================================

def create_project_structure(video_path: str, base_output_dir: str = "/content/output") -> dict:
    """
    Create organized folder structure for video processing.

    Args:
        video_path: Path to input video file
        base_output_dir: Base directory for all outputs

    Returns:
        Dictionary containing all relevant paths
    """
    video_name = Path(video_path).stem
    project_dir = os.path.join(base_output_dir, video_name)

    paths = {
        'project_dir': project_dir,
        'frames_dir': os.path.join(project_dir, 'frames'),
        'masks_dir': os.path.join(project_dir, 'masks'),
        'yolo_masks_dir': os.path.join(project_dir, 'masks', 'yolo'),
        'combined_masks_dir': os.path.join(project_dir, 'masks', 'combined'),
        'results_dir': os.path.join(project_dir, 'results'),
        'video_name': video_name,
        'video_path': video_path
    }

    # Create all directories
    for key, path in paths.items():
        if key.endswith('_dir'):
            os.makedirs(path, exist_ok=True)

    return paths

def extract_frames(video_path: str, frames_dir: str, fps: Optional[int] = None,
                   target_resolution: Optional[Tuple[int, int]] = None) -> Tuple[int, int, int, float]:
    """
    Extract frames from video as numbered PNG files (E2FGVI format: 00000.png, 00001.png, ...).

    Args:
        video_path: Path to input video
        frames_dir: Directory to save frames
        fps: Target FPS (None = use original)
        target_resolution: Target resolution as (width, height) tuple (None = use original)

    Returns:
        Tuple of (frame_count, width, height, original_fps)
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    original_fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Determine output resolution
    if target_resolution:
        width, height = target_resolution
        print(f"  Original: {original_width}x{original_height}, Target: {width}x{height}, {original_fps:.2f} FPS, {total_frames} frames")
    else:
        width, height = original_width, original_height
        print(f"  Video info: {width}x{height}, {original_fps:.2f} FPS, {total_frames} frames")

    # Calculate frame interval if FPS is specified
    frame_interval = 1 if fps is None else int(original_fps / fps)

    frame_count = 0
    saved_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Save frame if it matches the interval
        if frame_count % frame_interval == 0:
            # Resize frame if target resolution specified
            if target_resolution and (original_width != width or original_height != height):
                frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)

            # E2FGVI format: 5-digit zero-padded PNG files
            frame_filename = os.path.join(frames_dir, f"{saved_count:05d}.png")
            cv2.imwrite(frame_filename, frame)
            saved_count += 1

        frame_count += 1

        if frame_count % 100 == 0:
            print(f"  Processing frames: {frame_count}/{total_frames}", end='\r')

    cap.release()
    print(f"\n  ✓ Extracted {saved_count} frames")

    return saved_count, width, height, original_fps


def detect_and_segment_people(frames_dir: str, masks_dir: str,
                               model_name: str = 'yolov8x-seg.pt',
                               conf_threshold: float = 0.25) -> int:
    """
    Detect and segment people using YOLO, creating binary masks.
    E2FGVI requires: white (255) for regions to inpaint, black (0) for regions to keep.

    Args:
        frames_dir: Directory containing frame images
        masks_dir: Directory to save mask images
        model_name: YOLO model to use (yolov8n-seg.pt to yolov8x-seg.pt)
        conf_threshold: Confidence threshold for detections

    Returns:
        Number of masks created
    """
    print(f"\n  Loading YOLO model: {model_name}")
    model = YOLO(model_name)

    frame_files = sorted([f for f in os.listdir(frames_dir) if f.endswith('.png')])
    total_frames = len(frame_files)

    print(f"  Detecting people in {total_frames} frames...")

    for idx, frame_file in enumerate(frame_files):
        frame_path = os.path.join(frames_dir, frame_file)
        frame = cv2.imread(frame_path)
        h, w = frame.shape[:2]

        # Create blank mask (black background = keep these regions)
        mask = np.zeros((h, w), dtype=np.uint8)

        # Run YOLO inference (class 0 = person in COCO dataset)
        results = model(frame, conf=conf_threshold, classes=[0], verbose=False)

        # Process segmentation masks
        if results[0].masks is not None:
            for seg_mask in results[0].masks.data:
                # Resize mask to frame size
                seg_mask_resized = cv2.resize(
                    seg_mask.cpu().numpy(),
                    (w, h),
                    interpolation=cv2.INTER_LINEAR
                )
                # Add to combined mask (binary OR) - white (255) = inpaint these regions
                mask = np.maximum(mask, (seg_mask_resized > 0.5).astype(np.uint8) * 255)

        # Save mask with same filename as frame (E2FGVI requirement)
        mask_path = os.path.join(masks_dir, frame_file)
        cv2.imwrite(mask_path, mask)

        if (idx + 1) % 50 == 0 or (idx + 1) == total_frames:
            print(f"  Processed: {idx + 1}/{total_frames} frames", end='\r')

    print(f"\n  ✓ Created {total_frames} masks")
    return total_frames





In [11]:
# ============================================================================
# 3.1 SPACE FOR ADDITIONAL MASK CREATION AND COMBINATION
# ============================================================================

# Define validation transform
val_transform = A.Compose([
    A.Resize(256, 256), # Resize for model input
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), # Normalize
    ToTensorV2(), # Convert to PyTorch tensor
])

def combine_masks(mask_dirs: List[str], output_dir: str) -> int:
    """
    Combine masks from multiple sources using OR operation.
    Useful for combining YOLO masks with manual masks or other detection sources.

    Args:
        mask_dirs: List of directories containing masks
        output_dir: Directory to save combined masks

    Returns:
        Number of combined masks created
    """
    print(f"\n  Combining masks from {len(mask_dirs)} sources...")

    # Get all mask files from first directory
    mask_files = sorted([f for f in os.listdir(mask_dirs[0]) if f.endswith('.png')])

    for mask_file in mask_files:
        combined_mask = None

        for mask_dir in mask_dirs:
            mask_path = os.path.join(mask_dir, mask_file)
            if os.path.exists(mask_path):
                mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                if combined_mask is None:
                    combined_mask = mask
                else:
                    combined_mask = np.maximum(combined_mask, mask)

        if combined_mask is not None:
            output_path = os.path.join(output_dir, mask_file)
            cv2.imwrite(output_path, combined_mask)

    print(f"  ✓ Combined {len(mask_files)} masks")
    return len(mask_files)


def dilate_masks(input_dir: str, output_dir: str, kernel_size: int = 5, iterations: int = 1) -> int:
    """
    Dilate masks to ensure complete coverage of people and remove edge artifacts.
    This helps ensure no person pixels remain after inpainting.

    Args:
        input_dir: Directory containing input masks
        output_dir: Directory to save dilated masks
        kernel_size: Size of dilation kernel (larger = more dilation)
        iterations: Number of dilation iterations

    Returns:
        Number of masks processed
    """
    print(f"  Dilating masks (kernel={kernel_size}, iterations={iterations})...")
    mask_files = sorted([f for f in os.listdir(input_dir) if f.endswith('.png')])
    kernel = np.ones((kernel_size, kernel_size), np.uint8)

    for mask_file in mask_files:
        mask_path = os.path.join(input_dir, mask_file)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        dilated = cv2.dilate(mask, kernel, iterations=iterations)

        output_path = os.path.join(output_dir, mask_file)
        cv2.imwrite(output_path, dilated)

    print(f"  ✓ Dilated {len(mask_files)} masks")
    return len(mask_files)

def setup_shadow_model():
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  shadow_model = smp.Unet(
      encoder_name="resnet34",        # Encoder backbone
      encoder_weights="imagenet",     # Pre-trained weights
      in_channels=3,                  # RGB input
      classes=1,                      # Binary segmentation (shadow/no-shadow)
      activation=None,                # We'll apply sigmoid in loss
  )

  # Load entire model (simplest!)
  shadow_model = torch.load('/content/shadow_model_full.pth', map_location=device, weights_only=False)
  shadow_model.eval()

  print("✓ Shadow Model loaded!")
  return shadow_model, device


# define functions for use of model
def predict_frame_shadow(image_rgb, model, device, threshold=0.5):
    original_size = image_rgb.shape[:2]

    # Transform
    transformed = val_transform(image=image_rgb)
    img_tensor = transformed['image'].unsqueeze(0).to(device)

    # Predict
    with torch.no_grad():
        output = torch.sigmoid(model(img_tensor))
        pred = output.cpu().squeeze().numpy()

    # Resize back to original size
    pred = cv2.resize(pred, (original_size[1], original_size[0]))
    mask = (pred > threshold).astype(np.uint8) * 255

    return mask


def filter_shadows_near_people(shadow_mask, people_mask, max_distance=50):
    """
    Filter shadow mask to only keep shadows near people

    Args:
        shadow_mask: Binary shadow mask (H, W) with 255 for shadows
        people_mask: Binary people mask (H, W) with 255 for people
        max_distance: Maximum pixel distance to consider

    Returns:
        Filtered shadow mask
    """
    shadow_binary = (shadow_mask > 127).astype(np.uint8)
    people_binary = (people_mask > 127).astype(np.uint8)

    if people_binary.sum() == 0:
        return np.zeros_like(shadow_mask)

    kernel_size = max_distance * 2 + 1
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
    people_dilated = cv2.dilate(people_binary, kernel, iterations=1)

    filtered_shadows = cv2.bitwise_and(shadow_binary, people_dilated)
    return (filtered_shadows * 255).astype(np.uint8)

def filter_shadows_connected_to_people(shadow_mask, people_mask):
    """
    Filter shadows to only keep regions touching people (uses connected components)

    Args:
        shadow_mask: Binary shadow mask (H, W) with 255 for shadows
        people_mask: Binary people mask (H, W) with 255 for people

    Returns:
        Filtered shadow mask with only shadows touching people
    """
    shadow_binary = (shadow_mask > 127).astype(np.uint8)
    people_binary = (people_mask > 127).astype(np.uint8)

    if people_binary.sum() == 0:
        return np.zeros_like(shadow_mask)

    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(shadow_binary, connectivity=8)
    filtered_mask = np.zeros_like(shadow_binary)

    for label in range(1, num_labels):
        region_mask = (labels == label).astype(np.uint8)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
        region_dilated = cv2.dilate(region_mask, kernel, iterations=1)

        if cv2.bitwise_and(region_dilated, people_binary).sum() > 0:
            filtered_mask = cv2.bitwise_or(filtered_mask, region_mask)

    return (filtered_mask * 255).astype(np.uint8)


def detect_and_add_shadows(frames_dir: str,
                           people_masks_dir: str,
                           output_dir: str,
                           shadow_model,
                           device,
                           val_transform,
                           shadow_threshold: float = 0.5,
                           max_distance: int = 50,
                           shadow_coverage_threshold: float = 0.5) -> int:
    """
    Detect shadows near people and add them to the masks.

    Args:
        frames_dir: Directory containing frame images
        people_masks_dir: Directory containing people masks
        output_dir: Directory to save combined masks (people + shadows)
        shadow_model: Trained shadow detection model
        device: PyTorch device (cuda/cpu)
        val_transform: Transform function for shadow model input
        shadow_threshold: Confidence threshold for shadow detection
        max_distance: Maximum pixel distance from people to consider shadows
        shadow_coverage_threshold: If shadow covers more than this % of frame, skip shadow filtering

    Returns:
        Number of masks processed
    """
    print(f"  Detecting shadows near people (max_distance={max_distance}px)...")

    frame_files = sorted([f for f in os.listdir(frames_dir) if f.endswith('.png')])
    skipped_count = 0

    for idx, frame_file in enumerate(frame_files):
        # Read frame and people mask
        frame_path = os.path.join(frames_dir, frame_file)
        people_mask_path = os.path.join(people_masks_dir, frame_file)

        frame = cv2.imread(frame_path)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        people_mask = cv2.imread(people_mask_path, cv2.IMREAD_GRAYSCALE)

        # Predict shadow mask
        original_size = frame_rgb.shape[:2]

        # Transform
        transformed = val_transform(image=frame_rgb)
        img_tensor = transformed['image'].unsqueeze(0).to(device)

        # Predict
        with torch.no_grad():
            output = torch.sigmoid(shadow_model(img_tensor))
            pred = output.cpu().squeeze().numpy()

        # Resize back to original size
        pred = cv2.resize(pred, (original_size[1], original_size[0]))
        shadow_mask = (pred > shadow_threshold).astype(np.uint8) * 255

        # Check shadow coverage - if too much shadow, skip filtering
        shadow_binary = (shadow_mask > 127).astype(np.uint8)
        total_pixels = shadow_binary.size
        shadow_pixels = shadow_binary.sum()
        shadow_coverage = shadow_pixels / total_pixels

        if shadow_coverage >= shadow_coverage_threshold:
            # Too much shadow in frame - just use people mask without shadow filtering
            combined_mask = people_mask.copy()
            skipped_count += 1
        else:
            # Two-stage shadow filtering
            # Stage 1: Keep shadows near people (distance-based)
            nearby_shadows = filter_shadows_near_people(shadow_mask, people_mask, max_distance)

            # Stage 2: Keep shadows connected to people (component-based)
            connected_shadows = filter_shadows_connected_to_people(shadow_mask, people_mask)

            # Combine both filters (union)
            filtered_shadows = cv2.bitwise_and(nearby_shadows, connected_shadows)

            # Combine people mask with filtered shadows
            combined_mask = cv2.bitwise_or(people_mask, filtered_shadows)

        # Save combined mask
        output_path = os.path.join(output_dir, frame_file)
        cv2.imwrite(output_path, combined_mask)

        if (idx + 1) % 50 == 0 or (idx + 1) == len(frame_files):
            print(f"  Processed: {idx + 1}/{len(frame_files)} frames", end='\r')

    print(f"\n  ✓ Added shadows to {len(frame_files)} masks")
    if skipped_count > 0:
        print(f"  ℹ️  Skipped shadow filtering for {skipped_count} frames (>{shadow_coverage_threshold*100:.0f}% shadow coverage)")
    return len(frame_files)






In [12]:
# ============================================================================
# 4. FUNCTIONS FOR USING E2FGVI
# ============================================================================

def prepare_e2fgvi_input(frames_dir: str, masks_dir: str, video_name: str,
                         e2fgvi_base_dir: str = "E2FGVI") -> dict:
    """
    Prepare input structure for E2FGVI.
    E2FGVI expects:
      - inputs/<dataset_name>/<video_name>/frames/00000.png, 00001.png, ...
      - inputs/<dataset_name>/<video_name>_masks/00000.png, 00001.png, ...

    Args:
        frames_dir: Directory containing frames
        masks_dir: Directory containing masks
        video_name: Name of the video
        e2fgvi_base_dir: E2FGVI repository directory

    Returns:
        Dictionary with E2FGVI input paths
    """
    dataset_name = "object_removal"
    e2fgvi_input_base = os.path.join(e2fgvi_base_dir, "inputs", dataset_name)

    paths = {
        'frames': os.path.join(e2fgvi_input_base, video_name, "frames"),
        'masks': os.path.join(e2fgvi_input_base, f"{video_name}_masks")
    }

    # Create directories and copy files
    for key, target_dir in paths.items():
        os.makedirs(target_dir, exist_ok=True)

        # Remove existing files
        for f in glob.glob(os.path.join(target_dir, "*.png")):
            os.remove(f)

        # Copy new files
        source_dir = frames_dir if key == 'frames' else masks_dir
        for img_file in sorted(glob.glob(os.path.join(source_dir, "*.png"))):
            shutil.copy(img_file, target_dir)

    print(f"  ✓ Prepared E2FGVI input structure")
    return paths


def run_e2fgvi_inference(video_name: str,
                         width: int,
                         height: int,
                         output_path: str,
                         e2fgvi_dir: str = "E2FGVI",
                         use_hq_model: bool = True) -> str:
    """
    Run E2FGVI inference for video inpainting.

    Args:
        video_name: Name of the video (used for input naming in E2FGVI)
        width: Video width
        height: Video height
        output_path: Where to save the final output video
        e2fgvi_dir: E2FGVI repository directory
        use_hq_model: Whether to use HQ model (supports arbitrary resolution)

    Returns:
        Path to output video
    """
    import re
    from tqdm import tqdm

    # Change to E2FGVI directory
    original_dir = os.getcwd()
    os.chdir(e2fgvi_dir)

    try:
        # Prepare paths for E2FGVI
        dataset_name = "object_removal"
        video_path = os.path.join("inputs", dataset_name, video_name, "frames")
        mask_path = os.path.join("inputs", dataset_name, f"{video_name}_masks")

        # Count total frames for progress bar
        total_frames = len([f for f in os.listdir(video_path) if f.endswith('.png')])

        # Prepare command
        model_type = "e2fgvi_hq" if use_hq_model else "e2fgvi"
        checkpoint = "release_model/E2FGVI-HQ-CVPR22.pth" if use_hq_model else "release_model/E2FGVI-CVPR22.pth"

        cmd = [
            "python", "test.py",
            "--model", model_type,
            "--video", video_path,
            "--mask", mask_path,
            "--ckpt", checkpoint,
            "--width", str(width),
            "--height", str(height),
            "--set_size",  # Required flag when specifying width/height
            '--num_ref', '40',
            '--neighbor_stride', '5'
        ]

        print(f"\n  Running E2FGVI inference...")
        print(f"  Model: {model_type}")
        print(f"  Resolution: {width}x{height}")
        print(f"  Frames to process: {total_frames}")
        print(f"  Video: {video_path}")
        print(f"  Masks: {mask_path}")

        # Run inference with real-time progress monitoring
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1
        )

        # Create progress bar
        pbar = tqdm(total=total_frames, desc="  Inpainting frames", unit="frame")

        # Monitor output
        output_lines = []
        current_frame = 0

        for line in process.stdout:
            output_lines.append(line)

            # Look for frame progress indicators in E2FGVI output
            # E2FGVI might output something like "Processing frame 10/100" or similar
            frame_match = re.search(r'(\d+)/(\d+)', line)
            if frame_match:
                frame_num = int(frame_match.group(1))
                if frame_num > current_frame:
                    pbar.update(frame_num - current_frame)
                    current_frame = frame_num

            # Alternative: look for progress percentage
            percent_match = re.search(r'(\d+)%', line)
            if percent_match:
                percent = int(percent_match.group(1))
                target_frame = int(total_frames * percent / 100)
                if target_frame > current_frame:
                    pbar.update(target_frame - current_frame)
                    current_frame = target_frame

            # Show errors in real-time
            if 'error' in line.lower() or 'traceback' in line.lower():
                print(f"\n  {line.strip()}")

        process.wait()

        # Complete the progress bar
        if current_frame < total_frames:
            pbar.update(total_frames - current_frame)
        pbar.close()

        if process.returncode != 0:
            print("\n  Full output:")
            print(''.join(output_lines))
            raise RuntimeError(f"E2FGVI inference failed with exit code {process.returncode}")

        print("  ✓ E2FGVI processing completed")

        # Find output video in E2FGVI results directory
        # E2FGVI can create results in different locations depending on the test.py version:
        # Option 1: results/<video_name>/<timestamp>.mp4
        # Option 2: results/<timestamp>.mp4
        # Option 3: results/object_removal/<video_name>.mp4

        possible_result_dirs = [
            os.path.join("/content/E2FGVI/results"),
            os.path.join("results", video_name),           # Most common
            os.path.join("results"),                        # Sometimes directly in results
            os.path.join("results", "object_removal"),      # Alternative structure
        ]

        e2fgvi_output = None

        # Search all possible locations
        for result_dir in possible_result_dirs:
            if not os.path.exists(result_dir):
                continue

            # Look for mp4 files
            output_files = sorted([f for f in os.listdir(result_dir) if f.endswith('.mp4')])

            if output_files:
                # Get the most recent output
                e2fgvi_output = os.path.join(result_dir, output_files[-1])
                print(f"  Found output: {e2fgvi_output}")
                break

        if not e2fgvi_output:
            # Debug: list all files in results directory
            print("\n  Debugging - Contents of results directory:")
            if os.path.exists("results"):
                for root, dirs, files in os.walk("results"):
                    print(f"    {root}:")
                    for d in dirs:
                        print(f"      [DIR] {d}")
                    for f in files:
                        print(f"      [FILE] {f}")
            else:
                print("    Results directory doesn't exist!")

            raise RuntimeError(f"No output video found. Searched in: {possible_result_dirs}")

        # Copy to final destination
        shutil.copy(e2fgvi_output, output_path)

        print(f"  ✓ Inpainting completed")

    finally:
        # Return to original directory
        os.chdir(original_dir)

    return output_path


In [13]:
# ============================================================================
# 5. BATCH PROCESSING FUNCTION
# ============================================================================

def process_video(video_path: str,
                  yolo_model: str = 'yolov8x-seg.pt',
                  conf_threshold: float = 0.25,
                  dilate_kernel: int = 5,
                  dilate_iterations: int = 2,
                  target_resolution: Optional[Tuple[int, int]] = None,
                  max_frames: Optional[int] = None,
                  detect_shadows: bool = False,
                  shadow_model = None,
                  shadow_device = None,
                  shadow_transform = None,
                  shadow_threshold: float = 0.5,
                  shadow_max_distance: int = 50) -> Dict:
    """
    Process a single video: extract frames, detect people, create masks, inpaint.

    Args:
        video_path: Path to video file
        yolo_model: YOLO model to use
        conf_threshold: Detection confidence threshold
        dilate_kernel: Dilation kernel size
        dilate_iterations: Number of dilation iterations
        target_resolution: Target resolution as (width, height) tuple (None = use original)

    Returns:
        Dictionary with processing results and paths
    """
    start_time = datetime.now()
    video_name = Path(video_path).stem

    print("\n" + "="*80)
    print(f"PROCESSING: {video_name}")
    print("="*80)

    try:
        # Create project structure
        print("\n[1/6] Creating project structure...")
        paths = create_project_structure(video_path)

        # Extract frames
        print("\n[2/6] Extracting frames...")
        frame_count, width, height, fps = extract_frames(
            video_path,
            paths['frames_dir'],
            target_resolution=target_resolution
        )

        # Detect and segment people
        print("\n[3/6] Detecting and segmenting people with YOLO...")
        mask_count = detect_and_segment_people(
            paths['frames_dir'],
            paths['yolo_masks_dir'],
            model_name=yolo_model,
            conf_threshold=conf_threshold
        )

        # Post-process masks (dilation for better coverage)
        print("\n[4/6] Post-processing masks...")

        # First, optionally add shadow detection
        if detect_shadows:
            if shadow_model is None or shadow_device is None or shadow_transform is None:
                print("  ⚠️  Shadow detection enabled but model/device/transform not provided. Skipping shadows.")
                # Just copy YOLO masks to combined
                for mask_file in os.listdir(paths['yolo_masks_dir']):
                    shutil.copy(
                        os.path.join(paths['yolo_masks_dir'], mask_file),
                        os.path.join(paths['combined_masks_dir'], mask_file)
                    )
            else:
                # Detect shadows and combine with people masks
                detect_and_add_shadows(
                    paths['frames_dir'],
                    paths['yolo_masks_dir'],
                    paths['combined_masks_dir'],
                    shadow_model,
                    shadow_device,
                    shadow_transform,
                    shadow_threshold=shadow_threshold,
                    max_distance=shadow_max_distance
                )
        else:
            # No shadow detection - just copy YOLO masks
            for mask_file in os.listdir(paths['yolo_masks_dir']):
                shutil.copy(
                    os.path.join(paths['yolo_masks_dir'], mask_file),
                    os.path.join(paths['combined_masks_dir'], mask_file)
                )

        # Then apply dilation
        dilate_masks(
            paths['combined_masks_dir'],
            paths['combined_masks_dir'],  # Overwrite in place
            kernel_size=dilate_kernel,
            iterations=dilate_iterations
        )


        # Prepare E2FGVI input
        print("\n[5/6] Preparing E2FGVI input...")
        e2fgvi_paths = prepare_e2fgvi_input(
            paths['frames_dir'],
            paths['combined_masks_dir'],
            video_name
        )

        # Run E2FGVI inpainting
        print("\n[6/6] Running E2FGVI inpainting...")
        output_video_path = os.path.join(paths['results_dir'], f"{video_name}_inpainted.mp4")
        run_e2fgvi_inference(
            video_name,
            width,
            height,
            output_video_path
        )

        elapsed = (datetime.now() - start_time).total_seconds()

        print("\n" + "="*80)
        print(f"✓ SUCCESS: {video_name}")
        print(f"  Output: {output_video_path}")
        print(f"  Time: {elapsed:.1f}s ({elapsed/60:.1f} minutes)")
        print("="*80)

        return {
            'success': True,
            'video_name': video_name,
            'video_path': video_path,
            'output_path': output_video_path,
            'project_dir': paths['project_dir'],
            'frames': frame_count,
            'resolution': f"{width}x{height}",
            'time': elapsed
        }

    except Exception as e:
        elapsed = (datetime.now() - start_time).total_seconds()
        print(f"\n✗ ERROR processing {video_name}: {str(e)}")
        return {
            'success': False,
            'video_name': video_name,
            'video_path': video_path,
            'error': str(e),
            'time': elapsed
        }


def batch_process_videos(input_dir: str = "/content/",
                         yolo_model: str = 'yolov8x-seg.pt',
                         conf_threshold: float = 0.25,
                         dilate_kernel: int = 5,
                         dilate_iterations: int = 2,
                         target_resolution: Optional[Tuple[int, int]] = None,
                         max_frames: Optional[int] = None,
                         detect_shadows: bool = False,
                         shadow_model = None,
                         shadow_device = None,
                         shadow_transform = None,
                         shadow_threshold: float = 0.5,
                         shadow_max_distance: int = 50) -> List[Dict]:
    """
    Process all MP4 videos in the input directory.

    Args:
        input_dir: Directory containing MP4 files
        yolo_model: YOLO model to use
        conf_threshold: Detection confidence threshold
        dilate_kernel: Dilation kernel size
        dilate_iterations: Number of dilation iterations
        target_resolution: Target resolution as (width, height) tuple (None = use original)
        max_frames: Maximum frames per video (None = no limit, helps avoid OOM)
        detect_shadows: Whether to detect and include shadows in masks
        shadow_model: Shadow detection model (required if detect_shadows=True)
        shadow_device: PyTorch device for shadow model
        shadow_transform: Transform function for shadow model
        shadow_threshold: Confidence threshold for shadow detection
        shadow_max_distance: Maximum distance from people to consider shadows

    Returns:
        List of result dictionaries for each video
    """
    # Find all MP4 files
    video_files = glob.glob(os.path.join(input_dir, "*.mp4"))

    if not video_files:
        print(f"No MP4 files found in {input_dir}")
        return []

    print("\n" + "="*80)
    print(f"BATCH PROCESSING: Found {len(video_files)} MP4 file(s)")
    print("="*80)
    for vf in video_files:
        print(f"  - {Path(vf).name}")

    results = []

    for idx, video_path in enumerate(video_files, 1):
        print(f"\n\n{'='*80}")
        print(f"VIDEO {idx}/{len(video_files)}")
        print('='*80)

        result = process_video(
            video_path,
            yolo_model=yolo_model,
            conf_threshold=conf_threshold,
            dilate_kernel=dilate_kernel,
            dilate_iterations=dilate_iterations,
            target_resolution=target_resolution,
            max_frames=max_frames,
            detect_shadows=detect_shadows,
            shadow_model=shadow_model,
            shadow_device=shadow_device,
            shadow_transform=shadow_transform,
            shadow_threshold=shadow_threshold,
            shadow_max_distance=shadow_max_distance
        )
        results.append(result)

        # Clear GPU memory after each video to prevent accumulation
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            print(f"  ✓ GPU memory cleared for next video")

    # Print summary
    print("\n\n" + "="*80)
    print("BATCH PROCESSING SUMMARY")
    print("="*80)

    successful = [r for r in results if r['success']]
    failed = [r for r in results if not r['success'] and not r.get('skipped', False)]
    skipped = [r for r in results if r.get('skipped', False)]

    print(f"\nTotal videos: {len(results)}")
    print(f"Successful: {len(successful)}")
    print(f"Failed: {len(failed)}")
    if skipped:
        print(f"Skipped (too long): {len(skipped)}")

    if successful:
        print("\n✓ SUCCESSFUL VIDEOS:")
        for r in successful:
            print(f"  - {r['video_name']}")
            print(f"    Output: {r['output_path']}")
            print(f"    Time: {r['time']:.1f}s")

    if failed:
        print("\n✗ FAILED VIDEOS:")
        for r in failed:
            print(f"  - {r['video_name']}")
            print(f"    Error: {r['error']}")

    if skipped:
        print("\n⊘ SKIPPED VIDEOS (too long):")
        for r in skipped:
            print(f"  - {r['video_name']}")
            print(f"    Reason: {r['error']}")

    total_time = sum(r['time'] for r in results)
    print(f"\nTotal processing time: {total_time:.1f}s ({total_time/60:.1f} minutes)")
    print("="*80)

    return results



In [14]:
# ============================================================================
# 6. RUN BATCH PROCESSING ON ALL MP4 FILES IN /content/
# ============================================================================

# Configuration
INPUT_DIRECTORY = "/content"  # Process all MP4 files in this directory
YOLO_MODEL = "yolov8x-seg.pt"  # Options: yolov8n-seg.pt (fastest), yolov8s-seg.pt, yolov8m-seg.pt, yolov8l-seg.pt, yolov8x-seg.pt (most accurate)
CONF_THRESHOLD = 0.25          # Lower = detect more (more false positives), Higher = detect less (more false negatives)
DILATE_KERNEL = 10              # Size of dilation kernel (larger = more mask expansion)
DILATE_ITERATIONS = 2          # Number of dilation passes (more = larger masks)
TARGET_RESOLUTION = (432, 240) # Resize videos to this resolution for faster processing (None = keep original)
                               # Recommended for GPU memory: (432, 240) or (640, 360)
                               # Higher resolutions may cause out-of-memory errors!
MAX_FRAMES = 270
# Shadow detection configuration
DETECT_SHADOWS = True         # Set to True to detect and remove shadows along with people
SHADOW_THRESHOLD = 0.5         # Confidence threshold for shadow detection (0.0-1.0)
SHADOW_MAX_DISTANCE = 100       # Maximum pixel distance from people to consider shadows



print("\n" + "="*80)
print("CONFIGURATION")
print("="*80)
print(f"Input directory: {INPUT_DIRECTORY}")
print(f"YOLO model: {YOLO_MODEL}")
print(f"Confidence threshold: {CONF_THRESHOLD}")
print(f"Mask dilation: kernel={DILATE_KERNEL}, iterations={DILATE_ITERATIONS}")
print(f"Target resolution: {TARGET_RESOLUTION if TARGET_RESOLUTION else 'Original (no rescaling)'}")
print(f"Shadow detection: {'Enabled' if DETECT_SHADOWS else 'Disabled'}")
if DETECT_SHADOWS:
    print(f"  Shadow threshold: {SHADOW_THRESHOLD}")
    print(f"  Shadow max distance: {SHADOW_MAX_DISTANCE}px")

# Clear GPU memory before starting
import torch
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print(f"\nGPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB total")
    print("✓ GPU cache cleared")

# Set PyTorch memory optimization
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
print("✓ GPU memory fragmentation optimization enabled")

shadow_model = None
shadow_device = None
shadow_transform = val_transform
# Load Shadow
if DETECT_SHADOWS:
  shadow_model, shadow_device = setup_shadow_model()

# Run batch processing
results = batch_process_videos(
    input_dir=INPUT_DIRECTORY,
    yolo_model=YOLO_MODEL,
    conf_threshold=CONF_THRESHOLD,
    dilate_kernel=DILATE_KERNEL,
    dilate_iterations=DILATE_ITERATIONS,
    target_resolution=TARGET_RESOLUTION,
    max_frames=MAX_FRAMES,
    detect_shadows=DETECT_SHADOWS,
    shadow_model=shadow_model if DETECT_SHADOWS else None,
    shadow_device=shadow_device if DETECT_SHADOWS else None,
    shadow_transform=shadow_transform if DETECT_SHADOWS else None,
    shadow_threshold=SHADOW_THRESHOLD,
    shadow_max_distance=SHADOW_MAX_DISTANCE
)

# Display first successful result (if any) in Colab
successful_results = [r for r in results if r['success']]
if successful_results:
    print("\n" + "="*80)
    print("DISPLAYING FIRST RESULT")
    print("="*80)
    from IPython.display import Video, display
    first_result = successful_results[0]
    print(f"Video: {first_result['video_name']}")
    display(Video(first_result['output_path'], width=640))


CONFIGURATION
Input directory: /content
YOLO model: yolov8x-seg.pt
Confidence threshold: 0.25
Mask dilation: kernel=10, iterations=2
Target resolution: (432, 240)
Shadow detection: Enabled
  Shadow threshold: 0.5
  Shadow max distance: 100px

GPU: Tesla T4
GPU Memory: 14.7 GB total
✓ GPU cache cleared
✓ GPU memory fragmentation optimization enabled
✓ Shadow Model loaded!

BATCH PROCESSING: Found 3 MP4 file(s)
  - shadow concrete.mp4
  - shadow wall.mp4
  - shadow bench.mp4


VIDEO 1/3

PROCESSING: shadow concrete

[1/6] Creating project structure...

[2/6] Extracting frames...
  Original: 848x478, Target: 432x240, 30.00 FPS, 146 frames

  ✓ Extracted 146 frames

[3/6] Detecting and segmenting people with YOLO...

  Loading YOLO model: yolov8x-seg.pt
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-seg.pt to 'yolov8x-seg.pt': 100% ━━━━━━━━━━━━ 137.4MB 93.3MB/s 1.5s
  Detecting people in 146 frames...
  Processed: 146/146 frames
  ✓ Created 146 masks

  Inpainting frames: 100%|██████████| 146/146 [01:12<00:00,  2.01frame/s]


  ✓ E2FGVI processing completed
  Found output: /content/E2FGVI/results/frames_results.mp4
  ✓ Inpainting completed

✓ SUCCESS: shadow concrete
  Output: /content/output/shadow concrete/results/shadow concrete_inpainted.mp4
  Time: 100.1s (1.7 minutes)
  ✓ GPU memory cleared for next video


VIDEO 2/3

PROCESSING: shadow wall

[1/6] Creating project structure...

[2/6] Extracting frames...
  Original: 848x478, Target: 432x240, 30.00 FPS, 147 frames

  ✓ Extracted 147 frames

[3/6] Detecting and segmenting people with YOLO...

  Loading YOLO model: yolov8x-seg.pt
  Detecting people in 147 frames...
  Processed: 147/147 frames
  ✓ Created 147 masks

[4/6] Post-processing masks...
  Detecting shadows near people (max_distance=100px)...
  Processed: 147/147 frames
  ✓ Added shadows to 147 masks
  Dilating masks (kernel=10, iterations=2)...
  ✓ Dilated 147 masks

[5/6] Preparing E2FGVI input...
  ✓ Prepared E2FGVI input structure

[6/6] Running E2FGVI inpainting...

  Running E2FGVI inferen

  Inpainting frames: 100%|██████████| 147/147 [01:11<00:00,  2.05frame/s]


  ✓ E2FGVI processing completed
  Found output: /content/E2FGVI/results/frames_results.mp4
  ✓ Inpainting completed

✓ SUCCESS: shadow wall
  Output: /content/output/shadow wall/results/shadow wall_inpainted.mp4
  Time: 92.6s (1.5 minutes)
  ✓ GPU memory cleared for next video


VIDEO 3/3

PROCESSING: shadow bench

[1/6] Creating project structure...

[2/6] Extracting frames...
  Original: 848x478, Target: 432x240, 29.76 FPS, 128 frames

  ✓ Extracted 128 frames

[3/6] Detecting and segmenting people with YOLO...

  Loading YOLO model: yolov8x-seg.pt
  Detecting people in 128 frames...
  Processed: 128/128 frames
  ✓ Created 128 masks

[4/6] Post-processing masks...
  Detecting shadows near people (max_distance=100px)...
  Processed: 128/128 frames
  ✓ Added shadows to 128 masks
  Dilating masks (kernel=10, iterations=2)...
  ✓ Dilated 128 masks

[5/6] Preparing E2FGVI input...
  ✓ Prepared E2FGVI input structure

[6/6] Running E2FGVI inpainting...

  Running E2FGVI inference...
  Mode

  Inpainting frames: 100%|██████████| 128/128 [00:58<00:00,  2.19frame/s]

  ✓ E2FGVI processing completed
  Found output: /content/E2FGVI/results/frames_results.mp4
  ✓ Inpainting completed

✓ SUCCESS: shadow bench
  Output: /content/output/shadow bench/results/shadow bench_inpainted.mp4
  Time: 78.9s (1.3 minutes)
  ✓ GPU memory cleared for next video


BATCH PROCESSING SUMMARY

Total videos: 3
Successful: 3
Failed: 0

✓ SUCCESSFUL VIDEOS:
  - shadow concrete
    Output: /content/output/shadow concrete/results/shadow concrete_inpainted.mp4
    Time: 100.1s
  - shadow wall
    Output: /content/output/shadow wall/results/shadow wall_inpainted.mp4
    Time: 92.6s
  - shadow bench
    Output: /content/output/shadow bench/results/shadow bench_inpainted.mp4
    Time: 78.9s

Total processing time: 271.6s (4.5 minutes)

DISPLAYING FIRST RESULT
Video: shadow concrete



