# Download and Extract Video Clips

This notebook downloads and extracts video clips from Google Drive for preprocessing.

In [None]:
# Virtual Environment Setup
#
# Makes the virtual environment's packages and the project source tree
# importable from this kernel, and selects the non-interactive 'Agg'
# matplotlib backend. The cell can be re-run safely: it never leaves
# duplicate entries on sys.path.
import sys
import os
from pathlib import Path

# Add virtual environment packages to Python path
venv_site_packages = "/workspace/venv/lib/python3.12/site-packages"
if venv_site_packages not in sys.path:
    sys.path.insert(0, venv_site_packages)

# Add project to Python path
project_root = Path("/workspace/fslr-transformer-vs-iv3gru")
project_root_entry = str(project_root)
# Remove entries left behind by earlier runs of this cell so the project
# stays first on sys.path without accumulating duplicates.
while project_root_entry in sys.path:
    sys.path.remove(project_root_entry)
sys.path.insert(0, project_root_entry)

# Both locations are hard-coded; warn instead of silently reporting success
# when one of them does not exist on this machine.
for description, location in (
    ("Venv packages directory", venv_site_packages),
    ("Project directory", project_root_entry),
):
    if not Path(location).is_dir():
        print(f"⚠️ {description} not found: {location}")

# Fix matplotlib backend for MediaPipe (only if matplotlib is available).
# The environment variable is set before the import so that it applies even
# when matplotlib is first imported later on.
os.environ['MPLBACKEND'] = 'Agg'
try:
    import matplotlib
    matplotlib.use('Agg')
    print("✅ Matplotlib backend configured")
except ImportError:
    print("⚠️ Matplotlib not installed yet - will be handled when needed")

print("✅ Virtual environment configured")
print(f"🐍 Python: {sys.executable}")
print(f"📁 Project: {project_root}")
print(f"📦 Venv packages: {venv_site_packages}")

In [None]:
# Install gdown in virtual environment
#
# Tries the virtual environment's pip first and falls back to the pip of the
# interpreter running this kernel. Afterwards the cell verifies that gdown
# can actually be imported.
import importlib
import subprocess
import sys

print("Installing gdown...")
try:
    # Use virtual environment pip
    result = subprocess.run(["/workspace/venv/bin/pip", "install", "gdown"],
                            check=True, capture_output=True, text=True)
    print("✅ gdown installed successfully in virtual environment")
    print(f"Output: {result.stdout}")
except (subprocess.CalledProcessError, OSError) as e:
    # CalledProcessError: pip ran but exited with a non-zero status.
    # OSError (e.g. FileNotFoundError): the venv pip executable is missing or
    # cannot be executed. Both cases must reach the fallback below.
    if isinstance(e, subprocess.CalledProcessError):
        print(f"❌ Installation failed with return code {e.returncode}")
        print(f"Error: {e.stderr}")
    else:
        print(f"❌ Could not run virtual environment pip: {e}")

    # Try system pip as fallback
    try:
        print("Trying system pip...")
        result = subprocess.run([sys.executable, "-m", "pip", "install", "gdown"],
                                check=True, capture_output=True, text=True)
        print("✅ gdown installed with system pip")
        print(f"Output: {result.stdout}")
    except subprocess.CalledProcessError as e2:
        print(f"❌ System pip also failed: {e2.stderr}")

# Verify installation
# The package may have been written to disk after this interpreter started,
# so refresh the import system's directory caches before importing.
importlib.invalidate_caches()
try:
    import gdown
    print("✅ gdown imported successfully!")
except ImportError as e:
    print(f"❌ gdown still not available: {e}")
    print("🔄 Please restart kernel and try again")

In [None]:
# Import required packages
# gdown is mandatory for the download step: when it cannot be imported the
# cell explains how to recover and then aborts with ImportError.
try:
    import gdown
except ImportError:
    print("❌ gdown not available. Please run the installation cell above first.")
    print("🔄 After installation, restart the kernel and run this cell again.")
    raise ImportError("gdown module not found")
else:
    print("✅ gdown imported successfully")

import os
from pathlib import Path

# Configuration
# FILE_ID      - Google Drive id of the archive to download
# OUTPUT_DIR   - directory (relative to the working directory) that receives the data
# ZIP_FILE     - path the archive is downloaded to
FILE_ID = '1U9xzaYIUMeXpQFo03tUyJTRwahX88OpJ'
OUTPUT_DIR = Path('../data/raw')
ZIP_FILE = OUTPUT_DIR.joinpath('clips.zip')

# Create output directory (including missing parents; no error if present)
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Echo the effective configuration
print(f"📁 Output directory: {OUTPUT_DIR}")
print(f"📦 Zip file: {ZIP_FILE}")
print(f"🔗 Google Drive file ID: {FILE_ID}")

In [None]:
# Download video clips from Google Drive
#
# Fetches the archive identified by FILE_ID into ZIP_FILE and stops the
# notebook with an error if no usable file was produced.
print("📥 Downloading video clips...")
gdown_url = f'https://drive.google.com/uc?id={FILE_ID}'
downloaded_path = gdown.download(gdown_url, str(ZIP_FILE), quiet=False)

# NOTE(review): depending on the installed gdown version a failed download is
# reported either by an exception or by a None return value - confirm against
# the pinned version. Checking the file on disk covers both behaviours.
if downloaded_path is None or not ZIP_FILE.is_file() or ZIP_FILE.stat().st_size == 0:
    raise RuntimeError(
        f"Download failed: {ZIP_FILE} was not created. "
        "Check the Google Drive file ID, sharing permissions and download quota."
    )

print("✅ Download completed!")

In [None]:
# Extract video clips
#
# Unpacks ZIP_FILE into OUTPUT_DIR, deletes the archive, and counts the video
# files found under OUTPUT_DIR/clips. Defines `clips_dir` and `video_files`
# for the cells that follow.
import zipfile

# Suffixes treated as video files; compared case-insensitively.
VIDEO_SUFFIXES = {".mp4", ".mov"}

if ZIP_FILE.is_file():
    print("📂 Extracting video clips...")
    try:
        with zipfile.ZipFile(ZIP_FILE, 'r') as zip_ref:
            zip_ref.extractall(OUTPUT_DIR)
    except zipfile.BadZipFile:
        # The downloaded file is not an archive (for example an error page
        # saved in place of the real file); explain before propagating.
        print(f"❌ {ZIP_FILE} is not a valid zip archive - re-run the download cell")
        raise

    print("✅ Extraction completed!")

    # Clean up zip file
    ZIP_FILE.unlink()
    print("🗑️ Cleaned up zip file")
else:
    # The archive is deleted after a successful extraction, so a re-run of
    # this cell lands here; keep going and just recount what is on disk.
    print(f"ℹ️ {ZIP_FILE} not found - skipping extraction")

# Count extracted videos before renaming
clips_dir = OUTPUT_DIR / "clips"
if clips_dir.exists():
    # A single walk with a lower-cased suffix test counts every file exactly
    # once, whatever the letter case of its extension and whether or not the
    # filesystem is case-sensitive.
    video_files = sorted(
        path for path in clips_dir.rglob("*")
        if path.is_file() and path.suffix.lower() in VIDEO_SUFFIXES
    )
    print(f"📹 Found {len(video_files)} video files in clips/")
else:
    print("❌ No clips directory found!")
    video_files = []

In [None]:
# Rename and flatten clips using rename_clips.py
#
# Runs the project's `preprocessing.utils.rename_clips` module in a child
# interpreter and then reports the video files found directly in OUTPUT_DIR.
if video_files and clips_dir.exists():
    print("\n🔄 Renaming and flattening clips...")

    # Note: Using rename.csv from preprocessing/utils/ directory
    print("📋 Using rename.csv from preprocessing/utils/ directory")

    try:
        import os
        import subprocess
        import sys
        from pathlib import Path

        # A child interpreter does not inherit sys.path changes made inside
        # this kernel, so expose the project directory through PYTHONPATH.
        # NOTE(review): assumes the `preprocessing` package lives in the
        # directory passed as --root ("..") - confirm against the repo layout.
        repo_root = Path("..").resolve()
        child_env = os.environ.copy()
        child_env["PYTHONPATH"] = os.pathsep.join(
            entry for entry in (str(repo_root), child_env.get("PYTHONPATH")) if entry
        )

        # Run rename_clips.py
        result = subprocess.run([
            sys.executable, "-m", "preprocessing.utils.rename_clips",
            "--root", ".."
        ], capture_output=True, text=True, env=child_env)

        if result.returncode == 0:
            print("✅ Clips renamed successfully!")

            # Count renamed videos: one pass with a case-insensitive suffix
            # test, so no file is counted twice on case-insensitive
            # filesystems and mixed-case extensions are not missed.
            renamed_videos = sorted(
                path for path in OUTPUT_DIR.glob("*")
                if path.is_file() and path.suffix.lower() in {".mp4", ".mov"}
            )
            print(f"📹 Renamed {len(renamed_videos)} video files")

            # Show sample renamed files
            if renamed_videos:
                print("\n📄 Sample renamed files:")
                for position, video in enumerate(renamed_videos[:5], start=1):
                    file_size = video.stat().st_size / (1024 * 1024)  # MB
                    print(f"  {position}. {video.name} ({file_size:.1f} MB)")
                if len(renamed_videos) > 5:
                    print(f"  ... and {len(renamed_videos) - 5} more")
        else:
            print(f"❌ Rename failed: {result.stderr}")
            # The script may have reported details on stdout instead.
            if result.stdout.strip():
                print(f"Output: {result.stdout}")

    except Exception as e:
        # Deliberately best-effort: report the problem and leave the
        # extracted files where they are.
        print(f"❌ Error running rename_clips: {e}")
        print("📁 Videos remain in clips/ directory")
else:
    print("❌ No video files found to rename!")

In [None]:
# Final status and next steps
#
# Reports success only when video files are actually present directly in
# OUTPUT_DIR; otherwise reports the failure without a contradictory
# "complete" banner.

# Check final video count (single pass, case-insensitive suffix test, so each
# file is counted once regardless of extension case or filesystem).
final_videos = [
    path for path in OUTPUT_DIR.glob("*")
    if path.is_file() and path.suffix.lower() in {".mp4", ".mov"}
]

if final_videos:
    print("\n✅ Setup complete!")
    print(f"📁 Videos location: {OUTPUT_DIR}")
    print(f"📹 Total videos ready: {len(final_videos)}")
    print("\n🚀 Next steps:")
    print("1. Run vast_ai_setup.ipynb to set up the project")
    print("2. Run run_multi_preprocess.ipynb to process videos")
    print("3. Use the processed data for training")
else:
    print(f"📁 Videos location: {OUTPUT_DIR}")
    print("❌ No videos found in final location!")
