# Download and Extract Video Clips

This notebook downloads a zip archive of video clips from Google Drive, extracts it into `../data/raw`, and renames/flattens the clips so they are ready for preprocessing.

In [None]:
# Install required packages
# gdown is the Google Drive download helper imported by the next cell.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install gdown

In [None]:
import gdown
import os
from pathlib import Path

# --- Configuration -----------------------------------------------------------
# FILE_ID    : Google Drive ID of the clips archive (used to build the download URL).
# OUTPUT_DIR : directory, relative to the working directory, that receives the
#              downloaded archive and the extracted clips.
# ZIP_FILE   : where the downloaded archive is stored inside OUTPUT_DIR.
FILE_ID = '1U9xzaYIUMeXpQFo03tUyJTRwahX88OpJ'
OUTPUT_DIR = Path('../data/raw')
ZIP_FILE = OUTPUT_DIR.joinpath('clips.zip')

# Make sure the target directory (and any missing parents) exists up front.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Echo the effective locations so the reader can confirm them before downloading.
print(
    f"üìÅ Output directory: {OUTPUT_DIR}",
    f"üì¶ Zip file: {ZIP_FILE}",
    sep="\n",
)

In [None]:
# Download video clips from Google Drive
#
# gdown.download returns the output path on success. Some gdown releases print
# an error and return None instead of raising when the download fails (for
# example on a permission or quota problem), so the result is checked
# explicitly rather than announcing success unconditionally.
print("üì• Downloading video clips...")
gdown_url = f'https://drive.google.com/uc?id={FILE_ID}'
downloaded_path = gdown.download(gdown_url, str(ZIP_FILE), quiet=False)

# Stop here with a clear message instead of letting a later cell fail on a
# missing archive.
if downloaded_path is None or not ZIP_FILE.is_file():
    raise RuntimeError(
        f"Download failed: {ZIP_FILE} was not created. "
        "Check FILE_ID and the sharing permissions of the Google Drive file."
    )

print("‚úÖ Download completed!")

In [None]:
# Extract video clips
#
# Unpacks ZIP_FILE into OUTPUT_DIR, deletes the archive, then counts the video
# files found under OUTPUT_DIR/clips. Defines `clips_dir` and `video_files`.
import zipfile

print("üìÇ Extracting video clips...")

with zipfile.ZipFile(ZIP_FILE, 'r') as zip_ref:
    zip_ref.extractall(OUTPUT_DIR)

print("‚úÖ Extraction completed!")

# Clean up zip file
ZIP_FILE.unlink()
print("üóëÔ∏è Cleaned up zip file")

# Count extracted videos before renaming.
# Extensions are compared case-insensitively in a single traversal. Globbing
# "*.mp4" and "*.MP4" separately counts every file twice where pathlib matching
# is case-insensitive (e.g. Windows) and misses mixed-case names such as ".Mp4".
video_suffixes = {".mp4", ".mov"}
clips_dir = OUTPUT_DIR / "clips"
if clips_dir.exists():
    video_files = sorted(
        path
        for path in clips_dir.rglob("*")
        if path.is_file() and path.suffix.lower() in video_suffixes
    )
    print(f"üìπ Found {len(video_files)} video files in clips/")
else:
    print("‚ùå No clips directory found!")
    video_files = []

In [None]:
# Rename and flatten clips using rename_clips.py
#
# Runs ../preprocessing/rename_clips.py in a subprocess (with "--root ..") when
# the extraction cell found videos, then reports how many video files sit
# directly in OUTPUT_DIR. All paths are relative to the current working
# directory.
import shutil
import subprocess
import sys

if video_files and clips_dir.exists():
    print("\nüîÑ Renaming and flattening clips...")

    # Copy labels.csv if it doesn't exist
    labels_source = Path("../shared/keypoints_all/labels.csv")
    labels_dest = OUTPUT_DIR / "labels.csv"
    if labels_source.exists() and not labels_dest.exists():
        shutil.copy2(labels_source, labels_dest)
        print(f"üìã Copied labels.csv to {labels_dest}")

    try:
        # Run rename_clips.py with the same interpreter as the kernel.
        result = subprocess.run(
            [sys.executable, "../preprocessing/rename_clips.py", "--root", ".."],
            capture_output=True,
            text=True,
        )

        if result.returncode == 0:
            print("‚úÖ Clips renamed successfully!")

            # Count renamed videos.
            # Extensions are compared case-insensitively in one pass, so files
            # are not double-counted where globbing ignores case and mixed-case
            # extensions are not missed. Sorting keeps the sample deterministic.
            video_suffixes = {".mp4", ".mov"}
            renamed_videos = sorted(
                path
                for path in OUTPUT_DIR.glob("*")
                if path.is_file() and path.suffix.lower() in video_suffixes
            )
            print(f"üìπ Renamed {len(renamed_videos)} video files")

            # Show sample renamed files
            if renamed_videos:
                print("\nüìÑ Sample renamed files:")
                for i, video in enumerate(renamed_videos[:5]):
                    file_size = video.stat().st_size / (1024 * 1024)  # MB
                    print(f"  {i+1}. {video.name} ({file_size:.1f} MB)")
                if len(renamed_videos) > 5:
                    print(f"  ... and {len(renamed_videos) - 5} more")
        else:
            print(f"‚ùå Rename failed: {result.stderr}")

    except Exception as e:
        # Best-effort step: report the problem and keep the notebook running.
        print(f"‚ùå Error running rename_clips: {e}")
        print("üìÅ Videos remain in clips/ directory")
else:
    print("‚ùå No video files found to rename!")

In [None]:
# Final status and next steps
#
# Counts the video files that sit directly in OUTPUT_DIR and prints either the
# follow-up instructions or a failure notice.

# Check final video count.
# Extensions are compared case-insensitively in a single pass, so files are not
# double-counted where globbing ignores case and mixed-case extensions are not
# missed.
video_suffixes = {".mp4", ".mov"}
final_videos = [
    path
    for path in OUTPUT_DIR.glob("*")
    if path.is_file() and path.suffix.lower() in video_suffixes
]

if final_videos:
    # Only announce success when there is actually something to process.
    print("\n‚úÖ Setup complete!")
    print(f"üìÅ Videos location: {OUTPUT_DIR}")
    print(f"üìπ Total videos ready: {len(final_videos)}")
    print("\nüöÄ Next steps:")
    print("1. Run vast_ai_setup.ipynb to set up the project")
    print("2. Run run_multi_preprocess.ipynb to process videos")
    print("3. Use the processed data for training")
else:
    print(f"üìÅ Videos location: {OUTPUT_DIR}")
    print("‚ùå No videos found in final location!")
