# Download and Extract Video Clips

This notebook downloads a zip archive of video clips from Google Drive and extracts it into `../data/raw`, ready for preprocessing.

In [None]:
# Install required packages: gdown is the Google Drive download helper
# imported and used by the cells below.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install gdown

In [None]:
import gdown
import os
from pathlib import Path

# Configuration
# Google Drive file ID of the archive to download.
FILE_ID = '1U9xzaYIUMeXpQFo03tUyJTRwahX88OpJ'
# Directory that receives the download and the extracted files
# (relative to the notebook's working directory).
OUTPUT_DIR = Path('../data/raw')
# Path the downloaded archive is saved to.
ZIP_FILE = OUTPUT_DIR.joinpath('clips.zip')

# Make sure the output directory exists, creating missing parents as needed.
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

print(f"üìÅ Output directory: {OUTPUT_DIR}")
print(f"üì¶ Zip file: {ZIP_FILE}")

In [None]:
# Download video clips from Google Drive
print("üì• Downloading video clips...")
gdown_url = f'https://drive.google.com/uc?id={FILE_ID}'

# Do not assume success: some gdown versions signal a failed download
# (e.g. permission or quota problems) by returning None instead of raising.
# Verify both the return value and that the archive actually exists on disk
# before reporting completion, so the extraction cell never runs on a
# missing file.
downloaded_path = gdown.download(gdown_url, str(ZIP_FILE), quiet=False)
if downloaded_path is None or not ZIP_FILE.is_file():
    raise RuntimeError(
        f"Download failed: no file was written to {ZIP_FILE}. "
        f"Check that the Google Drive file {FILE_ID} is shared publicly "
        "and that its download quota has not been exceeded."
    )
print("‚úÖ Download completed!")

In [None]:
# Extract video clips
print("üìÇ Extracting video clips...")
import zipfile

# Unpack every member of the downloaded archive into OUTPUT_DIR.
with zipfile.ZipFile(ZIP_FILE, 'r') as zip_ref:
    zip_ref.extractall(OUTPUT_DIR)

print("‚úÖ Extraction completed!")

# Clean up zip file: the archive is no longer needed once extracted.
ZIP_FILE.unlink()
print("üóëÔ∏è Cleaned up zip file")

# Count extracted videos.
# Compare suffixes case-insensitively: glob("*.mov") is case-sensitive on
# Linux and would miss files such as "IMG_0001.MOV" or "CLIP.MP4".
# Sorting makes the listing order deterministic across runs and filesystems.
VIDEO_SUFFIXES = {".mp4", ".mov"}
video_files = sorted(
    entry
    for entry in OUTPUT_DIR.iterdir()
    if entry.suffix.lower() in VIDEO_SUFFIXES
)
print(f"üìπ Found {len(video_files)} video files")

In [None]:
# Show sample files: list the name and size (in MB) of up to five videos,
# or report that none were found.
if not video_files:
    print("‚ùå No video files found!")
else:
    preview_count = 5
    bytes_per_mb = 1024 * 1024

    print("\nüìÑ Sample video files:")
    for position, clip in enumerate(video_files[:preview_count], start=1):
        size_mb = clip.stat().st_size / bytes_per_mb
        print(f"  {position}. {clip.name} ({size_mb:.1f} MB)")

    # Mention how many videos were left out of the preview, if any.
    hidden_count = len(video_files) - preview_count
    if hidden_count > 0:
        print(f"  ... and {hidden_count} more")

    print(f"\n‚úÖ Ready for preprocessing!")
    print(f"üìÅ Videos location: {OUTPUT_DIR}")

In [None]:
# Next steps: print the follow-up actions, one per line, under a heading.
next_steps = (
    "1. Run vast_ai_setup.ipynb to set up the project",
    "2. Run run_multi_preprocess.ipynb to process videos",
    "3. Use the processed data for training",
)

print("\nüöÄ Next steps:")
for step in next_steps:
    print(step)
