# Downloading Pitch Videos

This notebook shows how to download pitch video clips from Baseball Savant.

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))

import pandas as pd
from mlb_pitcher_videos import VideoDownloader

## Load Statcast Data

First, load the pitch data that was scraped in the previous notebook (`02_scrape_statcast_data.ipynb`).

In [None]:
# Load the Statcast pitch data from the CSV written by the scraping notebook.
data_path = Path('../data/statcast.csv')

# Stop here when the CSV is missing. Merely printing a warning would leave
# `data` undefined (or stale from an earlier kernel run), so the cells below
# would fail with a confusing NameError or silently work on old data.
if not data_path.exists():
    raise FileNotFoundError(
        f"No statcast data found at {data_path}! "
        "Run 02_scrape_statcast_data.ipynb first."
    )

data = pd.read_csv(data_path)
print(f"Loaded {len(data)} pitches")

In [None]:
# Show how many rows were loaded for each pitch type
print("Pitch types:", data['pitch_type'].value_counts(), sep="\n")

## Download Videos

Now let's download the video clips. 

**Note:** Downloading many videos can take a while, so start with a small batch to test.

In [None]:
# Where the downloaded clips are written
output_dir = Path('../data/videos')

# CONFIGURE: upper bound on the number of videos fetched per run.
# Keep it small for a first test, then raise it.
MAX_VIDEOS = 10

In [None]:
# Create the downloader shared by the cells below.
#   delay:       seconds between requests (be nice to servers)
#   max_retries: retry failed downloads
downloader = VideoDownloader(delay=0.5, max_retries=3)

In [None]:
# Announce the batch size and destination, then run the download
print(
    f"Downloading up to {MAX_VIDEOS} videos...",
    f"Output directory: {output_dir}",
    "",
    sep="\n",
)

results = downloader.download_from_dataframe(data, output_dir=output_dir, max_videos=MAX_VIDEOS)

In [None]:
# Report the counters returned by the download step as one framed block
print(
    "\n" + "="*40,
    "Download Summary",
    "="*40,
    f"Total attempted: {results['total']}",
    f"Successful: {results['successful']}",
    f"Failed: {results['failed']}",
    f"Skipped (already existed): {results['skipped']}",
    sep="\n",
)

In [None]:
# List downloaded videos.
# Path.glob yields entries in arbitrary, filesystem-dependent order, so sort
# the paths to keep the preview stable from one run to the next.
videos = sorted(output_dir.glob('*.mp4'))
preview_limit = 10  # show at most this many file names

print(f"\nVideos in {output_dir}:")
for video_path in videos[:preview_limit]:
    # st_size is in bytes; convert to mebibytes for display
    size_mb = video_path.stat().st_size / (1024 * 1024)
    print(f"  {video_path.name} ({size_mb:.1f} MB)")
if len(videos) > preview_limit:
    print(f"  ... and {len(videos) - preview_limit} more")

## Download More Videos

Once you've tested with a small batch, increase `MAX_VIDEOS` and run again.

Already-downloaded videos will be skipped automatically.

In [None]:
# Download more (uncomment to run)
# MAX_VIDEOS = 100
# results = downloader.download_from_dataframe(data, output_dir, max_videos=MAX_VIDEOS)

## Next: Isolate Pitcher

Continue to `04_isolate_pitcher.ipynb` to crop and trim videos to focus on the pitcher.