In [12]:
%load_ext autoreload
%autoreload 2

import nfl_data_py as nfl
import pandas as pd
from dave_ledger.core import paths

# Resolve the raw/processed data folders relative to the repository root
# located by the project's path helper.
repo_root = paths.find_repo_root()
raw_data_dir, processed_data_dir = (
    repo_root / "data" / stage for stage in ("raw", "processed")
)

# Create both folders (and any missing parents); a no-op when they already exist.
for _data_dir in (raw_data_dir, processed_data_dir):
    _data_dir.mkdir(parents=True, exist_ok=True)

print(f"üìÇ Data directories ready at: {raw_data_dir}")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
üìÇ Data directories ready at: /home/winstonunderwood/projects/dave-ledger/data/raw


In [9]:
import nflreadpy as nfl
import pandas as pd

# CONFIGURATION
# 2025 is valid now because we fixed the library!
CURRENT_YEAR = 2025
HISTORY_YEARS = 5
# Newest season first: CURRENT_YEAR, CURRENT_YEAR - 1, ... (HISTORY_YEARS entries).
YEARS = [CURRENT_YEAR - i for i in range(HISTORY_YEARS)]
# Set to True to ignore existing parquet caches and download everything again.
# Needed to pick up new data, because a cache file, once written, is otherwise
# reused on every later run.
FORCE_REFRESH = False

print(f"üìÖ Target History Window: {YEARS}")

# Define file paths: one parquet cache per dataset, named after the oldest and
# newest season in the window.
files = {
    dataset: raw_data_dir / f"{dataset}_{YEARS[-1]}_{YEARS[0]}.parquet"
    for dataset in ("weekly", "snaps", "rosters")
}


def _load_or_download(cache_path, cached_label, download_label, record_label,
                      fetch, seasons, force=False) -> pd.DataFrame:
    """Return a dataset from its parquet cache, downloading it on a cache miss.

    Args:
        cache_path: Location of the parquet cache file.
            NOTE(review): assumed to be a pathlib.Path (uses ``exists``,
            ``with_name`` and ``replace``) -- confirm ``paths.find_repo_root``
            returns one.
        cached_label: Text inserted into the "Loading cached ..." message.
        download_label: Text inserted into the "Downloading ..." message.
        record_label: Text inserted into the "Cached N ... records." message.
        fetch: Callable taking ``seasons`` and returning a pandas DataFrame.
        seasons: List of seasons passed to ``fetch`` and shown in the message.
        force: When True, skip the cache and download again.

    Returns:
        The dataset as a pandas DataFrame.
    """
    if cache_path.exists() and not force:
        print(f"‚ö° Loading cached {cached_label}...")
        return pd.read_parquet(cache_path)

    print(f"‚¨áÔ∏è Downloading {download_label} for {seasons}...")
    frame = fetch(seasons)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # write never leaves a truncated parquet file that a later run would
    # mistake for a valid cache.
    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
    frame.to_parquet(tmp_path, index=False)
    tmp_path.replace(cache_path)

    print(f"üíæ Cached {len(frame)} {record_label} records.")
    return frame


def _fetch_weekly_regular_season(seasons) -> pd.DataFrame:
    """Download weekly player stats and keep only regular-season rows."""
    weekly = nfl.load_player_stats(seasons=seasons).to_pandas()
    # Filter for Regular Season immediately. The index is reset so the frame
    # returned by a fresh download matches what a later cached run reads back
    # (the cache is written with index=False).
    return weekly[weekly['season_type'] == 'REG'].reset_index(drop=True)


def _fetch_snap_counts(seasons) -> pd.DataFrame:
    """Download snap counts for ``seasons`` as a pandas DataFrame."""
    return nfl.load_snap_counts(seasons=seasons).to_pandas()


def _fetch_rosters(seasons) -> pd.DataFrame:
    """Download rosters for ``seasons`` as a pandas DataFrame."""
    return nfl.load_rosters(seasons=seasons).to_pandas()


# --- 1. Weekly Stats (The dividends) ---
df_weekly = _load_or_download(
    files["weekly"], "WEEKLY data", "WEEKLY stats", "weekly",
    _fetch_weekly_regular_season, YEARS, force=FORCE_REFRESH,
)

# --- 2. Snap Counts (The opportunity) ---
df_snaps = _load_or_download(
    files["snaps"], "SNAP counts", "SNAP counts", "snap",
    _fetch_snap_counts, YEARS, force=FORCE_REFRESH,
)

# --- 3. Rosters (The context) ---
df_rosters = _load_or_download(
    files["rosters"], "ROSTER data", "ROSTER data", "roster",
    _fetch_rosters, YEARS, force=FORCE_REFRESH,
)

print("‚úÖ Data Ingestion Complete (via nflreadpy).")

üìÖ Target History Window: [2025, 2024, 2023, 2022, 2021]
‚¨áÔ∏è Downloading WEEKLY stats for [2025, 2024, 2023, 2022, 2021]...
üíæ Cached 89515 weekly records.
‚¨áÔ∏è Downloading SNAP counts for [2025, 2024, 2023, 2022, 2021]...
üíæ Cached 129927 snap records.
‚¨áÔ∏è Downloading ROSTER data for [2025, 2024, 2023, 2022, 2021]...
üíæ Cached 15534 roster records.
‚úÖ Data Ingestion Complete (via nflreadpy).
