In [1]:
from pathlib import Path
import re
import pandas as pd
import numpy as np
import json
import pickle

# Location of the per-season game parquet files, resolved relative to this notebook.
path = Path('..', 'data', 'processed', 'game')
files = sorted(path.glob('*.parquet'))

# Season label (normally 'YYYY') -> DataFrame. Files are visited in sorted
# order and the first file seen for a given season is the one that is kept.
games = {}
for f in files:
    # Read before touching the filename so a broken file or parquet-engine
    # problem fails loudly, even for a file that ends up skipped as a duplicate.
    df = pd.read_parquet(f)
    fname = f.name

    # Season label: the first 19xx/20xx run found in the filename; when there
    # is none, fall back to the filename with its '.parquet' suffix removed.
    m = re.search(r'(19|20)\d{2}', fname)
    year = m.group(0) if m else fname.rsplit('.parquet', 1)[0]
    # Both branches above already yield a str, so this is only the dict key.
    year_str = str(year)

    # Only the first file for a season is registered; later ones are ignored.
    if year_str not in games:
        games[year_str] = df

        # Two-digit alias (e.g. '16') for real 4-digit years; for fallback
        # labels the alias is simply the label itself.
        if len(year_str) == 4 and year_str.isdigit():
            short = year_str[-2:]
        else:
            short = year_str

        # Publish notebook-level names games_YYYY and games_YY for this season.
        globals().update({f'games_{year_str}': df, f'games_{short}': df})
        print(f'Loaded {f} -> games_{year_str} (alias games_{short}), shape={df.shape}')

# dfs is a second name for the same season mapping (not a copy).
dfs = games
# Access a season either as dfs['2017'] or through the games_17 / games_2017 globals.

Loaded ../data/processed/game/games_wide_2016_regular.parquet -> games_2016 (alias games_16), shape=(832, 196)
Loaded ../data/processed/game/games_wide_2017_regular.parquet -> games_2017 (alias games_17), shape=(834, 196)
Loaded ../data/processed/game/games_wide_2018_regular.parquet -> games_2018 (alias games_18), shape=(845, 196)
Loaded ../data/processed/game/games_wide_2019_regular.parquet -> games_2019 (alias games_19), shape=(848, 196)
Loaded ../data/processed/game/games_wide_2020_regular.parquet -> games_2020 (alias games_20), shape=(542, 196)
Loaded ../data/processed/game/games_wide_2021_regular.parquet -> games_2021 (alias games_21), shape=(849, 196)
Loaded ../data/processed/game/games_wide_2022_regular.parquet -> games_2022 (alias games_22), shape=(854, 196)
Loaded ../data/processed/game/games_wide_2023_regular.parquet -> games_2023 (alias games_23), shape=(868, 196)
Loaded ../data/processed/game/games_wide_2024_regular.parquet -> games_2024 (alias games_24), shape=(874, 196)
L