# Imports

In [1]:
import pandas as pd
import time
import random
import os
from nba_api.stats.endpoints import leaguegamelog, boxscoretraditionalv3, boxscoreadvancedv3

# Lift pandas' column display limit so wide DataFrames are shown without
# eliding columns (None is passed as the 'display.max_columns' value).
pd.set_option('display.max_columns', None)

# Configuration

In [2]:
# Season to download. The same string is passed to LeagueGameLog as the
# `season` argument and is embedded in the output directory and CSV file names.
season = '2021-22'

# Functions

In [3]:
# Sleep for a random duration between min_sec and max_sec seconds
def smart_sleep(min_sec=0.8, max_sec=1.4):
    """Block for a randomly chosen number of seconds.

    The pause length is drawn uniformly between ``min_sec`` and ``max_sec``
    on every call.
    """
    pause_seconds = random.uniform(min_sec, max_sec)
    time.sleep(pause_seconds)

In [4]:
# Get a set of existing game IDs from a CSV file
def get_existing_game_ids(csv_path):
    """Return the distinct ``gameId`` values stored in a CSV file.

    Only the ``gameId`` column is loaded, and it is read as text so that
    identifiers keep any leading zeros. When ``csv_path`` does not exist an
    empty set is returned.
    """
    if not os.path.exists(csv_path):
        return set()

    game_id_column = pd.read_csv(
        csv_path,
        usecols=['gameId'],
        dtype={'gameId': str},
    )['gameId']
    return set(game_id_column.unique())

In [5]:
# Get all game IDs and dates for a given season
def get_all_game_ids_and_dates(season):
    """Return one row per regular-season game with its ID and date.

    Args:
        season: Season string passed to ``LeagueGameLog`` as ``season``
            (e.g. ``'2021-22'``).

    Returns:
        A DataFrame with columns ``GAME_ID`` and ``GAME_DATE`` (converted
        with ``pd.to_datetime``), de-duplicated on ``GAME_ID``. The index is
        whatever remains of the game log's index after de-duplication.
    """
    gamelog = leaguegamelog.LeagueGameLog(
        season=season,
        player_or_team_abbreviation='T',
        season_type_all_star='Regular Season',
    )
    smart_sleep()

    # Take an explicit copy of the two columns: assigning into a frame that
    # was derived by column selection triggers pandas' SettingWithCopyWarning.
    games = gamelog.get_data_frames()[0][['GAME_ID', 'GAME_DATE']].copy()
    games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])

    # Presumably the team-level log ('T') lists each game once per
    # participating team, hence the de-duplication -- TODO confirm.
    return games.drop_duplicates(subset='GAME_ID')

In [6]:
# Fetch player and team traditional box scores for a game
def fetch_traditional(game_id):
    """Download the traditional box score for one game.

    Builds a ``BoxScoreTraditionalV3`` for ``game_id``, pauses via
    ``smart_sleep``, and returns a ``(player_df, team_df)`` tuple taken from
    the endpoint's ``player_stats`` and ``team_stats`` data sets.
    """
    box = boxscoretraditionalv3.BoxScoreTraditionalV3(game_id=game_id)
    smart_sleep()

    # Order matters: callers unpack the result as (player, team).
    datasets = (box.player_stats, box.team_stats)
    return tuple(dataset.get_data_frame() for dataset in datasets)


In [7]:
# Fetch player and team advanced box scores for a game
def fetch_advanced(game_id):
    """Download the advanced box score for one game.

    Builds a ``BoxScoreAdvancedV3`` for ``game_id``, pauses via
    ``smart_sleep``, and returns a ``(player_df, team_df)`` tuple taken from
    the endpoint's ``player_stats`` and ``team_stats`` data sets.
    """
    box = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id=game_id)
    smart_sleep()

    # Order matters: callers unpack the result as (player, team).
    datasets = (box.player_stats, box.team_stats)
    return tuple(dataset.get_data_frame() for dataset in datasets)

In [8]:
def _append_csv(frame, path):
    """Append ``frame`` to the CSV at ``path``, writing a header only for a new file."""
    # Append mode creates the file when it is absent, so only the header
    # decision depends on whether the file already exists.
    frame.to_csv(path, mode='a', header=not os.path.exists(path), index=False)


def _fetch_and_store(label, fetch_fn, games, outputs):
    """Fetch each game in ``games`` with ``fetch_fn`` and append the results to CSV.

    Args:
        label: Capitalised box-score kind used in progress messages
            (e.g. ``'Traditional'``).
        fetch_fn: Callable taking a game ID and returning a tuple of
            DataFrames, in the same order as ``outputs``.
        games: DataFrame with ``GAME_ID`` and ``GAME_DATE`` columns.
        outputs: Sequence of ``(csv_path, missing_ids)`` pairs; a returned
            frame is appended to ``csv_path`` only if the game ID is in the
            matching ``missing_ids`` set.
    """
    total = len(games)
    print(f"📝 {total} missing {label.lower()} games to fetch.")

    game_rows = zip(games['GAME_ID'], games['GAME_DATE'])
    for position, (game_id, game_date) in enumerate(game_rows, start=1):
        print(f"📦 {label} {game_id} ({game_date.date()}) [{position}/{total}]")

        # Best-effort download: a failure for one game is reported and the
        # loop moves on, so a later run can pick up whatever is still missing.
        try:
            frames = fetch_fn(game_id)
            for frame in frames:
                frame['gameDate'] = game_date

            # Only append to the files that do not already contain this game.
            for frame, (path, missing_ids) in zip(frames, outputs):
                if game_id in missing_ids:
                    _append_csv(frame, path)

            print("✅ Success\n")
        except Exception as e:
            print(f"❌ Error fetching {game_id}: {e}\n")


def main(season):
    """Download the box scores that are not yet stored for ``season``.

    Four CSV files are maintained under ``data/<season>/``: traditional
    player/team and advanced player/team box scores. For each file, the games
    from the season's game log whose IDs are not yet in the file are treated
    as missing. A game is fetched once per box-score kind if it is missing
    from either the player or the team file of that kind, and is appended
    only to the file(s) that lack it.

    Args:
        season: Season string (e.g. ``'2021-22'``) used for the game log
            query and for the output directory and file names.
    """
    season_dir = f"data/{season}"
    os.makedirs(season_dir, exist_ok=True)

    print(f"🔍 Checking existing data for {season}...")

    # Order: traditional player, traditional team, advanced player, advanced team.
    paths = [
        f"{season_dir}/traditional_player_{season}.csv",
        f"{season_dir}/traditional_team_{season}.csv",
        f"{season_dir}/advanced_player_{season}.csv",
        f"{season_dir}/advanced_team_{season}.csv",
    ]
    existing_ids_per_path = [get_existing_game_ids(path) for path in paths]

    print(f"📅 Fetching games for {season} season")
    game_id_df = get_all_game_ids_and_dates(season)

    # Per output file: the games from the log that the file does not hold yet.
    missing_games_per_path = [
        game_id_df[~game_id_df['GAME_ID'].isin(existing_ids)].reset_index(drop=True)
        for existing_ids in existing_ids_per_path
    ]
    # Sets give constant-time membership tests inside the fetch loops.
    missing_ids_per_path = [set(games['GAME_ID']) for games in missing_games_per_path]

    def games_missing_from_either(first, second):
        # Union of the games missing from two files, one row per game.
        combined = pd.concat([missing_games_per_path[first], missing_games_per_path[second]])
        return combined.drop_duplicates(subset='GAME_ID').reset_index(drop=True)

    _fetch_and_store(
        "Traditional",
        fetch_traditional,
        games_missing_from_either(0, 1),
        [(paths[0], missing_ids_per_path[0]), (paths[1], missing_ids_per_path[1])],
    )
    _fetch_and_store(
        "Advanced",
        fetch_advanced,
        games_missing_from_either(2, 3),
        [(paths[2], missing_ids_per_path[2]), (paths[3], missing_ids_per_path[3])],
    )

    print("🏁 Done!")

# Main

In [None]:
# Run the download for the season configured above.
main(season)

🔍 Checking existing data for 2021-22...
📅 Fetching games for 2021-22 season
📝 1230 missing traditional games to fetch.
📦 Traditional 0022100001 (2021-10-19) [1/1230]
✅ Success

📦 Traditional 0022100002 (2021-10-19) [2/1230]
✅ Success

📦 Traditional 0022100004 (2021-10-20) [3/1230]
✅ Success

📦 Traditional 0022100013 (2021-10-20) [4/1230]
✅ Success

📦 Traditional 0022100011 (2021-10-20) [5/1230]
✅ Success

📦 Traditional 0022100005 (2021-10-20) [6/1230]
✅ Success

📦 Traditional 0022100007 (2021-10-20) [7/1230]
✅ Success

📦 Traditional 0022100008 (2021-10-20) [8/1230]
✅ Success

📦 Traditional 0022100009 (2021-10-20) [9/1230]
✅ Success

📦 Traditional 0022100012 (2021-10-20) [10/1230]
✅ Success

📦 Traditional 0022100006 (2021-10-20) [11/1230]
✅ Success

📦 Traditional 0022100003 (2021-10-20) [12/1230]
✅ Success

📦 Traditional 0022100010 (2021-10-20) [13/1230]
✅ Success

📦 Traditional 0022100015 (2021-10-21) [14/1230]
✅ Success

📦 Traditional 0022100016 (2021-10-21) [15/1230]
✅ Success

📦 Tra