# Imports

In [1]:
import pandas as pd
import time
import random
import os
from nba_api.stats.endpoints import leaguegamelog, boxscoretraditionalv3, boxscoreadvancedv3

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

# Functions

In [2]:
def smart_sleep(min_sec=1.2, max_sec=3.5):
    """Block for a random duration drawn uniformly from [min_sec, max_sec] seconds.

    Args:
        min_sec: Lower bound of the pause, in seconds.
        max_sec: Upper bound of the pause, in seconds.
    """
    pause_seconds = random.uniform(min_sec, max_sec)
    time.sleep(pause_seconds)

In [3]:
def get_existing_game_ids(csv_path):
    """Return the set of distinct `gameId` values already stored in a CSV file.

    Args:
        csv_path: Path to a CSV file that has a `gameId` column.

    Returns:
        A set of game IDs read as strings, or an empty set when the file
        does not exist.
    """
    if not os.path.exists(csv_path):
        return set()

    # Only the ID column is loaded, and it is forced to str so the values are
    # not parsed as numbers.
    stored_ids = pd.read_csv(csv_path, usecols=['gameId'], dtype={'gameId': str})['gameId']
    return set(stored_ids.unique())

In [4]:
# Returns a dataframe of all regular season games with GAME_ID, GAME_DATE, and SEASON
def get_all_game_ids_and_dates(season):
    """Fetch one row per regular-season game for `season`.

    Args:
        season: Season string passed through to the league game log request
            (the notebook prompt suggests the form "2024-25").

    Returns:
        DataFrame with columns GAME_ID, GAME_DATE (converted with
        `pd.to_datetime`) and SEASON, de-duplicated on GAME_ID.
    """
    # Get all games for regular season
    print(f"📅 Fetching games for {season} season")
    gamelog = leaguegamelog.LeagueGameLog(
        season=season,
        player_or_team_abbreviation='T',
        season_type_all_star='Regular Season'
    )

    # Take an explicit copy of the two columns we keep: assigning into a bare
    # column selection triggers pandas' SettingWithCopyWarning.
    df = gamelog.get_data_frames()[0][['GAME_ID', 'GAME_DATE']].copy()
    df['SEASON'] = season
    df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])

    # Drop duplicates since done for each team
    df = df.drop_duplicates(subset='GAME_ID')

    return df

In [5]:
# Builds the per-player box score table for a single game
def fetch_box_score(game_id):
    """Return one row per player for `game_id`, widened with team and opponent stats.

    Traditional and advanced box scores are requested (with a randomized pause
    between the two requests), joined per player and per team, and the team
    rows are then attached to every player twice: once for the player's own
    team (`team_` prefix, plus unprefixed `gameMin`, `home`, `won`) and once
    for the opposing team (`opp_` prefix).

    Args:
        game_id: Game identifier forwarded to both box score endpoints.

    Returns:
        DataFrame of player rows with the extra team and opponent columns.
    """
    # Columns the advanced payload repeats from the traditional one; they are
    # dropped from the advanced side before merging so they are not duplicated.
    repeated_player_cols = [
        "gameId", "teamId", "teamCity", "teamName", "teamTricode", "teamSlug",
        "firstName", "familyName", "nameI", "playerSlug", "position",
        "comment", "jerseyNum", "minutes",
    ]
    repeated_team_cols = [
        "gameId", "teamCity", "teamName", "teamTricode", "teamSlug", "minutes",
    ]
    # Descriptive columns removed before a team row is attached to its own players.
    team_label_cols = ["gameId", "teamCity", "teamName", "teamTricode", "teamSlug"]

    # Request both box scores, pausing between the two calls
    trad = boxscoretraditionalv3.BoxScoreTraditionalV3(game_id=game_id)
    smart_sleep()
    adv = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id=game_id)

    # Player level: traditional stats left-joined with the advanced stats
    trad_players = trad.player_stats.get_data_frame()
    adv_players = adv.player_stats.get_data_frame().drop(repeated_player_cols, axis=1)
    players = trad_players.merge(adv_players, on='personId', how='left')

    # Team level: same join, keyed on the team
    trad_teams = trad.team_stats.get_data_frame()
    adv_teams = adv.team_stats.get_data_frame().drop(repeated_team_cols, axis=1)
    teams = trad_teams.merge(adv_teams, on='teamId', how='left')

    # NOTE(review): assumes row 0 of the team table is the away team and row 1
    # the home team — TODO confirm against the endpoint's row ordering.
    teams.loc[0, 'home'] = False
    teams.loc[1, 'home'] = True

    # Highest score is flagged as the winner, lowest as the loser
    is_top_score = teams['points'] == teams['points'].max()
    is_bottom_score = teams['points'] == teams['points'].min()
    teams.loc[is_top_score, 'won'] = True
    teams.loc[is_bottom_score, 'won'] = False

    # Team minutes are renamed so they stay distinct from the player 'minutes' column
    teams = teams.rename(columns={'minutes': 'gameMin'})

    def first_other_team(team_id):
        # First teamId in the table that differs from the given one
        return teams.loc[teams['teamId'] != team_id, 'teamId'].values[0]

    teams['opp_teamId'] = teams['teamId'].apply(first_other_team)

    # Attach each player's own team row
    own_side = (
        teams
        .drop(team_label_cols, axis=1)
        .add_prefix('team_')
        .rename(columns={'team_gameMin': 'gameMin', 'team_won': 'won', 'team_home': 'home'})
    )
    players = players.merge(own_side, left_on='teamId', right_on='team_teamId', how='left')
    players = players.drop(['team_teamId', 'team_opp_teamId'], axis=1)

    # Attach the opposing team row: after prefixing, 'opp_opp_teamId' holds the
    # id of the team that row is the opponent OF, so it is the join key.
    opp_side = teams.drop(["gameId", 'gameMin', 'home', 'won'], axis=1).add_prefix('opp_')
    players = players.merge(opp_side, left_on='teamId', right_on='opp_opp_teamId', how='left')

    return players.drop(['opp_opp_teamId', 'opp_teamId'], axis=1)

In [6]:
def main(season):
    """Fetch box scores for every not-yet-saved game of `season` and append them to a CSV.

    Games whose ID already appears in `data/nba_boxscores_{season}.csv` are
    skipped, so the function can be re-run to resume an interrupted download.
    Each game's rows are written as soon as they are fetched. A failure on one
    game is reported and the loop moves on to the next game.

    Args:
        season: Season string used for the game-log request and the CSV file name.
    """
    csv_path = f"data/nba_boxscores_{season}.csv"

    print("🔍 Checking existing data...")
    existing_game_ids = get_existing_game_ids(csv_path)

    print("📅 Getting game IDs + dates for season...")
    games_df = get_all_game_ids_and_dates(season)
    new_games = games_df[~games_df['GAME_ID'].isin(existing_game_ids)]
    new_games = new_games.reset_index(drop=True)

    print(f"🆕 Found {len(new_games)} new games to fetch.")

    # Make sure the output directory exists before any request is made.
    # Without it every to_csv call fails, the except below swallows the error,
    # and the loop spends all of its API calls without saving anything.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)

    for i, row in new_games.iterrows():
        game_id = row['GAME_ID']
        game_date = row['GAME_DATE']

        try:
            print(f"📦 Fetching box score for {game_id} ({game_date.date()}) [{i+1}/{len(new_games)}]")
            df = fetch_box_score(game_id)
            df['gameDate'] = game_date

            # Append mode creates the file on first write; the header is only
            # emitted when the file does not exist yet.
            df.to_csv(csv_path, mode='a', header=not os.path.exists(csv_path), index=False)

            print(f"✅ Success fetching {game_id}\n")
            smart_sleep()

        except Exception as e:
            # Best effort: report the failure and keep going; a later run
            # picks the game up again because it was never written.
            print(f"❌ Error fetching {game_id}: {e}\n")
            continue

# Main

In [None]:
# Ask for the season and run the download. Surrounding whitespace is stripped
# because the value is used verbatim in both the API request and the CSV file name.
user_input = input("Enter the season (e.g., 2024-25): ").strip()
main(user_input)

🔍 Checking existing data...
📅 Getting game IDs + dates for season...
📅 Fetching games for 2024-25 season
🆕 Found 1230 new games to fetch.
📦 Fetching box score for 0022400062 (2024-10-22) [1/1230]
✅ Success fetching 0022400062

📦 Fetching box score for 0022400061 (2024-10-22) [2/1230]
✅ Success fetching 0022400061

📦 Fetching box score for 0022400072 (2024-10-23) [3/1230]
✅ Success fetching 0022400072

📦 Fetching box score for 0022400069 (2024-10-23) [4/1230]
✅ Success fetching 0022400069

📦 Fetching box score for 0022400063 (2024-10-23) [5/1230]
✅ Success fetching 0022400063

📦 Fetching box score for 0022400066 (2024-10-23) [6/1230]
✅ Success fetching 0022400066

📦 Fetching box score for 0022400068 (2024-10-23) [7/1230]
✅ Success fetching 0022400068

📦 Fetching box score for 0022400064 (2024-10-23) [8/1230]
✅ Success fetching 0022400064

📦 Fetching box score for 0022400065 (2024-10-23) [9/1230]
✅ Success fetching 0022400065

📦 Fetching box score for 0022400071 (2024-10-23) [10/1230]
✅ 