# Part 1: Box Score Advanced Extraction (2015-2016 Season)

**Games:** 1,230 regular-season games, split into 3 batches of 410

**Output:** `pt1_advanced_2015-2016.csv`, `pt2_advanced_2015-2016.csv`, `pt3_advanced_2015-2016.csv`, plus the combined `advanced_2015-2016_full.csv`

This notebook fetches advanced box score data from the NBA API's `BoxScoreAdvancedV3` endpoint for the 2015-16 regular season.

In [None]:
from nba_api.stats.endpoints import (
    leaguegamefinder,
    BoxScoreAdvancedV3
)
import pandas as pd
from time import sleep
import random

## Step 1: Get all game IDs for 2015-16 Regular Season

In [None]:
# Ask the league game finder for every 2015-16 regular-season game.
gamefinder = leaguegamefinder.LeagueGameFinder(
    league_id_nullable='00',
    season_type_nullable='Regular Season',
    season_nullable='2015-16',
)

# The first returned frame holds the game rows; unique() collapses repeated
# GAME_ID values (presumably one row per team per game - verify).
games = gamefinder.get_data_frames()[0]
unique_ids = games['GAME_ID'].unique()
all_game_ids = unique_ids.tolist()

print(f"Total games in season: {len(all_game_ids)}")

# Carve the ID list into three consecutive slices of 410 IDs each
# (the last slice takes whatever remains after the first 820).
batch1_ids, batch2_ids, batch3_ids = (
    all_game_ids[:410],
    all_game_ids[410:820],
    all_game_ids[820:],
)

for batch_number, batch_ids in enumerate((batch1_ids, batch2_ids, batch3_ids), start=1):
    print(f"Batch {batch_number}: {len(batch_ids)} games")

## Step 2: Define helper function

In [None]:
def get_game_advanced_data(game_id: str) -> pd.DataFrame | None:
    """Fetch advanced box score stats for a single game."""
    try:
        response = BoxScoreAdvancedV3(
            game_id=game_id,
            timeout=31
        )

        df = response.player_stats.get_data_frame()
        
        if df.empty:
            return None
            
        # Keep only relevant columns
        df = df[['gameId', 'teamId', 'personId', 'minutes', 'usagePercentage', 'pace', 'possessions']]
        return df

    except Exception as e:
        print(f"[ERROR] game_id={game_id}: {e}")
        return None

## Step 3a: Fetch data — Batch 1 (games 1-410)

In [None]:
# One DataFrame per successfully fetched game in batch 1.
all_player_stats = []
batch1_total = len(batch1_ids)

for position, current_id in enumerate(batch1_ids, start=1):
    frame = get_game_advanced_data(current_id)
    if frame is not None:
        all_player_stats.append(frame)

    # Wait a random 1.2-2.1 s between requests as rate limiting.
    pause = random.uniform(1.2, 2.1)
    sleep(pause)

    # Report progress after every 25th game.
    if position % 25 == 0:
        print(f"[INFO] Processed {position}/{batch1_total} games")

print(f"\n[COMPLETE] Successfully fetched data for {len(all_player_stats)} games")

In [None]:
# Combine and save batch 1
# pd.concat fails with an opaque "No objects to concatenate" error on an
# empty list, so stop early with a message that names the real cause.
if not all_player_stats:
    raise ValueError(
        "No game data was fetched for batch 1; re-run the fetch cell before saving"
    )

combined_df = pd.concat(all_player_stats, ignore_index=True)

output_path = 'pt1_advanced_2015-2016.csv'
combined_df.to_csv(output_path, index=False)

print(f"Saved {len(combined_df)} rows to {output_path}")

## Step 3b: Fetch data — Batch 2 (games 411-820)

In [None]:
# One DataFrame per successfully fetched game in batch 2.
all_player_stats = []
batch2_total = len(batch2_ids)

for position, current_id in enumerate(batch2_ids, start=1):
    frame = get_game_advanced_data(current_id)
    if frame is not None:
        all_player_stats.append(frame)

    # Wait a random 1.6-2.5 s between requests as rate limiting.
    pause = random.uniform(1.6, 2.5)
    sleep(pause)

    # Report progress after every 20th game.
    if position % 20 == 0:
        print(f"[INFO] Processed {position}/{batch2_total} games")

print(f"\n[COMPLETE] Successfully fetched data for {len(all_player_stats)} games")

In [None]:
# Combine and save batch 2
# pd.concat fails with an opaque "No objects to concatenate" error on an
# empty list, so stop early with a message that names the real cause.
if not all_player_stats:
    raise ValueError(
        "No game data was fetched for batch 2; re-run the fetch cell before saving"
    )

combined_df = pd.concat(all_player_stats, ignore_index=True)

output_path = 'pt2_advanced_2015-2016.csv'
combined_df.to_csv(output_path, index=False)

print(f"Saved {len(combined_df)} rows to {output_path}")

## Step 3c: Fetch data — Batch 3 (games 821-1230)

In [None]:
# One DataFrame per successfully fetched game in batch 3.
all_player_stats = []
batch3_total = len(batch3_ids)

for position, current_id in enumerate(batch3_ids, start=1):
    frame = get_game_advanced_data(current_id)
    if frame is not None:
        all_player_stats.append(frame)

    # Wait a random 1.6-2.6 s between requests as rate limiting.
    pause = random.uniform(1.6, 2.6)
    sleep(pause)

    # Report progress after every 20th game.
    if position % 20 == 0:
        print(f"[INFO] Processed {position}/{batch3_total} games")

print(f"\n[COMPLETE] Successfully fetched data for {len(all_player_stats)} games")

In [None]:
# Combine and save batch 3
# pd.concat fails with an opaque "No objects to concatenate" error on an
# empty list, so stop early with a message that names the real cause.
if not all_player_stats:
    raise ValueError(
        "No game data was fetched for batch 3; re-run the fetch cell before saving"
    )

combined_df = pd.concat(all_player_stats, ignore_index=True)

output_path = 'pt3_advanced_2015-2016.csv'
combined_df.to_csv(output_path, index=False)

print(f"Saved {len(combined_df)} rows to {output_path}")

## Step 4: Concatenate all CSVs into one

In [None]:
# Reload the three per-batch CSV files written by the earlier cells.
part_paths = [
    'pt1_advanced_2015-2016.csv',
    'pt2_advanced_2015-2016.csv',
    'pt3_advanced_2015-2016.csv',
]
pt1, pt2, pt3 = map(pd.read_csv, part_paths)

# Stack all 3 dataframes in batch order with a fresh 0..n-1 index,
# then write the full-season file without the index column.
parts = [pt1, pt2, pt3]
combined_df = pd.concat(parts, ignore_index=True)
combined_df.to_csv('advanced_2015-2016_full.csv', index=False)

print(f"Combined {len(combined_df)} total rows into advanced_2015-2016_full.csv")