# Part 1: Player Speed & Distance Extraction (2015-2016 Season)

**Games:** 1-600 (out of 1230)

**Output:** `pt1_movement_2015-2016.csv`

This notebook fetches player tracking data from the NBA API's `BoxScorePlayerTrackV3` endpoint for the first 600 games of the 2015-16 season.

In [2]:
from nba_api.stats.endpoints import (
    leaguegamefinder,
    BoxScorePlayerTrackV3
)
import pandas as pd
from time import sleep

## Step 1: Get game IDs 1-600

In [3]:
# Grab all games from 2015-16 season
gamefinder = leaguegamefinder.LeagueGameFinder(
    season_nullable='2015-16',
    league_id_nullable='00',
    season_type_nullable='Regular Season'
)

games = gamefinder.get_data_frames()[0]
all_game_ids = games['GAME_ID'].unique().tolist()

# Take first 12300 games
game_ids = all_game_ids[800:] 

print(f"Total games in season: {len(all_game_ids)}")
print(f"Processing games 1-1230: {len(game_ids)} games")

# print(game_ids)

Total games in season: 1230
Processing games 1-1230: 430 games


## Step 2: Define helper function

In [4]:
def get_game_tracking_data(game_id: str) -> pd.DataFrame | None:
    """Fetch player tracking stats for a single game."""
    try:
        response = BoxScorePlayerTrackV3(
            game_id=game_id,
            timeout=31
        )

        df = response.player_stats.get_data_frame()
        
        if df.empty:
            return None
            
        # Keep only relevant columns
        df = df[['gameId', 'teamId', 'personId', 'speed', 'distance']]
        return df

    except Exception as e:
        print(f"[ERROR] game_id={game_id}: {e}")
        return None

## Step 3: Fetch data (games 1-1230)

In [5]:
import random

all_player_stats = []

for i, game_id in enumerate(game_ids, start=1):
    game_data = get_game_tracking_data(game_id)
    
    if game_data is not None:
        all_player_stats.append(game_data)
    
    # Rate limiting - 2 seconds between requests
    sleep(random.uniform(1.2, 2.1))
    
    # Progress update every 50 games
    if i % 25 == 0:
        print(f"[INFO] Processed {i}/1200 games")

print(f"\n[COMPLETE] Successfully fetched data for {len(all_player_stats)} games")

[INFO] Processed 25/1200 games
[INFO] Processed 50/1200 games
[INFO] Processed 75/1200 games
[INFO] Processed 100/1200 games
[INFO] Processed 125/1200 games
[INFO] Processed 150/1200 games
[INFO] Processed 175/1200 games
[INFO] Processed 200/1200 games
[INFO] Processed 225/1200 games
[INFO] Processed 250/1200 games
[INFO] Processed 275/1200 games
[INFO] Processed 300/1200 games
[INFO] Processed 325/1200 games
[INFO] Processed 350/1200 games
[INFO] Processed 375/1200 games
[INFO] Processed 400/1200 games
[INFO] Processed 425/1200 games

[COMPLETE] Successfully fetched data for 430 games


## Step 4: Save raw data to CSV

In [8]:
# Combine all game data
combined_df = pd.concat(all_player_stats, ignore_index=True)

# Save to CSV (raw data, not aggregated yet)
output_path = 'pt3_movement_2015-2016.csv'
combined_df.to_csv(output_path, index=False)

print(f"Saved {len(combined_df)} rows to {output_path}")

Saved 11052 rows to pt3_movement_2015-2016.csv


# Step 5: Concat CSVs

In [None]:
pt1 = pd.read_csv('../../data/pt1_movement_2015-2016.csv')
pt2 = pd.read_csv('../../data/pt2_movement_2015-2016.csv')
pt3 = pd.read_csv('../../data/pt3_movement_2015-2016.csv')

# Combine all 3 dataframes
combined_df = pd.concat([pt1, pt2, pt3], ignore_index=True)
combined_df.to_csv('../../data/movement_2015-2016_full.csv', index=False)