# NBA 2024-25: Utilizing Roles
## Notebook 01: Data Acquisition
This notebook collects game-level player statistics for the 2024-25 NBA regular season using the NBA API. It combines all player game logs into a single DataFrame.

In [2]:
# Install NBA API
!pip install nba_api



In [3]:
# Import libraries
import pandas as pd
import time
from tqdm import tqdm
from nba_api.stats.endpoints import PlayerGameLog, leaguedashplayerstats
from nba_api.stats.static import players

___
## Acquiring Game-Level Player Statistics

In [5]:
# Season-level stats for every player who registered a statistic during the
# 2024-25 regular season; the first frame returned by the endpoint is kept
season_stats_frames = leaguedashplayerstats.LeagueDashPlayerStats(
    season="2024-25",
    season_type_all_star="Regular Season",
).get_data_frames()
player_stats = season_stats_frames[0]

print(f"{len(player_stats)} players")

569 players


In [6]:
# Distinct player IDs from the season stats table, in order of first
# appearance, as a plain Python list
player_ids = player_stats["PLAYER_ID"].drop_duplicates().tolist()

In [7]:
# --- Download each player's game log ---
# One request per player ID. The pause between requests runs on every
# iteration (success or failure), so a failed call is never followed
# immediately by another request.

all_game_logs = []
failed_player_ids = []  # IDs whose request raised, kept so they can be retried

for pid in tqdm(player_ids, desc="Downloading player game logs"):
    try:
        game_logs = PlayerGameLog(
            player_id=str(pid),
            season="2024-25",
            # Stated explicitly to match the season-stats request above
            season_type_all_star="Regular Season",
        ).get_data_frames()[0]
        # Tag every row with the ID that was requested
        game_logs["PLAYER_ID"] = pid
        all_game_logs.append(game_logs)
    except Exception as e:
        # Best effort: note the failure and carry on with the remaining players
        failed_player_ids.append(pid)
        print(f"Failed: {pid}", e)
    finally:
        time.sleep(0.6)

if failed_player_ids:
    print(f"{len(failed_player_ids)} player(s) failed to download: {failed_player_ids}")

Downloading player game logs: 100%|██████████| 569/569 [07:10<00:00,  1.32it/s]


In [8]:
# Stack the per-player frames row-wise into one table with a fresh integer index
player_game_logs_2024_25 = pd.concat(objs=all_game_logs, axis=0, ignore_index=True)
player_game_logs_2024_25.head()

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PLAYER_ID
0,22024,1630639,22401197,"Apr 13, 2025",TOR @ SAS,L,21,3,7,0.429,...,0,0,0,0,1,1,14,-5,1,1630639
1,22024,1630639,22401178,"Apr 11, 2025",TOR @ DAL,L,32,3,10,0.3,...,10,2,1,0,0,3,12,-10,1,1630639
2,22024,1630639,22401158,"Apr 09, 2025",TOR vs. CHA,W,28,5,9,0.556,...,6,7,1,0,0,1,14,18,1,1630639
3,22024,1630639,22401134,"Apr 06, 2025",TOR @ BKN,W,22,5,9,0.556,...,1,0,2,0,2,2,13,-6,1,1630639
4,22024,1630639,22401121,"Apr 04, 2025",TOR vs. DET,L,23,2,7,0.286,...,3,1,1,2,1,2,9,-2,1,1630639


In [9]:
# Remove the upper-case `PLAYER_ID` column added in the download loop; the
# preview above shows it alongside the API's own `Player_ID` column
player_game_logs_2024_25 = player_game_logs_2024_25.drop(columns="PLAYER_ID")

In [10]:
# Confirm columns: the upper-case `PLAYER_ID` column should no longer be listed
player_game_logs_2024_25.columns

Index(['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE'],
      dtype='object')

> Player IDs are present, but player names are missing.

___
## Get Player Names

In [13]:
# Get player directory
# The static module is imported under a local alias because `players` is
# rebound to a DataFrame on the next line; calling `players.get_players()`
# directly would raise AttributeError when this cell is run a second time.
from nba_api.stats.static import players as nba_static_players

players = pd.DataFrame(nba_static_players.get_players())
players.head(5)

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False


In [14]:
# Align the directory's column names with the game-log naming
# (`id` -> `Player_ID`, `full_name` -> `Player_Name`) so the two can be merged
directory_column_names = {"id": "Player_ID", "full_name": "Player_Name"}
players = players.rename(columns=directory_column_names)

In [15]:
# Merge player names with player game logs
# Any `Player_Name` column left over from an earlier run of this cell is
# dropped first, so re-running does not produce `Player_Name_x`/`_y` columns.
# `validate="many_to_one"` makes pandas raise if the directory holds a
# duplicated `Player_ID`, which would otherwise silently duplicate game rows.
player_game_logs_2024_25 = player_game_logs_2024_25.drop(
    columns=["Player_Name"], errors="ignore"
).merge(
    players[["Player_ID", "Player_Name"]],
    on="Player_ID",
    how="left",
    validate="many_to_one",
)

# The left join keeps every game row; rows whose ID is absent from the
# directory get a missing name, so report how many there are.
missing_names = int(player_game_logs_2024_25["Player_Name"].isna().sum())
print(f"Rows without a matched player name: {missing_names:,}")

In [16]:
# Move `Player_Name` to the front, keeping the other columns in their current order
other_columns = [name for name in player_game_logs_2024_25.columns if name != "Player_Name"]
cols = ["Player_Name", *other_columns]
player_game_logs_2024_25 = player_game_logs_2024_25[cols]

In [17]:
# Inspect structure: preview the first five rows; `Player_Name` should now be the leading column
player_game_logs_2024_25.head(5)

Unnamed: 0,Player_Name,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,A.J. Lawson,22024,1630639,22401197,"Apr 13, 2025",TOR @ SAS,L,21,3,7,...,0,0,0,0,0,1,1,14,-5,1
1,A.J. Lawson,22024,1630639,22401178,"Apr 11, 2025",TOR @ DAL,L,32,3,10,...,8,10,2,1,0,0,3,12,-10,1
2,A.J. Lawson,22024,1630639,22401158,"Apr 09, 2025",TOR vs. CHA,W,28,5,9,...,4,6,7,1,0,0,1,14,18,1
3,A.J. Lawson,22024,1630639,22401134,"Apr 06, 2025",TOR @ BKN,W,22,5,9,...,1,1,0,2,0,2,2,13,-6,1
4,A.J. Lawson,22024,1630639,22401121,"Apr 04, 2025",TOR vs. DET,L,23,2,7,...,2,3,1,1,2,1,2,9,-2,1


In [18]:
# Quick sanity checks
# The column key uses single quotes: reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
print(f"Number of rows: {len(player_game_logs_2024_25):,}")
print(f"Unique players: {player_game_logs_2024_25['Player_ID'].nunique()}")

Number of rows: 26,306
Unique players: 569


In [19]:
# Remove the `VIDEO_AVAILABLE` column, which is treated as nonessential here
player_game_logs_2024_25 = player_game_logs_2024_25.drop(columns="VIDEO_AVAILABLE")

In [20]:
# Confirm updated columns: `VIDEO_AVAILABLE` should no longer be listed
player_game_logs_2024_25.columns

Index(['Player_Name', 'SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE',
       'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS'],
      dtype='object')

___
## Save

In [22]:
# Write the combined game-log table to a Parquet file in the working directory
output_file = "NBA_Player_GameLogs_2024_25.parquet"
player_game_logs_2024_25.to_parquet(output_file)