In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests

In [7]:
from nba_api.stats.static import teams

# Team metadata from nba_api's static team list; each entry is read by key below.
nba_teams = teams.get_teams()

# Full team names in alphabetical order.
team_names = sorted(entry['full_name'] for entry in nba_teams)

# Team ids in the order returned by get_teams(); this order is NOT aligned
# with the alphabetically sorted team_names list.
team_ids = [entry['id'] for entry in nba_teams]

# Basic boxscores

In [8]:
from nba_api.stats.endpoints import leaguegamefinder

# Download the game log of every team and combine the results into one frame.
# The first data frame of each response is collected in a list and concatenated
# once at the end: calling pd.concat inside the loop re-copies every row
# gathered so far on each iteration, which is quadratic in the total row count.
team_game_logs = []
for team_id in team_ids:
    finder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
    team_game_logs.append(finder.get_data_frames()[0])

# `games` stays None when team_ids is empty (pd.concat rejects an empty list).
# Each per-team frame keeps its own index labels, so labels repeat across teams.
games = pd.concat(team_game_logs) if team_game_logs else None

In [9]:
# Convert the GAME_DATE column to pandas datetimes so that later cells can
# compare against date cutoffs and sort chronologically.
parsed_game_dates = pd.to_datetime(games['GAME_DATE'])
games['GAME_DATE'] = parsed_game_dates

In [82]:
# Replace the index with a fresh 0..n-1 range and discard the old labels
# (after the per-team concat the labels repeat from one team to the next).
# The frame is rebound rather than modified with inplace=True, so the cell
# produces a new object instead of mutating one that other cells displayed.
games = games.reset_index(drop=True)

In [10]:
# Print the index range plus each column's non-null count and dtype.
games.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 102159 entries, 0 to 2862
Data columns (total 28 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   SEASON_ID          102159 non-null  object        
 1   TEAM_ID            102159 non-null  int64         
 2   TEAM_ABBREVIATION  102159 non-null  object        
 3   TEAM_NAME          102159 non-null  object        
 4   GAME_ID            102159 non-null  object        
 5   GAME_DATE          102159 non-null  datetime64[ns]
 6   MATCHUP            102159 non-null  object        
 7   WL                 102135 non-null  object        
 8   MIN                102159 non-null  int64         
 9   PTS                102159 non-null  int64         
 10  FGM                102159 non-null  int64         
 11  FGA                102159 non-null  int64         
 12  FG_PCT             102152 non-null  float64       
 13  FG3M               102159 non-null  int64     

In [11]:
# Inspect the PLUS_MINUS column on its own (the output below includes NaN entries).
games['PLUS_MINUS']

0       13.6
1       -8.0
2       18.0
3       -3.0
4        2.0
        ... 
2858     NaN
2859     NaN
2860     NaN
2861     NaN
2862     NaN
Name: PLUS_MINUS, Length: 102159, dtype: float64

In [38]:
# Boolean mask: True for rows whose GAME_DATE is later than 2015-09-01.
# NOTE(review): the filter applied to `games` in the following cell uses a
# different cutoff ("2022-09-01") — confirm which date is intended here.
games['GAME_DATE'] > "2015-09-01"

0     True
0     True
1     True
0     True
0     True
      ... 
68    True
67    True
69    True
70    True
67    True
Name: GAME_DATE, Length: 2077, dtype: bool

In [39]:
# Keep only games dated after 2022-09-01 and order them newest first.
# The index is rebuilt as part of this step: the 0..n-1 index seen in the
# table displayed below otherwise exists only if a reset_index cell is run
# after this one, which a top-to-bottom run of the notebook does not do.
games = (
    games[games['GAME_DATE'] > "2022-09-01"]
    .sort_values(by='GAME_DATE', ascending=False)
    .reset_index(drop=True)
)

In [83]:
# Display the current contents of the game log frame.
games

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22022,1610612737,ATL,Atlanta Hawks,0022200974,2023-03-06,ATL @ MIA,,84,56,...,0.500,4.0,8.0,12.0,14,2.0,0,1,9,13.6
1,22022,1610612738,BOS,Boston Celtics,0022200971,2023-03-06,BOS @ CLE,,121,68,...,0.714,3.0,18.0,21.0,18,3.0,2,5,9,12.0
2,22022,1610612749,MIL,Milwaukee Bucks,0022200973,2023-03-05,MIL @ WAS,W,240,117,...,0.773,7.0,38.0,45.0,30,5.0,6,12,15,6.0
3,22022,1610612744,GSW,Golden State Warriors,0022200964,2023-03-05,GSW @ LAL,L,238,105,...,0.833,7.0,35.0,42.0,27,2.0,6,12,21,-8.0
4,22022,1610612742,DAL,Dallas Mavericks,0022200962,2023-03-05,DAL vs. PHX,L,239,126,...,0.921,5.0,26.0,31.0,15,5.0,0,10,22,-4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2072,12022,1610612749,MIL,Milwaukee Bucks,0012200003,2022-10-01,MIL vs. MEM,L,240,102,...,0.714,17.0,33.0,50.0,22,10.0,3,23,29,-5.0
2073,12022,1610612763,MEM,Memphis Grizzlies,0012200003,2022-10-01,MEM @ MIL,W,239,107,...,0.724,10.0,31.0,41.0,25,13.0,4,22,19,5.0
2074,12022,1610612744,GSW,Golden State Warriors,0012200001,2022-09-30,GSW @ WAS,W,239,96,...,0.775,10.0,52.0,62.0,18,6.0,3,17,27,9.0
2075,12022,1610612746,LAC,LA Clippers,0012200002,2022-09-30,LAC vs. MRA,W,242,121,...,0.698,15.0,40.0,55.0,27,9.0,6,23,23,44.2


In [41]:
# Collect every distinct GAME_ID, in order of first appearance in `games`,
# as a plain Python list.
distinct_game_ids = games['GAME_ID'].unique()
game_ids = distinct_game_ids.tolist()

# Number of distinct games.
len(game_ids)

1042

In [16]:
# List the distinct team names that occur in `games`.
games['TEAM_NAME'].unique()

array(['Atlanta Hawks', 'Boston Celtics', 'LA Clippers',
       'Portland Trail Blazers', 'Indiana Pacers', 'Orlando Magic',
       'New York Knicks', 'San Antonio Spurs', 'Charlotte Hornets',
       'Oklahoma City Thunder', 'Brooklyn Nets', 'Milwaukee Bucks',
       'Utah Jazz', 'Los Angeles Lakers', 'Memphis Grizzlies',
       'Phoenix Suns', 'Houston Rockets', 'Chicago Bulls',
       'Washington Wizards', 'Dallas Mavericks', 'Golden State Warriors',
       'Philadelphia 76ers', 'Sacramento Kings', 'Detroit Pistons',
       'Cleveland Cavaliers', 'Miami Heat', 'Minnesota Timberwolves',
       'Toronto Raptors', 'New Orleans Pelicans', 'Denver Nuggets'],
      dtype=object)

# Advanced boxscores

In [17]:
from nba_api.stats.endpoints import boxscoreadvancedv2

In [64]:
# Request the advanced box score for one sample game (the 11th entry of game_ids).
# NOTE(review): the name boxscores_advanced holds the endpoint response object
# here, but is rebound to a DataFrame by the bulk-download loop further down.
boxscores_advanced = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_ids[10])

In [69]:
# Show the second data frame of the response (index 1) as a raw NumPy array.
boxscores_advanced.get_data_frames()[1].to_numpy()

array([['0022200969', 1610612752, 'Knicks', 'NYK', 'New York', '290:00',
        110.9, 113.9, 108.4, 112.2, 2.5, 1.7, 0.383, 0.9, 12.2, 0.278,
        0.662, 0.492, 16.938, 17.4, 0.574, 0.6, 1.0, 0.198, 98.1, 95.17,
        79.31, 115, 0.506],
       ['0022200969', 1610612738, 'Celtics', 'BOS', 'Boston', '290:00',
        108.4, 112.2, 110.9, 113.9, -2.5, -1.7, 0.628, 1.69, 16.6, 0.338,
        0.722, 0.508, 13.445, 13.9, 0.491, 0.538, 1.0, 0.197, 98.1,
        95.17, 79.31, 115, 0.494]], dtype=object)

The `BoxScoreAdvancedV2` calls above and below fetch the advanced boxscore for a given game id.

boxscores_advanced.get_data_frames()[0] returns the advanced boxscore broken up per player

boxscores_advanced.get_data_frames()[1] returns the advanced boxscore broken up per team

In [77]:
%%time

# Download the advanced box score of the first 1000 game ids and keep the
# data frame at index 1 of each response (the per-team table).
# The frames are gathered in a list and concatenated once; calling pd.concat
# inside the loop re-copies all accumulated rows on every iteration.
# This cell issues one request per game, so expect it to take a long time.
team_boxscore_frames = []
for game_id in game_ids[0:1000]:
    response = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
    team_boxscore_frames.append(response.get_data_frames()[1])

# boxscores_advanced stays None when there are no game ids to fetch
# (pd.concat rejects an empty list).
boxscores_advanced = pd.concat(team_boxscore_frames) if team_boxscore_frames else None

CPU times: user 46.8 s, sys: 255 ms, total: 47.1 s
Wall time: 14min 55s


In [52]:
# List the column names of the team-level advanced box score frame.
boxscores_advanced.columns

Index(['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY',
       'MIN', 'E_OFF_RATING', 'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING',
       'E_NET_RATING', 'NET_RATING', 'AST_PCT', 'AST_TOV', 'AST_RATIO',
       'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'E_TM_TOV_PCT', 'TM_TOV_PCT',
       'EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT', 'E_PACE', 'PACE',
       'PACE_PER40', 'POSS', 'PIE'],
      dtype='object')

Keep `GAME_ID` and `TEAM_NAME` for mapping purposes, but do not use them as features.

In [85]:
# Replace the index with a fresh 0..n-1 range and discard the old labels.
# The frame is rebound rather than modified with inplace=True, so the cell
# produces a new object instead of mutating one that other cells displayed.
boxscores_advanced = boxscores_advanced.reset_index(drop=True)

In [90]:
# Drop the first four rows (by position) and keep every column.
# NOTE(review): the reason for skipping exactly four rows is not stated in the
# notebook — confirm. The cell is not idempotent: each re-run removes another
# four rows from boxscores_advanced.
boxscores_advanced = boxscores_advanced.iloc[4:]

In [58]:
# Columns selected from the team-level advanced box score frame:
# the team identifier followed by the advanced metrics.
features_adv_team = [
    'TEAM_ID',
    'OFF_RATING',
    'DEF_RATING',
    'NET_RATING',
    'AST_PCT',
    'AST_TOV',
    'OREB_PCT',
    'DREB_PCT',
    'REB_PCT',
    'TM_TOV_PCT',
    'EFG_PCT',
    'TS_PCT',
    'PACE',
    'POSS',
]

In [57]:
# Number of selected columns. The list is named features_adv_team; the bare
# name `features` is not defined anywhere in the notebook, so it would raise
# NameError on a fresh kernel.
len(features_adv_team)

14

In [91]:
# Show only the columns listed in features_adv_team.
boxscores_advanced[features_adv_team]

Unnamed: 0,TEAM_ID,OFF_RATING,DEF_RATING,NET_RATING,AST_PCT,AST_TOV,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,PACE,POSS
4,1610612764,111.0,114.7,-3.7,0.457,2.33,0.339,0.848,0.556,9.0,0.463,0.490,101.0,100
5,1610612749,114.7,111.0,3.7,0.769,2.50,0.152,0.661,0.444,11.8,0.602,0.631,101.0,102
6,1610612747,114.1,106.1,8.1,0.690,3.22,0.309,0.754,0.536,9.1,0.500,0.540,99.0,99
7,1610612744,106.1,114.1,-8.1,0.750,2.08,0.246,0.691,0.464,13.1,0.495,0.531,99.0,99
8,1610612756,134.0,128.6,5.4,0.551,3.00,0.317,0.829,0.573,9.3,0.618,0.650,97.5,97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1610612743,111.5,109.4,2.1,0.568,1.75,0.308,0.708,0.500,12.5,0.518,0.547,96.0,96
1996,1610612748,111.3,102.8,8.5,0.737,1.75,0.304,0.654,0.472,15.1,0.511,0.560,106.5,106
1997,1610612745,102.8,111.3,-8.5,0.659,1.08,0.346,0.696,0.528,23.4,0.538,0.558,106.5,107
1998,1610612755,114.1,99.0,15.2,0.575,2.09,0.275,0.679,0.481,11.1,0.533,0.571,98.5,99


In [93]:
# Save the frame to a pickle file (relative path, so it lands in the current
# working directory). The whole frame is written, not only the
# features_adv_team columns.
boxscores_advanced.to_pickle('boxscores_advanced_team_2000.pkl')

# Player tracking boxscores

In [None]:
from nba_api.stats.endpoints import boxscoreplayertrackv2

In [None]:
# Request the player tracking box score for one sample game (the first entry of game_ids).
boxscores = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=game_ids[0])

In [None]:
# Show the first data frame (index 0) of the sample response.
boxscores.get_data_frames()[0]

In [72]:
# Download the player tracking box score of every game id and keep the first
# data frame (index 0) of each response.
# The frames are gathered in a list and concatenated once; calling pd.concat
# inside the loop re-copies all accumulated rows on every iteration.
# NOTE(review): this rebinds boxscores_advanced, replacing the team-level
# advanced frame built earlier in the notebook — consider a distinct name.
player_track_frames = []
for game_id in game_ids:
    response = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=game_id)
    player_track_frames.append(response.get_data_frames()[0])

# boxscores_advanced stays None when there are no game ids to fetch
# (pd.concat rejects an empty list).
boxscores_advanced = pd.concat(player_track_frames) if player_track_frames else None

In [74]:
boxscores_advanced

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,SPD,...,CFGM,CFGA,CFG_PCT,UFGM,UFGA,UFG_PCT,FG_PCT,DFGM,DFGA,DFG_PCT
0,0022200958,1610612737,ATL,Atlanta,1629631,De'Andre Hunter,F,,30:18,4.00,...,2,3,0.667,4,7,0.571,0.599,1,1,1.000
1,0022200958,1610612737,ATL,Atlanta,1628381,John Collins,F,,16:13,4.15,...,0,1,0.000,1,3,0.333,0.250,0,2,0.000
2,0022200958,1610612737,ATL,Atlanta,203991,Clint Capela,C,,24:56,3.95,...,7,9,0.778,1,1,1.000,0.800,2,7,0.286
3,0022200958,1610612737,ATL,Atlanta,1627749,Dejounte Murray,G,,36:02,4.14,...,2,6,0.333,1,8,0.125,0.213,3,3,1.000
4,0022200958,1610612737,ATL,Atlanta,1629027,Trae Young,G,,32:17,3.92,...,1,4,0.250,1,9,0.111,0.153,0,0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21,0022100968,1610612765,DET,Detroit,203482,Kelly Olynyk,,,17:29,3.99,...,3,4,0.750,0,0,0.000,0.750,1,1,1.000
22,0022100968,1610612765,DET,Detroit,203585,Rodney McGruder,,,13:48,4.28,...,0,1,0.000,0,2,0.000,0.000,1,1,1.000
23,0022100968,1610612765,DET,Detroit,1630587,Isaiah Livers,,,24:33,4.16,...,0,0,0.000,3,6,0.500,0.500,1,1,1.000
24,0022100968,1610612765,DET,Detroit,1628977,Hamidou Diallo,,DNP - Coach's Decision,0:00,0.00,...,0,0,0.000,0,0,0.000,0.000,0,0,0.000
