In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests

In [3]:
from nba_api.stats.static import teams

# Static team metadata: one dict per team.
nba_teams = teams.get_teams()

# Alphabetical list of full team names.
# Note: team_ids below keeps the order returned by get_teams(), so the two
# lists are NOT aligned index-by-index.
team_names = sorted(team['full_name'] for team in nba_teams)
team_ids = [team['id'] for team in nba_teams]

# Basic boxscores

In [34]:
from nba_api.stats.endpoints import leaguegamefinder

# Fetch the game log of every team and combine them into a single frame.
# The per-team frames are collected in a list and concatenated once at the
# end: calling pd.concat inside the loop re-copies all accumulated rows on
# every iteration (quadratic work).
team_game_frames = []
for team_id in team_ids:
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
    team_game_frames.append(gamefinder.get_data_frames()[0])

# Each per-team frame keeps its own index (no ignore_index), exactly as the
# incremental concat did. `games` stays None when there are no team ids.
games = pd.concat(team_game_frames) if team_game_frames else None

In [35]:
# Parse GAME_DATE into datetimes so it can be compared against date cutoffs
# and sorted chronologically in the cells below.
games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])

In [36]:
# Summarise the combined game log: column dtypes and non-null counts.
games.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 102169 entries, 0 to 2862
Data columns (total 28 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   SEASON_ID          102169 non-null  object        
 1   TEAM_ID            102169 non-null  int64         
 2   TEAM_ABBREVIATION  102169 non-null  object        
 3   TEAM_NAME          102169 non-null  object        
 4   GAME_ID            102169 non-null  object        
 5   GAME_DATE          102169 non-null  datetime64[ns]
 6   MATCHUP            102169 non-null  object        
 7   WL                 102147 non-null  object        
 8   MIN                102169 non-null  int64         
 9   PTS                102169 non-null  int64         
 10  FGM                102169 non-null  int64         
 11  FGA                102169 non-null  int64         
 12  FG_PCT             102162 non-null  float64       
 13  FG3M               102169 non-null  int64     

In [37]:
# Peek at the first rows of the combined game log.
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22022,1610612737,ATL,Atlanta Hawks,22200974,2023-03-06,ATL @ MIA,L,239,128,...,0.739,12.0,25.0,37.0,28,5.0,0,10,25,-2.0
1,22022,1610612737,ATL,Atlanta Hawks,22200958,2023-03-04,ATL @ MIA,L,239,109,...,0.867,10.0,27.0,37.0,22,12.0,1,16,23,-8.0
2,22022,1610612737,ATL,Atlanta Hawks,22200947,2023-03-03,ATL vs. POR,W,239,129,...,0.813,10.0,33.0,43.0,30,10.0,5,17,19,18.0
3,22022,1610612737,ATL,Atlanta Hawks,22200923,2023-02-28,ATL vs. WAS,L,239,116,...,0.895,13.0,29.0,42.0,21,4.0,7,9,17,-3.0
4,22022,1610612737,ATL,Atlanta Hawks,22200911,2023-02-26,ATL vs. BKN,W,240,129,...,0.821,13.0,33.0,46.0,20,7.0,2,15,18,2.0


In [38]:
# Preview the boolean mask produced by comparing GAME_DATE with a date string.
# NOTE(review): this preview uses 2015-09-01, whereas the filter actually
# applied in the next cell uses 2018-09-01 — confirm which cutoff is intended.
games['GAME_DATE'] > "2015-09-01"

0        True
1        True
2        True
3        True
4        True
        ...  
2858    False
2859    False
2860    False
2861    False
2862    False
Name: GAME_DATE, Length: 102169, dtype: bool

In [39]:
# Keep only games played after 2018-09-01, most recent first.
games = (
    games.loc[games['GAME_DATE'] > "2018-09-01"]
    .sort_values('GAME_DATE', ascending=False)
)

In [40]:
# Renumber rows 0..n-1 after the filter/sort, discarding the old index.
games = games.reset_index(drop=True)

In [41]:
# Drop the first 12 rows, i.e. the most recent ones given the descending
# date sort applied above.
# NOTE(review): the reason for skipping exactly 12 rows is not stated, and
# re-running this cell drops another 12 — confirm intent.
games = games.iloc[12:]

In [42]:
# Renumber rows 0..n-1 again after dropping the leading rows.
games = games.reset_index(drop=True)

In [47]:
# Distinct game ids in order of first appearance (each game has one row per
# team in the game log, so ids repeat).
game_ids = games['GAME_ID'].drop_duplicates().tolist()
len(game_ids)

6560

In [71]:
# Position of game '0022100001' within game_ids; the slices used further down
# start at the element right after it (2449 + 1 = 2450).
# NOTE(review): presumably the first game of the 2021-22 regular season — confirm.
game_ids.index('0022100001')

2449

In [74]:
# Number of game ids from position 2450 onwards, i.e. how many box scores the
# download loop below will request. Showing the count (not the full list)
# matches the recorded output and avoids dumping thousands of ids.
len(game_ids[2450:])

4110

In [16]:
# List the distinct team names present in the filtered game log.
games['TEAM_NAME'].unique()

array(['Atlanta Hawks', 'Boston Celtics', 'LA Clippers',
       'Portland Trail Blazers', 'Indiana Pacers', 'Orlando Magic',
       'New York Knicks', 'San Antonio Spurs', 'Charlotte Hornets',
       'Oklahoma City Thunder', 'Brooklyn Nets', 'Milwaukee Bucks',
       'Utah Jazz', 'Los Angeles Lakers', 'Memphis Grizzlies',
       'Phoenix Suns', 'Houston Rockets', 'Chicago Bulls',
       'Washington Wizards', 'Dallas Mavericks', 'Golden State Warriors',
       'Philadelphia 76ers', 'Sacramento Kings', 'Detroit Pistons',
       'Cleveland Cavaliers', 'Miami Heat', 'Minnesota Timberwolves',
       'Toronto Raptors', 'New Orleans Pelicans', 'Denver Nuggets'],
      dtype=object)

# Advanced boxscores

In [75]:
from nba_api.stats.endpoints import boxscoreadvancedv2

The import above and the loop below fetch the advanced box score for a given game ID.

`gamefinder.get_data_frames()[0]` returns the advanced box score broken down per player.

`gamefinder.get_data_frames()[1]` returns the advanced box score broken down per team.

In [76]:
%%time
boxscores_advanced_player = None
boxscores_advanced_team = None
for game_id in game_ids[2450:]:
    if boxscores_advanced_team is None:
        gamefinder = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
        boxscores_advanced_team = gamefinder.get_data_frames()[1]
        boxscores_advanced_player = gamefinder.get_data_frames()[0]
    else:
        gamefinder = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
        boxscores_advanced_team = pd.concat([boxscores_advanced_team, gamefinder.get_data_frames()[1]])
        boxscores_advanced_player = pd.concat([boxscores_advanced_player, gamefinder.get_data_frames()[0]])

ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)

In [77]:
# Persist the per-team advanced box scores downloaded so far.
pd.to_pickle(boxscores_advanced_team, 'boxscores_advanced_team_part2.pkl')

In [78]:
# Persist the per-player advanced box scores downloaded so far.
pd.to_pickle(boxscores_advanced_player, 'boxscores_advanced_player_part2.pkl')

In [79]:
# Display the per-team advanced box scores (two rows per game, one per team).
boxscores_advanced_team

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,...,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,0012100064,1610612749,Bucks,MIL,Milwaukee,240:00,103.1,104.0,115.0,114.0,...,15.2,0.511,0.543,1.0,0.198,99.54,99.5,82.92,99,0.439
1,0012100064,1610612742,Mavericks,DAL,Dallas,240:00,115.0,114.0,103.1,104.0,...,13.0,0.520,0.547,1.0,0.198,99.54,99.5,82.92,100,0.561
0,0012100062,1610612764,Wizards,WAS,Washington,240:00,107.9,106.6,103.3,108.5,...,16.0,0.580,0.603,1.0,0.202,108.06,106.0,88.33,106,0.530
1,0012100062,1610612752,Knicks,NYK,New York,240:00,103.3,108.5,107.9,106.6,...,12.3,0.485,0.521,1.0,0.199,108.06,106.0,88.33,106,0.470
0,0012100063,1610612763,Grizzlies,MEM,Memphis,240:00,98.7,100.0,110.7,112.4,...,21.9,0.534,0.551,1.0,0.199,106.46,105.0,87.50,105,0.419
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,0021800163,1610612745,Rockets,HOU,Houston,240:00,87.5,87.9,104.7,107.7,...,18.7,0.445,0.463,1.0,0.197,92.48,91.0,75.83,91,0.320
0,0021800166,1610612749,Bucks,MIL,Milwaukee,240:00,119.8,127.6,104.0,105.7,...,11.4,0.559,0.594,1.0,0.197,109.28,105.0,87.50,105,0.582
1,0021800166,1610612744,Warriors,GSW,Golden State,240:00,104.0,105.7,119.8,127.6,...,17.1,0.540,0.574,1.0,0.201,109.28,105.0,87.50,105,0.418
0,0021800165,1610612757,Trail Blazers,POR,Portland,240:00,113.4,119.6,107.4,107.1,...,7.2,0.464,0.531,1.0,0.199,100.06,97.5,81.25,97,0.541


In [80]:
# Display the per-player advanced box scores (one row per player per game).
boxscores_advanced_player

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,0012100064,1610612742,DAL,Dallas,1627827,Dorian Finney-Smith,Dorian,F,,24:39,...,13.3,0.250,0.376,0.190,0.193,97.64,97.36,81.14,50,0.083
1,0012100064,1610612742,DAL,Dallas,1628467,Maxi Kleber,Maxi,F,,14:20,...,0.0,0.900,0.900,0.152,0.157,93.37,95.44,79.53,29,0.182
2,0012100064,1610612742,DAL,Dallas,203939,Dwight Powell,Dwight,C,,11:08,...,0.0,0.833,0.872,0.269,0.281,98.64,101.32,84.43,24,0.200
3,0012100064,1610612742,DAL,Dallas,1628425,Sterling Brown,Sterling,G,,19:41,...,0.0,0.611,0.611,0.188,0.191,97.64,97.54,81.29,41,0.149
4,0012100064,1610612742,DAL,Dallas,1628973,Jalen Brunson,Jalen,G,,23:34,...,5.9,0.773,0.773,0.211,0.217,94.79,96.75,80.62,48,0.213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21,0021800165,1610612757,POR,Portland,203917,Nik Stauskas,Nik,,,17:54,...,8.3,0.438,0.450,0.238,0.242,100.45,100.56,83.80,37,0.057
22,0021800165,1610612757,POR,Portland,203552,Seth Curry,Seth,,,13:03,...,0.0,0.300,0.425,0.194,0.197,96.15,99.31,82.76,26,0.025
23,0021800165,1610612757,POR,Portland,203086,Meyers Leonard,Meyers,,,11:55,...,0.0,0.625,0.717,0.179,0.180,97.72,96.67,80.56,24,0.205
24,0021800165,1610612757,POR,Portland,1627735,Wade Baldwin IV,Wade,,DNP - Coach's Decision,,...,,,,0.000,0.000,,,,0,


Keep `GAME_ID` and `TEAM_NAME` for mapping purposes, but do not use them as features.

In [85]:
# Renumber rows 0..n-1, discarding the repeating per-game index.
# NOTE(review): `boxscores_advanced` is not assigned in any cell above this
# one in notebook order — confirm which frame this is meant to operate on.
boxscores_advanced = boxscores_advanced.reset_index(drop=True)

In [90]:
# Drop the first four rows, keeping every column.
# NOTE(review): the reason for skipping exactly 4 rows is not stated, and
# re-running this cell drops another 4 — confirm intent.
boxscores_advanced = boxscores_advanced.iloc[4:]

In [58]:
# Columns of the per-team advanced box score used as model features
# (TEAM_ID is included as the team key).
features_adv_team = [
    'TEAM_ID',
    # efficiency ratings
    'OFF_RATING', 'DEF_RATING', 'NET_RATING',
    # assists
    'AST_PCT', 'AST_TOV',
    # rebounding
    'OREB_PCT', 'DREB_PCT', 'REB_PCT',
    # turnovers and shooting
    'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT',
    # tempo
    'PACE', 'POSS',
]

In [57]:
# Number of selected team-level feature columns. The list is named
# `features_adv_team`; no variable called `features` is defined in this notebook.
len(features_adv_team)

14

In [91]:
# Show only the selected feature columns of the team box scores.
boxscores_advanced[features_adv_team]

Unnamed: 0,TEAM_ID,OFF_RATING,DEF_RATING,NET_RATING,AST_PCT,AST_TOV,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,PACE,POSS
4,1610612764,111.0,114.7,-3.7,0.457,2.33,0.339,0.848,0.556,9.0,0.463,0.490,101.0,100
5,1610612749,114.7,111.0,3.7,0.769,2.50,0.152,0.661,0.444,11.8,0.602,0.631,101.0,102
6,1610612747,114.1,106.1,8.1,0.690,3.22,0.309,0.754,0.536,9.1,0.500,0.540,99.0,99
7,1610612744,106.1,114.1,-8.1,0.750,2.08,0.246,0.691,0.464,13.1,0.495,0.531,99.0,99
8,1610612756,134.0,128.6,5.4,0.551,3.00,0.317,0.829,0.573,9.3,0.618,0.650,97.5,97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1610612743,111.5,109.4,2.1,0.568,1.75,0.308,0.708,0.500,12.5,0.518,0.547,96.0,96
1996,1610612748,111.3,102.8,8.5,0.737,1.75,0.304,0.654,0.472,15.1,0.511,0.560,106.5,106
1997,1610612745,102.8,111.3,-8.5,0.659,1.08,0.346,0.696,0.528,23.4,0.538,0.558,106.5,107
1998,1610612755,114.1,99.0,15.2,0.575,2.09,0.275,0.679,0.481,11.1,0.533,0.571,98.5,99


In [93]:
# Persist the trimmed team box scores so later sessions can reload them
# without hitting the API again.
pd.to_pickle(boxscores_advanced, 'boxscores_advanced_team_2000.pkl')

In [12]:
# Reload the team box scores saved earlier by this notebook.
# Note: unpickling can execute arbitrary code — only load pickle files that
# were produced locally / come from a trusted source.
boxscores_advanced = pd.read_pickle('boxscores_advanced_team_2000.pkl')

In [13]:
# Peek at the first rows of the reloaded team box scores.
boxscores_advanced.head()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,...,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
4,22200973,1610612764,Wizards,WAS,Washington,240:00,108.5,111.0,119.8,114.7,...,9.0,0.463,0.49,1.0,0.198,99.98,101.0,84.17,100,0.443
5,22200973,1610612749,Bucks,MIL,Milwaukee,240:00,119.8,114.7,108.5,111.0,...,11.8,0.602,0.631,1.0,0.201,99.98,101.0,84.17,102,0.557
6,22200964,1610612747,Lakers,LAL,Los Angeles,240:00,109.0,114.1,100.1,106.1,...,9.1,0.5,0.54,1.0,0.197,104.3,99.0,82.5,99,0.556
7,22200964,1610612744,Warriors,GSW,Golden State,240:00,100.1,106.1,109.0,114.1,...,13.1,0.495,0.531,1.0,0.197,104.3,99.0,82.5,99,0.444
8,22200962,1610612756,Suns,PHX,Phoenix,240:00,130.0,134.0,127.6,128.6,...,9.3,0.618,0.65,1.0,0.2,99.36,97.5,81.25,97,0.539


# New

In [None]:
from nba_api.stats.endpoints import boxscoreplayertrackv2

In [None]:
# Try the player-tracking box score endpoint on a single game (the first id
# in game_ids) before looping over every game.
boxscores = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=game_ids[0])

In [None]:
# Inspect the first data frame returned for that game.
boxscores.get_data_frames()[0]

In [72]:
# Download the player-tracking box score (first data frame of the response)
# for every game in game_ids and stack them into one frame.
# NOTE(review): the result is stored under `boxscores_advanced`, the same
# name used earlier for the advanced *team* box scores — it overwrites them.
#
# Per-game frames are gathered in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration, which is quadratic over thousands of games.
tracking_frames = []
try:
    for game_id in game_ids:
        gamefinder = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=game_id)
        tracking_frames.append(gamefinder.get_data_frames()[0])
finally:
    # If a request fails part-way through, the error still propagates but
    # the rows fetched so far are kept; stays None if nothing was fetched.
    boxscores_advanced = pd.concat(tracking_frames) if tracking_frames else None

In [74]:
# Display the stacked player-tracking box scores (one row per player per game).
boxscores_advanced

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,SPD,...,CFGM,CFGA,CFG_PCT,UFGM,UFGA,UFG_PCT,FG_PCT,DFGM,DFGA,DFG_PCT
0,0022200958,1610612737,ATL,Atlanta,1629631,De'Andre Hunter,F,,30:18,4.00,...,2,3,0.667,4,7,0.571,0.599,1,1,1.000
1,0022200958,1610612737,ATL,Atlanta,1628381,John Collins,F,,16:13,4.15,...,0,1,0.000,1,3,0.333,0.250,0,2,0.000
2,0022200958,1610612737,ATL,Atlanta,203991,Clint Capela,C,,24:56,3.95,...,7,9,0.778,1,1,1.000,0.800,2,7,0.286
3,0022200958,1610612737,ATL,Atlanta,1627749,Dejounte Murray,G,,36:02,4.14,...,2,6,0.333,1,8,0.125,0.213,3,3,1.000
4,0022200958,1610612737,ATL,Atlanta,1629027,Trae Young,G,,32:17,3.92,...,1,4,0.250,1,9,0.111,0.153,0,0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21,0022100968,1610612765,DET,Detroit,203482,Kelly Olynyk,,,17:29,3.99,...,3,4,0.750,0,0,0.000,0.750,1,1,1.000
22,0022100968,1610612765,DET,Detroit,203585,Rodney McGruder,,,13:48,4.28,...,0,1,0.000,0,2,0.000,0.000,1,1,1.000
23,0022100968,1610612765,DET,Detroit,1630587,Isaiah Livers,,,24:33,4.16,...,0,0,0.000,3,6,0.500,0.500,1,1,1.000
24,0022100968,1610612765,DET,Detroit,1628977,Hamidou Diallo,,DNP - Coach's Decision,0:00,0.00,...,0,0,0.000,0,0,0.000,0.000,0,0,0.000
