In [None]:
# 1) Determine needed game_ids (playoff games of the last 5 seasons, 2015-2019)
# 2) Pull general game data and game rotation data
# 3) merge data
# 4) save data

In [1]:
import os
import time

import pandas as pd
import progressbar

## Pulling game data

In [2]:
from nba_api.stats.endpoints import LeagueGameFinder

# Fetch every NBA (league_id '00') playoff game the endpoint returns; the first
# result frame holds one row per team per game.
gamefinder = LeagueGameFinder(league_id_nullable='00', season_type_nullable='Playoffs')
games = gamefinder.get_data_frames()[0]

# Keep the 2015 season and later; the year is read from the last four characters of SEASON_ID.
# Note: the printed figure is the row count (one row per team per game), not distinct games.
games = games.loc[pd.to_numeric(games['SEASON_ID'].str[-4:]).ge(2015)]
print('Number of games: {}'.format(len(games)))

# Distinct game ids, in order of first appearance, for the per-game rotation requests
needed_game_ids = list(games['GAME_ID'].unique())

Number of games: 824


In [3]:
# Spot-check: display only the rows whose SEASON_ID ends in '2019'
games.loc[games['SEASON_ID'].str[-4:].eq('2019')]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,42019,1610612748,MIA,Miami Heat,0041900406,2020-10-11,MIA vs. LAL,L,240,93,...,0.591,9,32,41,25,4,4,13,18,-24.2
1,42019,1610612747,LAL,Los Angeles Lakers,0041900406,2020-10-11,LAL @ MIA,W,239,106,...,0.643,12,34,46,23,5,4,12,22,20.0
2,42019,1610612748,MIA,Miami Heat,0041900405,2020-10-09,MIA @ LAL,W,241,111,...,0.955,9,26,35,26,7,3,13,19,4.8
3,42019,1610612747,LAL,Los Angeles Lakers,0041900405,2020-10-09,LAL vs. MIA,L,240,108,...,0.857,12,29,41,21,10,5,15,21,-3.4
4,42019,1610612747,LAL,Los Angeles Lakers,0041900404,2020-10-06,LAL @ MIA,W,241,102,...,0.857,10,32,42,25,5,4,15,14,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,42019,1610612743,DEN,Denver Nuggets,0041900161,2020-08-17,DEN vs. UTA,W,263,135,...,0.833,8,33,41,23,10,6,11,22,10.0
162,42019,1610612755,PHI,Philadelphia 76ers,0041900121,2020-08-17,PHI @ BOS,L,239,101,...,0.783,15,35,50,23,5,3,18,24,-8.0
163,42019,1610612742,DAL,Dallas Mavericks,0041900151,2020-08-17,DAL @ LAC,L,240,110,...,0.875,5,36,41,18,9,2,21,21,-8.0
164,42019,1610612761,TOR,Toronto Raptors,0041900111,2020-08-17,TOR vs. BKN,W,239,134,...,0.970,9,38,47,26,4,6,11,22,24.0


In [4]:
# Flip the first five rows so each column of the wide frame is shown as a row
games.head().T

Unnamed: 0,0,1,2,3,4
SEASON_ID,42019,42019,42019,42019,42019
TEAM_ID,1610612748,1610612747,1610612748,1610612747,1610612747
TEAM_ABBREVIATION,MIA,LAL,MIA,LAL,LAL
TEAM_NAME,Miami Heat,Los Angeles Lakers,Miami Heat,Los Angeles Lakers,Los Angeles Lakers
GAME_ID,0041900406,0041900406,0041900405,0041900405,0041900404
GAME_DATE,2020-10-11,2020-10-11,2020-10-09,2020-10-09,2020-10-06
MATCHUP,MIA vs. LAL,LAL @ MIA,MIA @ LAL,LAL vs. MIA,LAL @ MIA
WL,L,W,W,L,W
MIN,240,239,241,240,241
PTS,93,106,111,108,102


In [5]:
# Non-null GAME_ID entries per season year. These are row counts (one row per
# team per game), so they are not counts of distinct games.
games.groupby(games['SEASON_ID'].str[-4:])[['GAME_ID']].agg('count')

Unnamed: 0_level_0,GAME_ID
SEASON_ID,Unnamed: 1_level_1
2015,172
2016,158
2017,164
2018,164
2019,166


## Pulling game rotation data

In [6]:
from nba_api.stats.endpoints import GameRotation

# Download the rotation data for every selected game, one request per game id,
# and stack the home and away frames into a single table.
bar = progressbar.ProgressBar(max_value=len(needed_game_ids))
dfs = []
for i, game_id in enumerate(needed_game_ids):
    game_rotation = GameRotation(game_id=game_id)

    # Tag each side so home and away rows stay distinguishable after concatenation.
    away_team_df = game_rotation.away_team.get_data_frame()
    away_team_df['HOME_TEAM'] = 0

    home_team_df = game_rotation.home_team.get_data_frame()
    home_team_df['HOME_TEAM'] = 1

    dfs.append(away_team_df)
    dfs.append(home_team_df)

    # Report the number of games completed (i + 1). Passing the zero-based index
    # left the bar at "411 of 412" after the last game had been fetched.
    bar.update(i + 1)
    time.sleep(0.5)  # pause between requests; presumably to avoid API throttling

# Close the bar so the final 100% line is flushed.
bar.finish()

# The per-game frames keep their own row labels, so index values repeat in the result.
full_game_rotation_df = pd.concat(dfs)

 99% (411 of 412) |##################### | Elapsed Time: 0:08:29 ETA:   0:00:01

In [7]:
# Sanity check: (rows, columns) of the combined rotation table
full_game_rotation_df.shape

(27028, 13)

In [12]:
# Persist the combined rotation table. Create the target folder first, because
# to_csv raises if the directory does not exist (e.g. on a fresh checkout).
os.makedirs('data', exist_ok=True)
full_game_rotation_df.to_csv('data/full_game_rotation_data_of_playoffs_for_past_5_years.csv')