In [3]:
import numpy as np
import pandas as pd

from tqdm.auto import tqdm
from nba_api.stats.endpoints import leaguegamefinder, boxscoreadvancedv3
from nba_api.stats.static import teams

In [34]:
nba_teams = pd.DataFrame(teams.get_teams())
nba_teams = dict(zip(nba_teams['abbreviation'], nba_teams['id']))
game_schedules = pd.DataFrame()
for team, team_id, in tqdm(nba_teams.items()):
    team_games = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, season_nullable='2023-24')
    game_schedules = pd.concat([game_schedules, team_games.get_data_frames()[0]])
game_schedules

  0%|          | 0/30 [00:00<?, ?it/s]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612737,ATL,Atlanta Hawks,0022300521,2024-01-10,ATL vs. PHI,W,265,139,...,0.738,16,39,55,32,6,8,12,21,7.0
1,22023,1610612737,ATL,Atlanta Hawks,0022300499,2024-01-07,ATL @ ORL,L,265,110,...,0.586,8,42,50,21,9,10,12,23,-7.0
2,22023,1610612737,ATL,Atlanta Hawks,0022300480,2024-01-05,ATL @ IND,L,238,116,...,0.867,14,25,39,21,6,3,14,11,-34.0
3,22023,1610612737,ATL,Atlanta Hawks,0022300467,2024-01-03,ATL vs. OKC,W,240,141,...,0.939,15,34,49,30,8,5,11,21,3.0
4,22023,1610612737,ATL,Atlanta Hawks,0022300445,2023-12-31,ATL @ WAS,W,240,130,...,0.649,12,46,58,27,5,3,12,18,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34,22023,1610612766,CHA,Charlotte Hornets,0022300063,2023-10-25,CHA vs. ATL,W,240,116,...,0.731,12,39,51,34,5,3,19,21,6.0
35,12023,1610612766,CHA,Charlotte Hornets,0012300060,2023-10-19,CHA vs. BOS,L,241,99,...,0.808,10,39,49,24,8,5,24,17,-28.0
36,12023,1610612766,CHA,Charlotte Hornets,0012300038,2023-10-15,CHA vs. OKC,W,241,117,...,0.591,8,35,43,28,10,7,12,15,2.0
37,12023,1610612766,CHA,Charlotte Hornets,0012300025,2023-10-12,CHA @ WAS,L,241,92,...,0.667,16,50,66,19,9,9,23,24,-6.0


In [6]:
# Don't run twice, or else you have to rerun above cell
cleaned_games = game_schedules
cleaned_games['IS_HOME_TEAM'] = cleaned_games['MATCHUP'].str.contains('vs.').astype(int)
cleaned_games['MATCHUP'] = cleaned_games['MATCHUP'].str.replace('@', 'vs.')
cleaned_games

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,IS_HOME_TEAM
0,22023,1610612737,ATL,Atlanta Hawks,0022300499,2024-01-07,ATL vs. ORL,L,265,110,...,8,42,50,21,9,10,12,23,-7.0,0
1,22023,1610612737,ATL,Atlanta Hawks,0022300480,2024-01-05,ATL vs. IND,L,238,116,...,14,25,39,21,6,3,14,11,-34.0,0
2,22023,1610612737,ATL,Atlanta Hawks,0022300467,2024-01-03,ATL vs. OKC,W,240,141,...,15,34,49,30,8,5,11,21,3.0,1
3,22023,1610612737,ATL,Atlanta Hawks,0022300445,2023-12-31,ATL vs. WAS,W,240,130,...,12,46,58,27,5,3,12,18,4.0,0
4,22023,1610612737,ATL,Atlanta Hawks,0022300431,2023-12-29,ATL vs. SAC,L,239,110,...,18,33,51,29,9,4,15,16,-7.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,22023,1610612766,CHA,Charlotte Hornets,0022300063,2023-10-25,CHA vs. ATL,W,240,116,...,12,39,51,34,5,3,19,21,6.0,1
34,12023,1610612766,CHA,Charlotte Hornets,0012300060,2023-10-19,CHA vs. BOS,L,241,99,...,10,39,49,24,8,5,24,17,-28.0,1
35,12023,1610612766,CHA,Charlotte Hornets,0012300038,2023-10-15,CHA vs. OKC,W,241,117,...,8,35,43,28,10,7,12,15,2.0,1
36,12023,1610612766,CHA,Charlotte Hornets,0012300025,2023-10-12,CHA vs. WAS,L,241,92,...,16,50,66,19,9,9,23,24,-6.0,0


In [33]:
team_stats = cleaned_games.drop(['SEASON_ID', 'TEAM_NAME'], axis=1)
team_stats = team_stats.set_index(['GAME_DATE'])
team_stats['OPPONENT'] = team_stats['MATCHUP'].str[-3:]
team_stats

Unnamed: 0_level_0,TEAM_ID,TEAM_ABBREVIATION,GAME_ID,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,IS_HOME_TEAM,OPPONENT
GAME_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-07,1610612737,ATL,0022300499,ATL vs. ORL,L,265,110,42,91,0.462,...,42,50,21,9,10,12,23,-7.0,0,ORL
2024-01-05,1610612737,ATL,0022300480,ATL vs. IND,L,238,116,40,93,0.430,...,25,39,21,6,3,14,11,-34.0,0,IND
2024-01-03,1610612737,ATL,0022300467,ATL vs. OKC,W,240,141,48,95,0.505,...,34,49,30,8,5,11,21,3.0,1,OKC
2023-12-31,1610612737,ATL,0022300445,ATL vs. WAS,W,240,130,47,95,0.495,...,46,58,27,5,3,12,18,4.0,0,WAS
2023-12-29,1610612737,ATL,0022300431,ATL vs. SAC,L,239,110,37,94,0.394,...,33,51,29,9,4,15,16,-7.0,1,SAC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-25,1610612766,CHA,0022300063,CHA vs. ATL,W,240,116,43,86,0.500,...,39,51,34,5,3,19,21,6.0,1,ATL
2023-10-19,1610612766,CHA,0012300060,CHA vs. BOS,L,241,99,37,88,0.420,...,39,49,24,8,5,24,17,-28.0,1,BOS
2023-10-15,1610612766,CHA,0012300038,CHA vs. OKC,W,241,117,46,88,0.523,...,35,43,28,10,7,12,15,2.0,1,OKC
2023-10-12,1610612766,CHA,0012300025,CHA vs. WAS,L,241,92,35,94,0.372,...,50,66,19,9,9,23,24,-6.0,0,WAS


In [8]:
home = team_stats[team_stats['IS_HOME_TEAM'] == 1]
away = team_stats[team_stats['IS_HOME_TEAM'] == 0]

In [25]:
merged = home.merge(away, on=['GAME_DATE','GAME_ID'], how='inner')
merged

Unnamed: 0_level_0,TEAM_ID_x,TEAM_ABBREVIATION_x,GAME_ID,MATCHUP_x,WL_x,MIN_x,PTS_x,FGM_x,FGA_x,FG_PCT_x,...,DREB_y,REB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,PLUS_MINUS_y,IS_HOME_TEAM_y,OPPONENT_y
GAME_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-03,1610612737,ATL,0022300467,ATL vs. OKC,W,240,141,48,95,0.505,...,28,37,30,10,1,13,23,-3.0,0,ATL
2023-12-29,1610612737,ATL,0022300431,ATL vs. SAC,L,239,110,37,94,0.394,...,33,38,35,9,3,12,20,7.0,0,ATL
2023-12-23,1610612737,ATL,0022300393,ATL vs. MEM,L,239,119,45,96,0.469,...,36,51,29,6,7,11,16,6.0,0,ATL
2023-12-18,1610612737,ATL,0022300352,ATL vs. DET,W,240,130,46,85,0.541,...,24,36,23,7,3,13,26,-6.0,0,ATL
2023-12-11,1610612737,ATL,0022300296,ATL vs. DEN,L,240,122,44,96,0.458,...,35,49,29,9,8,16,19,7.0,0,ATL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-30,1610612766,CHA,0022300101,CHA vs. BKN,L,240,121,47,96,0.490,...,36,46,25,4,8,13,19,12.0,0,CHA
2023-10-27,1610612766,CHA,0022300077,CHA vs. DET,L,240,99,33,88,0.375,...,41,53,28,7,6,23,29,12.0,0,CHA
2023-10-25,1610612766,CHA,0022300063,CHA vs. ATL,W,240,116,43,86,0.500,...,30,42,24,12,1,12,19,-6.0,0,CHA
2023-10-19,1610612766,CHA,0012300060,CHA vs. BOS,L,241,99,37,88,0.420,...,39,55,34,22,6,15,19,28.0,0,CHA


In [12]:
box_scores = pd.DataFrame()
for game_id in tqdm(merged['GAME_ID'].unique()):
    game_box_scores = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id)
    box_scores = pd.concat([box_scores, game_box_scores.get_data_frames()[0]])

  0%|          | 0/609 [00:00<?, ?it/s]

In [11]:
box_scores.to_csv('box_scores.csv')

In [32]:
indexed = merged.reset_index().set_index(['GAME_DATE', 'GAME_ID'])
past_games = indexed
for i in range(4):
    columns = [column + '_past' + str(i) for column in merged.columns]
    past_games[columns] = merged.shift(i)
past_games

Unnamed: 0_level_0,Unnamed: 1_level_0,TEAM_ID_x,TEAM_ABBREVIATION_x,MATCHUP_x,WL_x,MIN_x,PTS_x,FGM_x,FGA_x,FG_PCT_x,FG3M_x,...,DREB_y,REB_y,AST_y,STL_y,BLK_y,TOV_y,PF_y,PLUS_MINUS_y,IS_HOME_TEAM_y,OPPONENT_y
GAME_DATE,GAME_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2024-01-03,0022300467,1610612737,ATL,ATL vs. OKC,W,240,141,48,95,0.505,14,...,28,37,30,10,1,13,23,-3.0,0,ATL
2023-12-29,0022300431,1610612737,ATL,ATL vs. SAC,L,239,110,37,94,0.394,15,...,33,38,35,9,3,12,20,7.0,0,ATL
2023-12-23,0022300393,1610612737,ATL,ATL vs. MEM,L,239,119,45,96,0.469,13,...,36,51,29,6,7,11,16,6.0,0,ATL
2023-12-18,0022300352,1610612737,ATL,ATL vs. DET,W,240,130,46,85,0.541,16,...,24,36,23,7,3,13,26,-6.0,0,ATL
2023-12-11,0022300296,1610612737,ATL,ATL vs. DEN,L,240,122,44,96,0.458,17,...,35,49,29,9,8,16,19,7.0,0,ATL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-30,0022300101,1610612766,CHA,CHA vs. BKN,L,240,121,47,96,0.490,8,...,36,46,25,4,8,13,19,12.0,0,CHA
2023-10-27,0022300077,1610612766,CHA,CHA vs. DET,L,240,99,33,88,0.375,7,...,41,53,28,7,6,23,29,12.0,0,CHA
2023-10-25,0022300063,1610612766,CHA,CHA vs. ATL,W,240,116,43,86,0.500,11,...,30,42,24,12,1,12,19,-6.0,0,CHA
2023-10-19,0012300060,1610612766,CHA,CHA vs. BOS,L,241,99,37,88,0.420,4,...,39,55,34,22,6,15,19,28.0,0,CHA
