In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os

from nba_api.stats.static import players
from nba_api.live.nba.endpoints import boxscore
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder

from utils import combine_team_games

#### Initialization of Key Tables

In [2]:
# Lookup table: player ID -> full name, restricted to active players

nba_players = (
    pd.DataFrame(players.get_players())
    .loc[lambda roster: roster['is_active'] == True, ['id', 'full_name']]
)

nba_players.head()

Unnamed: 0,id,full_name
10,1630173,Precious Achiuwa
22,203500,Steven Adams
24,1628389,Bam Adebayo
29,1630534,Ochai Agbaji
36,1631231,James Akinjo


In [3]:
# Lookup table: team ID -> team details, built straight from the static team list

nba_teams = pd.DataFrame(teams.get_teams())

nba_teams.head()

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966


In [4]:
# Get All Games
# Query LeagueGameFinder once per team and keep only the rows whose GAME_DATE
# is on or after `min_date`.

min_date = '2000-01-01'

nba_team_ids = list(nba_teams.id)

# Collect one frame per team and concatenate once at the end. Concatenating
# inside the loop re-copies the accumulated frame on every iteration, and
# seeding the accumulator with an empty DataFrame relies on pandas' deprecated
# handling of empty entries in concat.
team_game_frames = []

for team_id in nba_team_ids:  # not `id`, which would shadow the builtin

    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable = team_id)
    team_games = gamefinder.get_data_frames()[0]

    # NOTE(review): this compares GAME_DATE against a string; presumably the
    # column holds ISO 'YYYY-MM-DD' strings so the ordering is chronological.
    team_game_frames.append(team_games.loc[team_games.GAME_DATE >= min_date])

# pd.concat raises on an empty list, so fall back to an empty frame if there
# were no teams to query.
all_games = pd.concat(team_game_frames) if team_game_frames else pd.DataFrame()


# NBA Cup: overwrite MIL's MATCHUP for game 0062400001 so that the later
# combine_team_games() call works properly
cup_game_mil_row = (all_games['GAME_ID'] == '0062400001') & (all_games['TEAM_ABBREVIATION'] == 'MIL')
all_games.loc[cup_game_mil_row, 'MATCHUP'] = 'MIL vs. OKC'


# SEASON_SHORT: the last four characters of SEASON_ID, as an integer

all_games['SEASON_SHORT'] = all_games['SEASON_ID'].str[-4:].astype('int')


# GAME_TYPE: the first character of SEASON_ID, translated through a manual mapping

game_type_dict = {
    '1': 'preseason',
    '2': 'regular',
    '4': 'playoffs',
    '5': 'play-in',
    '6': 'nba_cup',
}

all_games['GAME_TYPE'] = all_games['SEASON_ID'].str[:1].map(game_type_dict)

# Cut-off date per SEASON_SHORT: rows labelled 'regular' for that season but
# dated before the cut-off are relabelled 'preseason' below.
# NOTE(review): the name suggests these are regular-season opening dates — confirm.
reg_start_date = {
    2000: '2000-10-31',
    2001: '2001-10-30',
    2002: '2002-10-29',
    2003: '2003-10-28',
    2004: '2004-10-26',
    2005: '2005-11-01',
    2006: '2006-11-01',
    2007: '2007-10-30',
    2008: '2008-10-28',
    2009: '2009-10-27',
    2010: '2010-10-26',
    2011: '2011-12-25',
    2012: '2012-10-30',
    2013: '2013-10-29',
    2014: '2014-10-28',
    2015: '2015-10-27',
    2016: '2016-10-25',
    2017: '2017-10-17',
    2018: '2018-10-16',
    2019: '2019-10-22',
    2020: '2020-12-22',
    2021: '2021-10-19',
    2022: '2022-10-18',
    2023: '2023-10-24',
    2024: '2024-10-22'}

# Iterate the dict's items directly. Binding `_` as a loop variable in a
# notebook cell would shadow IPython's last-output variable, and the index
# from enumerate() is not needed.
for season_short, start_date in reg_start_date.items():

    early_regular_rows = (
        (all_games.GAME_TYPE == 'regular')
        & (all_games.SEASON_SHORT == season_short)
        & (all_games.GAME_DATE < start_date)
    )
    all_games.loc[early_regular_rows, 'GAME_TYPE'] = 'preseason'


# Spot-check one game: display its rows in all_games
all_games.loc[all_games.GAME_ID == '0022400477'].head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,SEASON_SHORT,GAME_TYPE
0,22024,1610612737,ATL,Atlanta Hawks,22400477,2025-01-03,ATL @ LAL,L,240,102,...,26.0,39.0,22,10.0,3,8,22,-17.0,2024,regular
0,22024,1610612747,LAL,Los Angeles Lakers,22400477,2025-01-03,LAL vs. ATL,W,240,119,...,39.0,56.0,28,4.0,6,18,21,17.0,2024,regular


In [5]:
# Collapse the per-team rows into a single row per game via combine_team_games()
# _H marks stats for the home team, _A marks stats for the away team

# Drop the away-side copies of the game-level columns and strip the _H suffix
# from the home-side copies
all_games_comb = (
    combine_team_games(all_games)
    .drop(columns=['GAME_TYPE_A', 'SEASON_SHORT_A', 'MATCHUP_A'])
    .rename(columns={
        'GAME_TYPE_H': 'GAME_TYPE',
        'SEASON_SHORT_H': 'SEASON_SHORT',
        'MATCHUP_H': 'MATCHUP',
    })
)

all_games_comb.loc[all_games_comb['GAME_ID'] == '0022400477'].head()

Unnamed: 0,SEASON_ID,TEAM_ID_H,TEAM_ABBREVIATION_H,TEAM_NAME_H,GAME_ID,GAME_DATE,MATCHUP,WL_H,MIN_H,PTS_H,...,FT_PCT_A,OREB_A,DREB_A,REB_A,AST_A,STL_A,BLK_A,TOV_A,PF_A,PLUS_MINUS_A
2,22024,1610612747,LAL,Los Angeles Lakers,22400477,2025-01-03,LAL vs. ATL,W,240,119,...,0.778,13.0,26.0,39.0,22,10.0,3,8,22,-17.0


#### Validation

In [6]:
# Sanity check on all_games: print each team's regular-season row count for
# the chosen season (82 expected)

team_list = nba_teams['abbreviation'].tolist()
season = 2018

# Loop-invariant part of the filter: regular-season rows of the chosen season
regular_in_season = (all_games['GAME_TYPE'] == 'regular') & (all_games['SEASON_SHORT'] == season)

for team in team_list:
    n_games = int(((all_games['TEAM_ABBREVIATION'] == team) & regular_in_season).sum())
    print(f'{team}: {n_games}')

ATL: 82
BOS: 82
CLE: 82
NOP: 82
CHI: 82
DAL: 82
DEN: 82
GSW: 82
HOU: 82
LAC: 82
LAL: 82
MIA: 82
MIL: 82
MIN: 82
BKN: 82
NYK: 82
ORL: 82
IND: 82
PHI: 82
PHX: 82
POR: 82
SAC: 82
SAS: 82
OKC: 82
TOR: 82
UTA: 82
MEM: 82
WAS: 82
DET: 82
CHA: 82


In [7]:
# Sanity check on all_games_comb: print each team's regular-season game count
# (home or away) for the chosen season (82 expected)

team_list = nba_teams['abbreviation'].tolist()
season = 2014

# Loop-invariant part of the filter: regular-season games of the chosen season
regular_in_season = (all_games_comb['GAME_TYPE'] == 'regular') & (all_games_comb['SEASON_SHORT'] == season)

for team in team_list:
    team_played = (all_games_comb['TEAM_ABBREVIATION_A'] == team) | (all_games_comb['TEAM_ABBREVIATION_H'] == team)
    n_games = int((team_played & regular_in_season).sum())
    print(f'{team}: {n_games}')

ATL: 82
BOS: 82
CLE: 82
NOP: 82
CHI: 82
DAL: 82
DEN: 82
GSW: 82
HOU: 82
LAC: 82
LAL: 82
MIA: 82
MIL: 82
MIN: 82
BKN: 82
NYK: 82
ORL: 82
IND: 82
PHI: 82
PHX: 82
POR: 82
SAC: 82
SAS: 82
OKC: 82
TOR: 82
UTA: 82
MEM: 82
WAS: 82
DET: 82
CHA: 82


#### 3 Point Attempts Per Game

In [8]:
# List the columns available in all_games
all_games.columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS', 'SEASON_SHORT',
       'GAME_TYPE'],
      dtype='object')

In [40]:
# Per-game totals (home + away) for 3-pointers made, 3-pointers attempted and
# field-goal attempts, plus the combined 3-point percentage

all_games_comb = all_games_comb.assign(
    FG3M_T=lambda games: games['FG3M_H'] + games['FG3M_A'],
    FG3A_T=lambda games: games['FG3A_H'] + games['FG3A_A'],
    FG3_PCT_T=lambda games: games['FG3M_T'] / games['FG3A_T'],
    FGA_T=lambda games: games['FGA_H'] + games['FGA_A'],
)


In [41]:
# Per-season means of the combined 3-point and field-goal attempt columns,
# regular-season games only
regular_since_2000 = (all_games_comb['SEASON_SHORT'] >= 2000) & (all_games_comb['GAME_TYPE'] == 'regular')

(
    all_games_comb.loc[regular_since_2000, ['SEASON_SHORT', 'FG3A_T', 'FG3_PCT_T', 'FGA_T']]
    .groupby('SEASON_SHORT')
    .mean()
    .reset_index()
)

Unnamed: 0,SEASON_SHORT,FG3A_T,FG3_PCT_T,FGA_T
0,2000,27.415475,0.350361,161.197645
1,2001,29.498738,0.352459,162.542473
2,2002,29.362489,0.346366,161.571909
3,2003,29.850294,0.344766,159.631623
4,2004,31.502439,0.352361,160.671545
5,2005,31.961789,0.35606,157.979675
6,2006,33.876221,0.355169,159.42101
7,2007,36.214634,0.359342,163.008943
8,2008,36.246341,0.364618,161.83252
9,2009,36.278049,0.352096,163.405691


In [17]:
# Mean 3-point attempts per game for each team and season (regular season
# only), reshaped so that teams are rows and seasons are columns
regular_since_2000 = (all_games['SEASON_SHORT'] >= 2000) & (all_games['GAME_TYPE'] == 'regular')

team_3p = (
    all_games.loc[regular_since_2000, ['SEASON_SHORT', 'TEAM_ABBREVIATION', 'FG3A']]
    .groupby(['SEASON_SHORT', 'TEAM_ABBREVIATION'])
    .mean()
    .reset_index()
    .pivot(index='TEAM_ABBREVIATION', columns='SEASON_SHORT', values='FG3A')
    .reset_index()
)

team_3p.head()

SEASON_SHORT,TEAM_ABBREVIATION,2000,2001,2002,2003,2004,2005,2006,2007,2008,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,ATL,11.378049,14.560976,13.914634,15.231707,11.865854,14.073171,12.658537,13.146341,19.914634,...,28.365854,26.060976,31.02439,37.0,36.059701,33.361111,34.402439,30.54878,37.707317,36.514286
1,BKN,,,,,,,,,,...,18.390244,31.597561,35.658537,36.158537,38.138889,36.111111,31.731707,33.792683,36.707317,39.342857
2,BOS,19.914634,23.731707,26.280488,19.5,15.268293,15.731707,15.646341,19.073171,16.52439,...,26.121951,33.426829,30.390244,34.5,34.541667,36.361111,37.121951,42.585366,42.463415,50.285714
3,CHA,,,,,10.743902,15.378049,15.609756,17.597561,16.329268,...,29.390244,28.621951,27.231707,33.939024,34.323077,37.027778,38.170732,32.54878,34.0,41.757576
4,CHH,12.0,12.121951,,,,,,,,...,,,,,,,,,,


In [18]:
# Write the team-by-season 3-point-attempt table to team_3p.csv in the working directory.
# NOTE(review): to_csv writes the index by default, so the file gets an extra
# unnamed leading column — pass index=False if downstream readers don't need it.
team_3p.to_csv('team_3p.csv')