In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os

from nba_api.stats.static import players
from nba_api.live.nba.endpoints import boxscore
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder

from utils import combine_team_games

In [2]:
# Lookup table: player id -> full name, restricted to active players

nba_players = (
    pd.DataFrame(players.get_players())
    # keep rows flagged active, and only the two columns needed for the mapping
    .loc[lambda roster: roster['is_active'] == True, ['id', 'full_name']]
)

nba_players.head()

Unnamed: 0,id,full_name
10,1630173,Precious Achiuwa
22,203500,Steven Adams
24,1628389,Bam Adebayo
29,1630534,Ochai Agbaji
36,1631231,James Akinjo


In [3]:
# Lookup table: team id -> team details (one row per entry returned by teams.get_teams())

nba_teams = pd.DataFrame(teams.get_teams())

nba_teams.head()

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966


In [33]:
# Get All Games
# Builds `all_games`: the per-team game rows returned by LeagueGameFinder for every
# team in `nba_teams`, restricted to games on or after `min_date`.

# Compared directly against GAME_DATE below
# (assumes GAME_DATE is a 'YYYY-MM-DD' string so the comparison is chronological -- TODO confirm)
min_date = '2000-01-01'

nba_team_ids = list(nba_teams.id)

# Collect one filtered frame per team and concatenate once at the end, rather than
# re-copying an ever-growing frame with pd.concat on every iteration.
team_game_frames = []

for team_id in nba_team_ids:  # named `team_id` so the builtin `id` is not shadowed

    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable = team_id)
    team_games = gamefinder.get_data_frames()[0]
    team_game_frames.append(team_games.loc[team_games.GAME_DATE >= min_date])

# pd.concat raises on an empty list; fall back to an empty frame, which is what the
# loop-and-concat form produced when there were no teams.
# The per-team row index is intentionally kept (no ignore_index) so the frame handed
# to later cells is unchanged.
all_games = pd.concat(team_game_frames) if team_game_frames else pd.DataFrame()


# Manual adjustment for NBA Cup for subsequent combine_team_games() to work properly:
# overwrite MATCHUP on MIL's row of game '0062400001' so it reads as the "vs." side.
all_games.loc[(all_games.GAME_ID == '0062400001') & (all_games.TEAM_ABBREVIATION == 'MIL'), 'MATCHUP'] = 'MIL vs. OKC'

all_games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22024,1610612737,ATL,Atlanta Hawks,22400477,2025-01-03,ATL @ LAL,L,240,102,...,0.778,13.0,26.0,39.0,22,10.0,3,8,22,-17.0
1,22024,1610612737,ATL,Atlanta Hawks,22400461,2025-01-01,ATL @ DEN,L,240,120,...,0.667,12.0,32.0,44.0,34,10.0,5,13,16,-19.0
2,22024,1610612737,ATL,Atlanta Hawks,22400438,2024-12-29,ATL @ TOR,W,240,136,...,0.743,14.0,26.0,40.0,30,22.0,8,14,19,29.0
3,22024,1610612737,ATL,Atlanta Hawks,22400427,2024-12-28,ATL vs. MIA,W,240,120,...,0.773,9.0,38.0,47.0,34,7.0,2,11,16,10.0
4,22024,1610612737,ATL,Atlanta Hawks,22400413,2024-12-26,ATL vs. CHI,W,240,141,...,0.941,12.0,32.0,44.0,32,9.0,5,12,13,8.0


In [34]:
# Collapse each game into a single row via combine_team_games()
# _H columns hold stats relating to the home team, _A columns the away team

all_games_comb = combine_team_games(all_games)


# SEASON_SHORT: last four characters of SEASON_ID, as an integer

all_games_comb['SEASON_SHORT'] = all_games_comb['SEASON_ID'].str.slice(start=-4).astype('int')


# GAME_TYPE: looked up from the first character of SEASON_ID (manual mapping)

game_type_dict = {
    '1': 'preseason',
    '2': 'regular',
    '4': 'playoffs',
    '5': 'play-in',
    '6': 'nba_cup',
}

all_games_comb['GAME_TYPE'] = all_games_comb['SEASON_ID'].str.slice(stop=1).map(game_type_dict)


# Cut-off date per SEASON_SHORT: games tagged 'regular' that are dated before the
# cut-off for their season are relabelled as 'preseason' below.

reg_start_date = {
    2000: '2000-10-31', 2001: '2001-10-30', 2002: '2002-10-29', 2003: '2003-10-28',
    2004: '2004-10-26', 2005: '2005-11-01', 2006: '2006-11-01', 2007: '2007-10-30',
    2008: '2008-10-28', 2009: '2009-10-27', 2010: '2010-10-26', 2011: '2011-12-25',
    2012: '2012-10-30', 2013: '2013-10-29', 2014: '2014-10-28', 2015: '2015-10-27',
    2016: '2016-10-25', 2017: '2017-10-17', 2018: '2018-10-16', 2019: '2019-10-22',
    2020: '2020-12-22', 2021: '2021-10-19', 2022: '2022-10-18', 2023: '2023-10-24',
    2024: '2024-10-22',
}

for season, first_regular_day in reg_start_date.items():

    # rows of this season currently tagged 'regular' but dated before the cut-off
    early_regular_rows = (
        (all_games_comb['GAME_TYPE'] == 'regular')
        & (all_games_comb['SEASON_SHORT'] == season)
        & (all_games_comb['GAME_DATE'] < first_regular_day)
    )
    all_games_comb.loc[early_regular_rows, 'GAME_TYPE'] = 'preseason'


all_games_comb.head()

Unnamed: 0,SEASON_ID,TEAM_ID_H,TEAM_ABBREVIATION_H,TEAM_NAME_H,GAME_ID,GAME_DATE,MATCHUP_H,WL_H,MIN_H,PTS_H,...,DREB_A,REB_A,AST_A,STL_A,BLK_A,TOV_A,PF_A,PLUS_MINUS_A,SEASON_SHORT,GAME_TYPE
2,22024,1610612747,LAL,Los Angeles Lakers,22400477,2025-01-03,LAL vs. ATL,W,240,119,...,26.0,39.0,22,10.0,3,8,22,-17.0,2024,regular
6,22024,1610612743,DEN,Denver Nuggets,22400461,2025-01-01,DEN vs. ATL,W,240,139,...,32.0,44.0,34,10.0,5,13,16,-19.0,2024,regular
10,22024,1610612761,TOR,Toronto Raptors,22400438,2024-12-29,TOR vs. ATL,L,240,107,...,26.0,40.0,30,22.0,8,14,19,29.0,2024,regular
13,22024,1610612737,ATL,Atlanta Hawks,22400427,2024-12-28,ATL vs. MIA,W,240,120,...,34.0,42.0,23,9.0,4,8,19,-10.0,2024,regular
17,22024,1610612737,ATL,Atlanta Hawks,22400413,2024-12-26,ATL vs. CHI,W,240,141,...,27.0,36.0,36,8.0,1,14,26,-8.0,2024,regular


In [35]:
# Inspect the column labels of the combined frame (the _H / _A stat pairs plus any derived columns)
all_games_comb.columns

Index(['SEASON_ID', 'TEAM_ID_H', 'TEAM_ABBREVIATION_H', 'TEAM_NAME_H',
       'GAME_ID', 'GAME_DATE', 'MATCHUP_H', 'WL_H', 'MIN_H', 'PTS_H', 'FGM_H',
       'FGA_H', 'FG_PCT_H', 'FG3M_H', 'FG3A_H', 'FG3_PCT_H', 'FTM_H', 'FTA_H',
       'FT_PCT_H', 'OREB_H', 'DREB_H', 'REB_H', 'AST_H', 'STL_H', 'BLK_H',
       'TOV_H', 'PF_H', 'PLUS_MINUS_H', 'TEAM_ID_A', 'TEAM_ABBREVIATION_A',
       'TEAM_NAME_A', 'MATCHUP_A', 'WL_A', 'MIN_A', 'PTS_A', 'FGM_A', 'FGA_A',
       'FG_PCT_A', 'FG3M_A', 'FG3A_A', 'FG3_PCT_A', 'FTM_A', 'FTA_A',
       'FT_PCT_A', 'OREB_A', 'DREB_A', 'REB_A', 'AST_A', 'STL_A', 'BLK_A',
       'TOV_A', 'PF_A', 'PLUS_MINUS_A', 'SEASON_SHORT', 'GAME_TYPE'],
      dtype='object')

#### 3-Point Attempts Per Game

In [40]:
# Game-level totals: home (_H) plus away (_A) figures, stored in new _T columns

all_games_comb['FG3M_T'] = all_games_comb['FG3M_H'].add(all_games_comb['FG3M_A'])
all_games_comb['FG3A_T'] = all_games_comb['FG3A_H'].add(all_games_comb['FG3A_A'])
# combined three-point percentage for the game: total makes over total attempts
all_games_comb['FG3_PCT_T'] = all_games_comb['FG3M_T'].div(all_games_comb['FG3A_T'])
all_games_comb['FGA_T'] = all_games_comb['FGA_H'].add(all_games_comb['FGA_A'])


In [41]:
# Per-season means over regular-season games with SEASON_SHORT >= 2000.
# Each column is averaged across games, so FG3_PCT_T is the mean of the game-level values.
(
    all_games_comb
    .loc[
        (all_games_comb['GAME_TYPE'] == 'regular') & (all_games_comb['SEASON_SHORT'] >= 2000),
        ['SEASON_SHORT', 'FG3A_T', 'FG3_PCT_T', 'FGA_T'],
    ]
    .groupby('SEASON_SHORT')
    .mean()
    .reset_index()
)

Unnamed: 0,SEASON_SHORT,FG3A_T,FG3_PCT_T,FGA_T
0,2000,27.415475,0.350361,161.197645
1,2001,29.498738,0.352459,162.542473
2,2002,29.362489,0.346366,161.571909
3,2003,29.850294,0.344766,159.631623
4,2004,31.502439,0.352361,160.671545
5,2005,31.961789,0.35606,157.979675
6,2006,33.876221,0.355169,159.42101
7,2007,36.214634,0.359342,163.008943
8,2008,36.246341,0.364618,161.83252
9,2009,36.278049,0.352096,163.405691


In [21]:
# Home, away and combined three-point makes/attempts for each game, alongside matchup and date
all_games_comb.loc[:, [
    'MATCHUP_H', 'GAME_DATE',
    'FG3M_H', 'FG3A_H', 'FG3_PCT_H',
    'FG3M_A', 'FG3A_A', 'FG3_PCT_A',
    'FG3M_T', 'FG3A_T',
]]

Unnamed: 0,MATCHUP_H,GAME_DATE,FG3M_H,FG3A_H,FG3_PCT_H,FG3M_A,FG3A_A,FG3_PCT_A,FG3M_T,FG3A_T
2,LAL vs. ATL,2025-01-03,13,30.0,0.433,6,34.0,0.176,19,64.0
6,DEN vs. ATL,2025-01-01,12,30.0,0.400,16,46.0,0.348,28,76.0
10,TOR vs. ATL,2024-12-29,7,24.0,0.292,18,45.0,0.400,25,69.0
13,ATL vs. MIA,2024-12-28,15,37.0,0.405,13,41.0,0.317,28,78.0
17,ATL vs. CHI,2024-12-26,17,45.0,0.378,22,43.0,0.512,39,88.0
...,...,...,...,...,...,...,...,...,...,...
86243,DET vs. CHA,2011-02-02,7,19.0,0.368,3,10.0,0.300,10,29.0
86248,CHA vs. DET,2010-12-27,10,17.0,0.588,11,19.0,0.579,21,36.0
86251,DET vs. CHA,2010-11-05,7,13.0,0.538,9,20.0,0.450,16,33.0
86256,CHA vs. DET,2010-10-16,4,12.0,0.333,10,21.0,0.476,14,33.0
