In [1]:
from IPython.display import clear_output
from nba_api.stats.endpoints import leaguegamelog, boxscoreadvancedv2, boxscorescoringv2
from nba_api.stats.endpoints import leaguedashteamstats, leaguedashteamclutch, leaguedashteamshotlocations, leaguedashteamptshot, leaguedashptteamdefend
from nba_api.stats.endpoints import leaguedashplayerstats, leaguedashplayerclutch, leaguedashplayershotlocations, leaguedashplayerptshot
from nba_api.stats.endpoints import leaguedashptstats, leaguedashoppptshot, leaguedashptdefend

import pandas as pd
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import sqlite3
import time
from tqdm import tqdm
from functools import reduce


In [53]:
pd.options.display.max_columns=50
boxscoreadvancedv2.BoxScoreAdvancedV2(game_id= '0021300002').get_data_frames()[1]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,E_NET_RATING,NET_RATING,AST_PCT,AST_TOV,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,E_TM_TOV_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,21300002,1610612748,Heat,MIA,Miami,240.000000:00,107.3,108.1,93.9,96.9,13.3,11.1,0.703,1.3,19.9,,0.706,,20.048,20.2,0.59,0.631,1.0,0.197,100.44,98.5,82.08,99,0.599
1,21300002,1610612741,Bulls,CHI,Chicago,240.000000:00,93.9,96.9,107.3,108.1,-13.3,-11.1,0.657,1.21,17.0,0.294,,,18.79,19.4,0.464,0.51,1.0,0.198,100.44,98.5,82.08,98,0.401


In [43]:
def season_string(season):
    return str(season) + '-' + str(season+1)[-2:]


def get_game_dates(season):
    season_str = season_string(season)
    dates = []
    for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
        games = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
        dates.extend(games['GAME_DATE'].unique())
        time.sleep(1)
    return dates



In [50]:
boxscoreadvancedv2.BoxScoreAdvancedV2(game_id='0052200211').get_data_frames()[1]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,...,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,52200211,1610612760,Thunder,OKC,Oklahoma City,240.000000:00,91.5,92.2,117.1,117.6,...,17.5,0.433,0.486,1.0,0.2,103.12,102.5,85.42,103,0.334
1,52200211,1610612750,Timberwolves,MIN,Minnesota,240.000000:00,117.1,117.6,91.5,92.2,...,16.7,0.596,0.635,1.0,0.197,103.12,102.5,85.42,102,0.666


In [7]:
pd.options.display.max_columns=200

In [38]:

def build_team_stat_db(season, conn, measures=['Advanced', 'Misc', 'Four Factors', 'Scoring', 'Opponent', 'Defense'], table_name='boxscores_all'):
    df_holder_main = []
    season_str = season_string(season)
    
    season_game_dates = get_game_dates(season)
    
    for date in tqdm(season_game_dates[2:]):   
        df_holder = []     
        for measure in measures:
            try:
                df = leaguedashteamstats.LeagueDashTeamStats(last_n_games=0,
                                                        season=season_str,
                                                        season_type_all_star = 
                                                        measure_type_detailed_defense=measure,
                                                        rank='N',
                                                        date_from_nullable=date,
                                                        date_to_nullable=date).get_data_frames()[0]
                

                time.sleep(random.randint(2, 3))
                df_holder.append(df)
            except:
                print(f"Error getting data from {date}... skipping")
                time.sleep(90)
                break
                
        df_merged = reduce(lambda  left,right: pd.merge(left, right, left_on=['TEAM_ID'],
                                                        right_on=['TEAM_ID'], suffixes=('', '_remove'),
                                                    how='inner'), df_holder)
        df_merged['GAME_DATE'] = date
        df_merged.drop([i for i in df_merged.columns if (('remove' in i) | ('RANK' in i))], axis=1, inplace=True)
        
        df_holder_main.append(df_merged)
        time.sleep(random.randint(3, 5))
        
        
    season_df = pd.concat(df_holder_main)
    season_df.to_sql(table_name, conn, if_exists='append', index=False)
    cur = conn.cursor()
    cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY TEAM_ID, GAME_DATE)')
    
    return season_df



In [39]:
season_game_dates = get_game_dates(2012)
len(season_game_dates)

208

In [40]:
db_name = 'test_nba_data.db'
conn = sqlite3.connect(db_name)


for s in range(2016, 2024):
    start_time = time.time()
    df = build_team_stat_db(season=s, conn = conn, measures=['Advanced', 'Misc', 'Four Factors', 'Scoring', 'Opponent', 'Defense'], table_name='boxscores_all')
    time.sleep(120)
    end_time = time.time()
    print(f"minutes_taken for season {s}:", (end_time - start_time)/60)


100%|██████████| 202/202 [1:25:52<00:00, 25.51s/it]


minutes_taken for season 2016: 87.98148813645045


 70%|███████   | 148/210 [1:01:57<26:45, 25.90s/it]

Error getting data from 2018-03-24... skipping


100%|██████████| 210/210 [1:29:56<00:00, 25.70s/it]


minutes_taken for season 2017: 91.98542180856069


 89%|████████▊ | 187/211 [1:20:17<11:42, 29.28s/it]

Error getting data from 2019-05-04... skipping


 89%|████████▊ | 187/211 [1:22:08<10:32, 26.35s/it]


TypeError: reduce() of empty sequence with no initial value

In [48]:
conn.close()

In [45]:
measures=['Advanced', 'Misc', 'Four Factors', 'Scoring', 'Opponent', 'Defense']

def build_team_stat_db(season, conn, measure):
    df_holder = []
    season_str = season_string(season)
    
    season_game_dates = get_game_dates(season)  
    num_dates = len(season_game_dates)
    missing_dates = []
    
    print(f"Getting {measure} boxscore data for {season_str}. {num_dates} days")

    for date in tqdm(season_game_dates):   
        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            try:
                df = leaguedashteamstats.LeagueDashTeamStats(last_n_games=0,
                                                        season=season_str,
                                                        season_type_all_star= season_type,
                                                        measure_type_detailed_defense=measure,
                                                        rank='N',
                                                        date_from_nullable=date,
                                                        date_to_nullable=date).get_data_frames()[0]
                
                df['GAME_DATE'] = date
                df['SEASON'] = season_str

                time.sleep(random.randint(2, 3))
                df_holder.append(df)
            except Exception as e:
                print(f"error: {e} getting data from {date}... skipping")
                missing_dates.append(date)
                time.sleep(90)
                break        
    
    if measure == 'Four Factors':
        table_name = 'FourFactors_team_boxscores'
    else:
        table_name = measure + '_team_boxscores'
        
    season_df = pd.concat(df_holder)
        
    season_df.to_sql(table_name, conn, if_exists='append', index=False)
    cur = conn.cursor()
    cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY TEAM_ID, GAME_DATE)')
    
    return season_df



In [46]:
db_name = 'test_nba_data.db'
conn = sqlite3.connect(db_name)


for s in range(2017, 2023):
    for m in measures:
        build_team_stat_db(season=s, conn=conn, measure=m)
        time.sleep(120)
    time.sleep(120)
    

conn.close()

Getting Advanced boxscore data for 2017-18. 212 days


100%|██████████| 212/212 [37:21<00:00, 10.57s/it]


Getting Misc boxscore data for 2017-18. 212 days


100%|██████████| 212/212 [32:36<00:00,  9.23s/it]


Getting Four Factors boxscore data for 2017-18. 212 days


100%|██████████| 212/212 [33:20<00:00,  9.44s/it]


Getting Scoring boxscore data for 2017-18. 212 days


 50%|████▉     | 105/212 [16:06<16:59,  9.53s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamstats?Conference=&DateFrom=2018-02-01&DateTo=2018-02-01&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Scoring&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2017-18&SeasonSegment=&SeasonType=PlayIn&ShotClockRange=&StarterBench=&TeamID=&TwoWay=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB168A30>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2018-02-01... skipping


100%|██████████| 212/212 [33:38<00:00,  9.52s/it]  


Getting Opponent boxscore data for 2017-18. 212 days


100%|██████████| 212/212 [33:44<00:00,  9.55s/it]


Getting Defense boxscore data for 2017-18. 212 days


100%|██████████| 212/212 [33:05<00:00,  9.37s/it]


Getting Advanced boxscore data for 2018-19. 213 days


100%|██████████| 213/213 [36:52<00:00, 10.39s/it]


Getting Misc boxscore data for 2018-19. 213 days


100%|██████████| 213/213 [32:50<00:00,  9.25s/it]


Getting Four Factors boxscore data for 2018-19. 213 days


 17%|█▋        | 36/213 [06:07<33:13, 11.26s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamstats?Conference=&DateFrom=2018-11-21&DateTo=2018-11-21&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Four+Factors&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2018-19&SeasonSegment=&SeasonType=Playoffs&ShotClockRange=&StarterBench=&TeamID=&TwoWay=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB1D0610>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2018-11-21... skipping


100%|██████████| 213/213 [36:29<00:00, 10.28s/it] 


Getting Scoring boxscore data for 2018-19. 213 days


100%|██████████| 213/213 [35:32<00:00, 10.01s/it]


Getting Opponent boxscore data for 2018-19. 213 days


100%|██████████| 213/213 [35:34<00:00, 10.02s/it]


Getting Defense boxscore data for 2018-19. 213 days


100%|██████████| 213/213 [36:32<00:00, 10.29s/it]


Getting Advanced boxscore data for 2019-20. 193 days


 97%|█████████▋| 188/193 [36:49<01:04, 12.91s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2020-10-02... skipping


100%|██████████| 193/193 [39:38<00:00, 12.33s/it]


Getting Misc boxscore data for 2019-20. 193 days


100%|██████████| 193/193 [32:25<00:00, 10.08s/it]


Getting Four Factors boxscore data for 2019-20. 193 days


100%|██████████| 193/193 [34:02<00:00, 10.58s/it]


Getting Scoring boxscore data for 2019-20. 193 days


100%|██████████| 193/193 [32:31<00:00, 10.11s/it]


Getting Opponent boxscore data for 2019-20. 193 days


100%|██████████| 193/193 [32:42<00:00, 10.17s/it]


Getting Defense boxscore data for 2019-20. 193 days


100%|██████████| 193/193 [33:02<00:00, 10.27s/it]


Getting Advanced boxscore data for 2020-21. 191 days


100%|██████████| 191/191 [36:38<00:00, 11.51s/it]


Getting Misc boxscore data for 2020-21. 191 days


100%|██████████| 191/191 [31:15<00:00,  9.82s/it]


Getting Four Factors boxscore data for 2020-21. 191 days


100%|██████████| 191/191 [33:29<00:00, 10.52s/it]


Getting Scoring boxscore data for 2020-21. 191 days


 16%|█▌        | 31/191 [05:10<25:44,  9.65s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamstats?Conference=&DateFrom=2021-01-23&DateTo=2021-01-23&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Scoring&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2020-21&SeasonSegment=&SeasonType=PlayIn&ShotClockRange=&StarterBench=&TeamID=&TwoWay=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB61A700>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond')) getting data from 2021-01-23... skipping


 27%|██▋       | 51/191 [10:33<25:08, 10.77s/it]  

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamstats?Conference=&DateFrom=2021-02-12&DateTo=2021-02-12&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Scoring&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2020-21&SeasonSegment=&SeasonType=Playoffs&ShotClockRange=&StarterBench=&TeamID=&TwoWay=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB6F9C70>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond')) getting data from 2021-02-12... skipping


 70%|███████   | 134/191 [26:40<15:39, 16.49s/it] 

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2021-05-11... skipping


100%|██████████| 191/191 [38:15<00:00, 12.02s/it]


Getting Opponent boxscore data for 2020-21. 191 days


100%|██████████| 191/191 [31:41<00:00,  9.95s/it]


Getting Defense boxscore data for 2020-21. 191 days


100%|██████████| 191/191 [33:54<00:00, 10.65s/it]


Getting Advanced boxscore data for 2021-22. 213 days


100%|██████████| 213/213 [41:53<00:00, 11.80s/it]


Getting Misc boxscore data for 2021-22. 213 days


100%|██████████| 213/213 [34:36<00:00,  9.75s/it]


Getting Four Factors boxscore data for 2021-22. 213 days


 53%|█████▎    | 113/213 [19:33<25:50, 15.50s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2022-02-11... skipping


100%|██████████| 213/213 [38:18<00:00, 10.79s/it]  


Getting Scoring boxscore data for 2021-22. 213 days


 40%|███▉      | 85/213 [15:00<23:04, 10.81s/it]

error: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) getting data from 2022-01-14... skipping


 95%|█████████▍| 202/213 [36:18<01:52, 10.21s/it] 

error: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) getting data from 2022-05-24... skipping


100%|██████████| 213/213 [39:53<00:00, 11.24s/it]


Getting Opponent boxscore data for 2021-22. 213 days


100%|██████████| 213/213 [37:05<00:00, 10.45s/it]


Getting Defense boxscore data for 2021-22. 213 days


 60%|██████    | 128/213 [22:11<15:08, 10.69s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2022-03-04... skipping


100%|██████████| 213/213 [38:52<00:00, 10.95s/it]  


Getting Advanced boxscore data for 2022-23. 212 days


100%|██████████| 212/212 [44:02<00:00, 12.47s/it]


Getting Misc boxscore data for 2022-23. 212 days


100%|██████████| 212/212 [36:06<00:00, 10.22s/it]


Getting Four Factors boxscore data for 2022-23. 212 days


100%|██████████| 212/212 [37:22<00:00, 10.58s/it]


Getting Scoring boxscore data for 2022-23. 212 days


100%|██████████| 212/212 [35:58<00:00, 10.18s/it]


Getting Opponent boxscore data for 2022-23. 212 days


100%|██████████| 212/212 [37:10<00:00, 10.52s/it] 


Getting Defense boxscore data for 2022-23. 212 days


100%|██████████| 212/212 [37:00<00:00, 10.48s/it]


In [5]:
db_name = 'test_nba_data.db'
table_name='boxscores_all'
conn = sqlite3.connect(db_name)

result = pd.read_sql("SELECT * FROM FourFactors_team_boxscores", con=conn)
result



Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,EFG_PCT,FTA_RATE,TM_TOV_PCT,...,EFG_PCT_RANK,FTA_RATE_RANK,TM_TOV_PCT_RANK,OREB_PCT_RANK,OPP_EFG_PCT_RANK,OPP_FTA_RATE_RANK,OPP_TOV_PCT_RANK,OPP_OREB_PCT_RANK,GAME_DATE,SEASON
0,1610612739,Cleveland Cavaliers,1,1,0,1.0,48.0,0.548,0.202,0.149,...,2,5,3,2,1,4,1,4,2016-10-25,2016-17
1,1610612744,Golden State Warriors,1,0,1,0.0,48.0,0.512,0.212,0.158,...,5,4,5,6,4,5,6,6,2016-10-25,2016-17
2,1610612752,New York Knicks,1,0,1,0.0,48.0,0.420,0.230,0.178,...,6,3,6,3,5,2,4,5,2016-10-25,2016-17
3,1610612757,Portland Trail Blazers,1,1,0,1.0,48.0,0.607,0.293,0.141,...,1,1,2,5,3,1,3,3,2016-10-25,2016-17
4,1610612759,San Antonio Spurs,1,1,0,1.0,48.0,0.541,0.265,0.140,...,3,2,1,1,2,3,2,1,2016-10-25,2016-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,1610612758,Sacramento Kings,1,0,1,0.0,48.0,0.525,0.173,0.122,...,11,21,11,22,27,24,28,17,2017-04-12,2016-17
2456,1610612759,San Antonio Spurs,1,0,1,0.0,48.0,0.500,0.151,0.161,...,17,26,17,6,26,19,6,13,2017-04-12,2016-17
2457,1610612761,Toronto Raptors,1,1,0,1.0,48.0,0.512,0.186,0.099,...,15,20,4,11,2,25,11,5,2017-04-12,2016-17
2458,1610612762,Utah Jazz,1,1,0,1.0,48.0,0.571,0.260,0.187,...,3,10,23,16,10,3,12,23,2017-04-12,2016-17


In [37]:
sorted(result['GAME_DATE'].unique().tolist())
result.sort_values('GAME_DATE')

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,E_NET_RATING,NET_RATING,AST_PCT,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE,PTS_OFF_TOV,PTS_2ND_CHANCE,PTS_FB,PTS_PAINT,OPP_PTS_OFF_TOV,OPP_PTS_2ND_CHANCE,OPP_PTS_FB,OPP_PTS_PAINT,FTA_RATE,OPP_EFG_PCT,OPP_FTA_RATE,OPP_TOV_PCT,OPP_OREB_PCT,PCT_FGA_2PT,PCT_FGA_3PT,PCT_PTS_2PT,PCT_PTS_2PT_MR,PCT_PTS_3PT,PCT_PTS_FB,PCT_PTS_FT,PCT_PTS_OFF_TOV,PCT_PTS_PAINT,PCT_AST_2PM,PCT_UAST_2PM,PCT_AST_3PM,PCT_UAST_3PM,PCT_AST_FGM,PCT_UAST_FGM,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_TOV,OPP_STL,OPP_BLK,OPP_BLKA,OPP_PF,OPP_PFD,OPP_PTS,PLUS_MINUS,OPP_PFD1,DREB,STL,BLK,GAME_DATE
112,1610612738,Boston Celtics,1,0,1,0.0,48.0,111.1,110.3,124.9,122.4,-13.8,-12.1,0.615,1.50,18.8,0.179,0.872,0.526,0.165,0.560,0.613,96.2,97.5,81.25,97,0.440,6.0,6.0,27.0,42.0,19.0,6.0,12.0,46.0,0.373,0.595,0.405,0.082,0.128,0.827,0.173,0.617,0.224,0.168,0.252,0.215,0.056,0.393,0.576,0.424,0.833,0.167,0.615,0.385,43.0,79.0,0.544,8.0,16.0,0.500,26.0,32.0,0.813,5.0,31.0,36.0,25.0,8.0,8.0,5.0,2.0,20.0,23.0,120.0,13.0,2,34,4,2,2012-10-30
113,1610612739,Cleveland Cavaliers,1,1,0,1.0,48.0,102.5,101.1,89.6,91.3,13.0,9.8,0.611,1.05,16.3,0.455,0.661,0.575,0.226,0.500,0.530,92.7,92.5,77.08,93,0.575,20.0,24.0,6.0,42.0,13.0,7.0,13.0,32.0,0.278,0.400,0.222,0.141,0.339,0.747,0.253,0.617,0.170,0.223,0.064,0.160,0.213,0.447,0.517,0.483,1.000,0.000,0.611,0.389,32.0,90.0,0.356,8.0,32.0,0.250,12.0,20.0,0.600,18.0,21.0,39.0,26.0,13.0,11.0,10.0,5.0,19.0,21.0,84.0,-10.0,3,36,7,5,2012-10-30
114,1610612742,Dallas Mavericks,1,1,0,1.0,48.0,103.2,110.0,101.5,102.2,1.7,7.8,0.550,1.83,17.5,0.340,0.653,0.500,0.133,0.500,0.533,92.8,89.5,74.58,90,0.543,17.0,7.0,13.0,46.0,17.0,12.0,7.0,56.0,0.212,0.513,0.403,0.157,0.347,0.824,0.176,0.707,0.242,0.152,0.131,0.141,0.172,0.465,0.514,0.486,0.800,0.200,0.550,0.450,38.0,77.0,0.494,3.0,13.0,0.231,12.0,31.0,0.387,15.0,31.0,46.0,24.0,14.0,6.0,5.0,5.0,21.0,25.0,91.0,-8.0,1,31,9,5,2012-10-30
115,1610612747,Los Angeles Lakers,1,0,1,0.0,48.0,101.5,102.2,103.2,110.0,-1.7,-7.8,0.632,1.71,18.5,0.347,0.660,0.500,0.157,0.513,0.502,92.8,89.5,74.58,89,0.457,17.0,12.0,7.0,56.0,17.0,7.0,13.0,46.0,0.403,0.500,0.212,0.133,0.340,0.831,0.169,0.769,0.154,0.099,0.077,0.132,0.187,0.615,0.600,0.400,1.000,0.000,0.632,0.368,40.0,85.0,0.471,5.0,15.0,0.333,14.0,18.0,0.778,9.0,31.0,40.0,22.0,12.0,9.0,5.0,5.0,25.0,21.0,99.0,8.0,3,31,6,5,2012-10-30
116,1610612748,Miami Heat,1,1,0,1.0,48.0,124.9,122.4,111.1,110.3,13.8,12.1,0.581,3.13,19.5,0.128,0.821,0.474,0.082,0.595,0.645,96.2,97.5,81.25,98,0.560,19.0,6.0,12.0,46.0,6.0,6.0,27.0,42.0,0.405,0.560,0.373,0.165,0.179,0.797,0.203,0.583,0.200,0.200,0.100,0.217,0.158,0.383,0.514,0.486,0.875,0.125,0.581,0.419,39.0,75.0,0.520,6.0,13.0,0.462,23.0,28.0,0.821,7.0,34.0,41.0,24.0,16.0,4.0,2.0,5.0,23.0,20.0,107.0,-13.0,5,31,8,5,2012-10-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425,1610612738,Boston Celtics,1,1,0,1.0,48.0,114.4,112.5,101.5,107.2,12.8,5.3,0.486,1.38,14.9,0.171,0.636,0.458,0.135,0.558,0.611,98.4,96.5,80.42,96,0.592,14.0,4.0,18.0,42.0,11.0,20.0,7.0,24.0,0.338,0.464,0.268,0.113,0.364,0.494,0.506,0.463,0.074,0.333,0.167,0.204,0.130,0.389,0.400,0.600,0.667,0.333,0.486,0.514,36.0,97.0,0.371,18.0,41.0,0.439,14.0,26.0,0.538,17.0,30.0,47.0,24.0,11.0,9.0,0.0,11.0,22.0,22.0,104.0,-4.0,7,39,6,11,2023-10-25
424,1610612737,Atlanta Hawks,1,0,1,0.0,48.0,102.3,105.8,111.1,111.5,-8.8,-5.8,0.615,2.00,16.7,0.276,0.674,0.452,0.115,0.446,0.512,106.0,104.0,86.67,104,0.451,17.0,14.0,19.0,58.0,12.0,14.0,17.0,56.0,0.355,0.564,0.302,0.183,0.326,0.688,0.312,0.618,0.091,0.136,0.173,0.245,0.155,0.527,0.559,0.441,1.000,0.000,0.615,0.385,43.0,86.0,0.500,11.0,37.0,0.297,19.0,26.0,0.731,12.0,39.0,51.0,34.0,19.0,5.0,3.0,1.0,21.0,19.0,116.0,6.0,16,30,12,1,2023-10-25
446,1610612762,Utah Jazz,1,0,1,0.0,48.0,111.1,108.6,120.6,123.8,-9.5,-15.2,0.537,1.57,15.4,0.314,0.719,0.528,0.133,0.500,0.545,105.2,105.0,87.50,105,0.465,14.0,15.0,15.0,64.0,9.0,14.0,10.0,48.0,0.341,0.554,0.196,0.114,0.281,0.692,0.308,0.561,0.000,0.237,0.132,0.202,0.123,0.561,0.500,0.500,0.667,0.333,0.537,0.463,47.0,102.0,0.461,19.0,51.0,0.373,17.0,20.0,0.850,15.0,30.0,45.0,29.0,12.0,6.0,8.0,2.0,25.0,20.0,130.0,16.0,11,38,6,2,2023-10-25
434,1610612746,LA Clippers,1,1,0,1.0,48.0,121.7,124.2,107.3,112.1,14.5,12.1,0.766,2.00,23.5,0.400,0.635,0.526,0.182,0.611,0.634,102.3,99.0,82.50,99,0.601,24.0,19.0,15.0,58.0,21.0,22.0,14.0,54.0,0.178,0.527,0.183,0.182,0.365,0.622,0.378,0.504,0.033,0.390,0.122,0.106,0.195,0.472,0.774,0.226,0.750,0.250,0.766,0.234,44.0,93.0,0.473,10.0,32.0,0.313,13.0,17.0,0.765,15.0,25.0,40.0,20.0,18.0,10.0,4.0,7.0,20.0,20.0,111.0,-12.0,11,31,11,7,2023-10-25


In [47]:
# (Base)|(Opponent)
measures = ['Base', 'Opponent']

def build_team_stat_shot_location_db(season, conn, measure):
    df_holder = []
    season_str = season_string(season)
    
    season_game_dates = get_game_dates(season)  
    num_dates = len(season_game_dates)
    missing_dates = []
    
    print(f"Getting {measure} boxscore shot location data for {season_str}. {num_dates} days")

    for date in tqdm(season_game_dates):   
        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            try:
                df = leaguedashteamshotlocations.LeagueDashTeamShotLocations(last_n_games=0,
                                                                    measure_type_simple=measure,
                                                                    season_type_all_star=season_type,
                                                                    season = season_str,
                                                                    rank='n',
                                                                    date_from_nullable= date,
                                                                    date_to_nullable= date).get_data_frames()[0]
                df.columns = [' '.join(col).strip() for col in df.columns.values]

                df['GAME_DATE'] = date
                df['SEASON'] = season_str

                time.sleep(random.randint(3, 4))
                df_holder.append(df)
            except Exception as e:
                print(f"error: {e} getting data from {date}... skipping")
                missing_dates.append(date)
                time.sleep(90)
                break        
        

    table_name = measure + '_team_boxscores_shot_location'
        
    season_df = pd.concat(df_holder)
        
    season_df.to_sql(table_name, conn, if_exists='append', index=False)
    cur = conn.cursor()
    cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY TEAM_ID, GAME_DATE)')
    
    return season_df



db_name = 'test_nba_data.db'
conn = sqlite3.connect(db_name)

for s in range(2016, 2023):
    for m in measures:
        shot_location_stats_df = build_team_stat_shot_location_db(season=s, conn=conn, measure=m)
        time.sleep(120)
    time.sleep(120)
    

conn.close()


shot_location_stats_df

Getting Base boxscore shot location data for 2016-17. 204 days


 67%|██████▋   | 136/204 [47:15<29:43, 26.23s/it] 

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2017-03-18... skipping


100%|██████████| 204/204 [1:11:26<00:00, 21.01s/it]


Getting Opponent boxscore shot location data for 2016-17. 204 days


 34%|███▍      | 70/204 [23:01<42:35, 19.07s/it]  

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2017-01-05... skipping


 49%|████▉     | 100/204 [34:49<34:23, 19.84s/it] 

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2017-02-04... skipping


 50%|████▉     | 101/204 [37:02<1:32:08, 53.68s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamshotlocations?Conference=&DateFrom=2017-02-05&DateTo=2017-02-05&DistanceRange=By+Zone&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=n&Season=2016-17&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB246A90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2017-02-05... skipping


 55%|█████▍    | 112/204 [41:51<30:48, 20.09s/it]  

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamshotlocations?Conference=&DateFrom=2017-02-16&DateTo=2017-02-16&DistanceRange=By+Zone&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=n&Season=2016-17&SeasonSegment=&SeasonType=Playoffs&ShotClockRange=&StarterBench=&TeamID=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB3F6850>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2017-02-16... skipping


100%|██████████| 204/204 [1:13:20<00:00, 21.57s/it]


Getting Base boxscore shot location data for 2017-18. 212 days


 31%|███       | 65/212 [22:10<1:07:25, 27.52s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2017-12-22... skipping


100%|██████████| 212/212 [1:11:33<00:00, 20.25s/it]


Getting Opponent boxscore shot location data for 2017-18. 212 days


 11%|█▏        | 24/212 [07:56<1:02:03, 19.80s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2017-11-10... skipping


100%|██████████| 212/212 [1:11:34<00:00, 20.26s/it]


Getting Base boxscore shot location data for 2018-19. 213 days


 10%|█         | 22/213 [07:14<1:07:31, 21.21s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2018-11-07... skipping


 11%|█         | 23/213 [09:28<2:54:07, 54.99s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2018-11-08... skipping


 11%|█▏        | 24/213 [11:28<3:54:42, 74.51s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2018-11-09... skipping


 80%|████████  | 171/213 [1:01:44<13:58, 19.97s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamshotlocations?Conference=&DateFrom=2019-04-16&DateTo=2019-04-16&DistanceRange=By+Zone&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=n&Season=2018-19&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB18B3D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2019-04-16... skipping


100%|██████████| 213/213 [1:16:29<00:00, 21.54s/it]


Getting Opponent boxscore shot location data for 2018-19. 213 days


 59%|█████▊    | 125/213 [42:03<30:08, 20.56s/it] 

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2019-02-26... skipping


100%|██████████| 213/213 [1:13:25<00:00, 20.68s/it]


Getting Base boxscore shot location data for 2019-20. 193 days


 37%|███▋      | 72/193 [24:11<46:22, 22.99s/it]  

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2020-01-04... skipping


 38%|███▊      | 73/193 [26:29<1:55:21, 57.68s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2020-01-05... skipping


100%|██████████| 193/193 [1:08:31<00:00, 21.30s/it]


Getting Opponent boxscore shot location data for 2019-20. 193 days


100%|██████████| 193/193 [1:06:07<00:00, 20.56s/it]


Getting Base boxscore shot location data for 2020-21. 191 days


 46%|████▌     | 88/191 [30:58<34:49, 20.28s/it]  

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2021-03-26... skipping


 47%|████▋     | 89/191 [33:17<1:34:46, 55.75s/it]

error: Expecting value: line 1 column 1 (char 0) getting data from 2021-03-27... skipping


100%|██████████| 191/191 [1:08:39<00:00, 21.57s/it]


Getting Opponent boxscore shot location data for 2020-21. 191 days


  2%|▏         | 3/191 [01:04<1:09:54, 22.31s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2020-12-26... skipping


  2%|▏         | 4/191 [03:20<3:28:38, 66.94s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2020-12-27... skipping


  3%|▎         | 5/191 [05:20<4:26:53, 86.09s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamshotlocations?Conference=&DateFrom=2020-12-28&DateTo=2020-12-28&DistanceRange=By+Zone&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=n&Season=2020-21&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB61A1F0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2020-12-28... skipping


100%|██████████| 191/191 [1:10:16<00:00, 22.07s/it]


Getting Base boxscore shot location data for 2021-22. 213 days


 67%|██████▋   | 143/213 [48:44<27:10, 23.30s/it] 

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2022-03-19... skipping


 68%|██████▊   | 144/213 [50:44<1:00:10, 52.32s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamshotlocations?Conference=&DateFrom=2022-03-20&DateTo=2022-03-20&DistanceRange=By+Zone&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=n&Season=2021-22&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB8D4220>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2022-03-20... skipping


100%|██████████| 213/213 [1:15:14<00:00, 21.19s/it]


Getting Opponent boxscore shot location data for 2021-22. 213 days


 45%|████▌     | 96/213 [33:12<46:59, 24.10s/it]  

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2022-01-25... skipping


 46%|████▌     | 97/213 [35:18<1:45:38, 54.64s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamshotlocations?Conference=&DateFrom=2022-01-26&DateTo=2022-01-26&DistanceRange=By+Zone&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=n&Season=2021-22&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB54CF70>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2022-01-26... skipping


100%|██████████| 213/213 [1:16:41<00:00, 21.60s/it]


Getting Base boxscore shot location data for 2022-23. 212 days


100%|██████████| 212/212 [1:13:09<00:00, 20.70s/it]


Getting Opponent boxscore shot location data for 2022-23. 212 days


  1%|▏         | 3/212 [00:59<1:08:12, 19.58s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2022-10-21... skipping


 60%|██████    | 128/212 [50:18<38:58, 27.84s/it] 

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Max retries exceeded with url: /stats/leaguedashteamshotlocations?Conference=&DateFrom=2023-03-04&DateTo=2023-03-04&DistanceRange=By+Zone&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=n&Season=2022-23&SeasonSegment=&SeasonType=Playoffs&ShotClockRange=&StarterBench=&TeamID=&VsConference=&VsDivision= (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001DBCB2429D0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed')) getting data from 2023-03-04... skipping


 63%|██████▎   | 133/212 [54:35<52:10, 39.62s/it]  

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2023-03-09... skipping


 71%|███████   | 150/212 [1:02:52<20:54, 20.23s/it]

error: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30) getting data from 2023-03-26... skipping


100%|██████████| 212/212 [1:27:11<00:00, 24.68s/it]


Unnamed: 0,TEAM_ID,TEAM_NAME,Restricted Area OPP_FGM,Restricted Area OPP_FGA,Restricted Area OPP_FG_PCT,In The Paint (Non-RA) OPP_FGM,In The Paint (Non-RA) OPP_FGA,In The Paint (Non-RA) OPP_FG_PCT,Mid-Range OPP_FGM,Mid-Range OPP_FGA,...,Above the Break 3 OPP_FGA,Above the Break 3 OPP_FG_PCT,Backcourt OPP_FGM,Backcourt OPP_FGA,Backcourt OPP_FG_PCT,Corner 3 OPP_FGM,Corner 3 OPP_FGA,Corner 3 OPP_FG_PCT,GAME_DATE,SEASON
0,1610612738,Boston Celtics,8.0,11.0,0.727,11.0,20.0,0.550,8.0,15.0,...,28.0,0.357,0.0,0.0,0.0,3.0,6.0,0.500,2022-10-18,2022-23
1,1610612744,Golden State Warriors,25.0,35.0,0.714,5.0,14.0,0.357,0.0,5.0,...,30.0,0.233,0.0,0.0,0.0,3.0,10.0,0.300,2022-10-18,2022-23
2,1610612747,Los Angeles Lakers,16.0,29.0,0.552,3.0,10.0,0.300,10.0,16.0,...,33.0,0.394,0.0,1.0,0.0,3.0,10.0,0.300,2022-10-18,2022-23
3,1610612755,Philadelphia 76ers,14.0,18.0,0.778,8.0,15.0,0.533,12.0,14.0,...,25.0,0.320,0.0,0.0,0.0,4.0,10.0,0.400,2022-10-18,2022-23
0,1610612737,Atlanta Hawks,21.0,38.0,0.553,9.0,18.0,0.500,3.0,7.0,...,30.0,0.267,0.0,0.0,0.0,1.0,5.0,0.200,2022-10-19,2022-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,1610612748,Miami Heat,20.0,29.0,0.690,10.0,19.0,0.526,6.0,14.0,...,14.0,0.357,0.0,0.0,0.0,0.0,4.0,0.000,2023-06-07,2022-23
0,1610612743,Denver Nuggets,14.0,18.0,0.778,9.0,23.0,0.391,4.0,12.0,...,16.0,0.313,0.0,0.0,0.0,3.0,9.0,0.333,2023-06-09,2022-23
1,1610612748,Miami Heat,19.0,28.0,0.679,5.0,16.0,0.313,1.0,7.0,...,22.0,0.455,0.0,0.0,0.0,4.0,6.0,0.667,2023-06-09,2022-23
0,1610612743,Denver Nuggets,15.0,21.0,0.714,7.0,33.0,0.212,2.0,7.0,...,25.0,0.200,0.0,0.0,0.0,4.0,10.0,0.400,2023-06-12,2022-23


In [None]:
leaguedashteamshotlocations.LeagueDashTeamShotLocations(last_n_games=0,
                                                    season = 
                                                    measure_type_simple=measure,
                                                    rank='n',
                                                    date_from_nullable= date,
                                                    date_to_nullable= date).get_data_frames()[0]


In [41]:
db_name = 'test_nba_data.db'
conn = sqlite3.connect(db_name)

pd.read_sql("select * from Base_team_boxscores_shot_location", conn)

Unnamed: 0,TEAM_ID,TEAM_NAME,Restricted Area FGM,Restricted Area FGA,Restricted Area FG_PCT,In The Paint (Non-RA) FGM,In The Paint (Non-RA) FGA,In The Paint (Non-RA) FG_PCT,Mid-Range FGM,Mid-Range FGA,...,Above the Break 3 FGA,Above the Break 3 FG_PCT,Backcourt FGM,Backcourt FGA,Backcourt FG_PCT,Corner 3 FGM,Corner 3 FGA,Corner 3 FG_PCT,GAME_DATE,SEASON
0,1610612739,Cleveland Cavaliers,23,36,0.639,3,7,0.429,6,16,...,23,0.304,0,0,0.0,6,12,0.5,2016-10-25,2016-17
1,1610612744,Golden State Warriors,17,25,0.68,7,12,0.583,9,15,...,22,0.273,0,1,0.0,1,10,0.1,2016-10-25,2016-17
2,1610612752,New York Knicks,10,22,0.455,4,8,0.5,9,30,...,21,0.286,0,1,0.0,3,5,0.6,2016-10-25,2016-17
3,1610612757,Portland Trail Blazers,10,21,0.476,7,17,0.412,9,18,...,16,0.75,0,0,0.0,1,3,0.333,2016-10-25,2016-17
4,1610612759,San Antonio Spurs,18,31,0.581,7,17,0.412,10,26,...,17,0.471,0,0,0.0,4,7,0.571,2016-10-25,2016-17
5,1610612762,Utah Jazz,16,26,0.615,9,12,0.75,7,20,...,18,0.389,0,0,0.0,1,6,0.167,2016-10-25,2016-17
6,1610612738,Boston Celtics,20,28,0.714,5,13,0.385,12,16,...,23,0.304,0,0,0.0,4,9,0.444,2016-10-26,2016-17
7,1610612751,Brooklyn Nets,18,29,0.621,7,15,0.467,3,9,...,37,0.324,0,0,0.0,3,7,0.429,2016-10-26,2016-17
8,1610612766,Charlotte Hornets,23,32,0.719,5,10,0.5,5,24,...,20,0.35,0,1,0.0,0,2,0.0,2016-10-26,2016-17
9,1610612742,Dallas Mavericks,18,30,0.6,3,6,0.5,6,20,...,44,0.341,0,0,0.0,3,4,0.75,2016-10-26,2016-17


In [50]:
# (Base)|(Opponent)
shot_distance_categories = ['Overall', '3 Pointers', '2 Pointers', 'Less Than 6Ft', 'Less Than 10Ft', 'Greater Than 15Ft']

def build_team_stat_shot_distance_db(season, conn, category):
    df_holder = []
    season_str = season_string(season)
    
    season_game_dates = get_game_dates(season)  
    num_dates = len(season_game_dates)
    missing_dates = []
    
    print(f"Getting {category} boxscore shot location data for {season_str}. {num_dates} days")

    for date in tqdm(season_game_dates):   
        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            try:
                df = leaguedashptteamdefend.LeagueDashPtTeamDefend(defense_category = category,
                                                                   per_mode_simple='Totals',
                                                                season = season_str,
                                                                season_type_all_star=season_type,
                                                                date_from_nullable=date,
                                                            date_to_nullable=date).get_data_frames()[0]
                df['GAME_DATE'] = date
                df['SEASON'] = season_str

                time.sleep(random.randint(4, 5))
                df_holder.append(df)
            except Exception as e:
                print(f"error: {e} getting data from {date}... skipping")
                missing_dates.append(date)
                time.sleep(90)
                break        
    

    table_name = category + '_team_boxscore_shot_distance_stats'
        
    season_df = pd.concat(df_holder)
        
    season_df.to_sql(table_name, conn, if_exists='append', index=False)
    cur = conn.cursor()
    cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY TEAM_ID, GAME_DATE)')
    
    return season_df



db_name = 'test_nba_data.db'
conn = sqlite3.connect(db_name)

for s in range(2016, 2017):
    for c in shot_distance_categories:
        shot_distance_stats_df = build_team_stat_shot_distance_db(season=s, conn=conn, category=c)
    #     time.sleep(120)
    # time.sleep(120)
    

conn.close()

shot_distance_stats_df

0.37362637362637363

In [None]:
# (Base)|(Opponent)
measures = ['SpeedDistance', 'Rebounding', 'Possessions', 'CatchShoot', 'PullUpShot', 'Defense', 'Drives', 'Passing', 'ElbowTouch', 'PostTouch', 'PaintTouch', 'Efficiency']

def create_team_tracking_stats_tables(season, conn, measure):
    df_holder = []
    season_str = season_string(season)
    
    season_game_dates = get_game_dates(season)  
    num_dates = len(season_game_dates)
    missing_dates = []
    
    print(f"Getting {measure} boxscore data for {season_str}. {num_dates} days")

    for date in tqdm(season_game_dates[:2]):   
        try:
            df = leaguedashptstats.LeagueDashPtStats(last_n_games=0,
                                                # player_or_team=,
                                                pt_measure_type = measure,
                                                season='2023-24',
                                                date_from_nullable = '2023-10-24',
                                                date_to_nullable = '2023-10-24').get_data_frames()[0]
            df['GAME_DATE'] = date
            df['SEASON'] = season_str

            time.sleep(random.randint(2, 3))
            df_holder.append(df)
        except Exception as e:
            print(f"error: {e} getting data from {date}... skipping")
            missing_dates.append(date)
            time.sleep(90)
            break        
    

    table_name = measure + '_team_tracking'
        
    season_df = pd.concat(df_holder)
        
    season_df.to_sql(table_name, conn, if_exists='append', index=False)
    cur = conn.cursor()
    cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY TEAM_ID, GAME_DATE)')
    
    return season_df



db_name = 'test_nba_data.db'
conn = sqlite3.connect(db_name)

for s in range(2016, 2017):
    for m in measures:
        tracking_stats_df = create_team_tracking_stats_tables(season=s, conn=conn, measure=m)
    #     time.sleep(120)
    # time.sleep(120)
    

conn.close()

shot_distance_stats_df

In [47]:
# (SpeedDistance)|(Rebounding)|(Possessions)|(CatchShoot)|(PullUpShot)|(Defense)|(Drives)|(Passing)|(ElbowTouch)|(PostTouch)|(PaintTouch)|(Efficiency)
measures = ['SpeedDistance', 'Rebounding', 'Possessions', 'CatchShoot', 'PullUpShot', 'Defense', 'Drives', 'Passing', 'ElbowTouch', 'PostTouch', 'PaintTouch', 'Efficiency']
df_holder = []
for measure in measures:

    df = leaguedashptstats.LeagueDashPtStats(last_n_games=0,
                                        # player_or_team=,
                                        pt_measure_type = measure,
                                        season='2023-24',
                                        date_from_nullable = '2023-10-24',
                                        date_to_nullable = '2023-10-24').get_data_frames()[0]
    
    # cat_suffix_dict = {'Overall':'_ALL',
    #                    '3 Pointers': '_3PT',
    #                    '2 Pointers': '_2PT',
    #                    'Less Than 6Ft': '_LT6',
    #                    'Less Than 10Ft': '_LT10',
    #                    'Greater Than 15Ft': '_GT15'}
    
    # df= df.rename(columns = {'FREQ':'FREQ' + cat_sufffix_dict[cat],
    #                      'PLUSMINUS': 'PLUSMINUS' + cat_sufffix_dict[cat]})
    
    df_holder.append(df)
    
df_wide = pd.concat(df_holder, axis=1)

non_rank_cols = [x for x in df_wide.columns if 'RANK' not in x]

print(df_wide[non_rank_cols].T.drop_duplicates().T.shape)

df_wide[non_rank_cols].T.drop_duplicates().T




(4, 139)


Unnamed: 0,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GP,W,L,MIN,MIN.1,DIST_FEET,DIST_MILES,DIST_MILES_OFF,DIST_MILES_DEF,AVG_SPEED,AVG_SPEED_OFF,AVG_SPEED_DEF,OREB,OREB_CONTEST,OREB_UNCONTEST,OREB_CONTEST_PCT,OREB_CHANCES,OREB_CHANCE_PCT,OREB_CHANCE_DEFER,OREB_CHANCE_PCT_ADJ,AVG_OREB_DIST,DREB,DREB_CONTEST,DREB_UNCONTEST,DREB_CONTEST_PCT,DREB_CHANCES,DREB_CHANCE_PCT,DREB_CHANCE_DEFER,DREB_CHANCE_PCT_ADJ,AVG_DREB_DIST,REB,REB_CONTEST,REB_UNCONTEST,REB_CONTEST_PCT,REB_CHANCES,REB_CHANCE_PCT,REB_CHANCE_DEFER,REB_CHANCE_PCT_ADJ,AVG_REB_DIST,POINTS,TOUCHES,FRONT_CT_TOUCHES,TIME_OF_POSS,AVG_SEC_PER_TOUCH,AVG_DRIB_PER_TOUCH,PTS_PER_TOUCH,ELBOW_TOUCHES,POST_TOUCHES,PAINT_TOUCHES,PTS_PER_ELBOW_TOUCH,PTS_PER_POST_TOUCH,PTS_PER_PAINT_TOUCH,CATCH_SHOOT_FGM,CATCH_SHOOT_FGA,CATCH_SHOOT_FG_PCT,CATCH_SHOOT_FG_PCT.1,CATCH_SHOOT_PTS,CATCH_SHOOT_PTS.1,CATCH_SHOOT_FG3M,CATCH_SHOOT_FG3A,CATCH_SHOOT_FG3_PCT,CATCH_SHOOT_EFG_PCT,PULL_UP_FGM,PULL_UP_FGA,PULL_UP_FG_PCT,PULL_UP_PTS,PULL_UP_FG3M,PULL_UP_FG3A,PULL_UP_FG3_PCT,PULL_UP_EFG_PCT,STL,BLK,DEF_RIM_FGM,DEF_RIM_FGA,DEF_RIM_FG_PCT,DRIVES,DRIVE_FGM,DRIVE_FGA,DRIVE_FG_PCT,DRIVE_FTM,DRIVE_FTA,DRIVE_FT_PCT,DRIVE_PTS,DRIVE_PTS_PCT,DRIVE_PASSES,DRIVE_PASSES_PCT,DRIVE_AST,DRIVE_AST_PCT,DRIVE_TOV,DRIVE_TOV_PCT,DRIVE_PF,DRIVE_PF_PCT,PASSES_MADE,AST,FT_AST,SECONDARY_AST,POTENTIAL_AST,AST_PTS_CREATED,AST_ADJ,AST_TO_PASS_PCT,AST_TO_PASS_PCT_ADJ,ELBOW_TOUCH_FGM,ELBOW_TOUCH_FGA,ELBOW_TOUCH_FG_PCT,ELBOW_TOUCH_FTM,ELBOW_TOUCH_FTA,ELBOW_TOUCH_FT_PCT,ELBOW_TOUCH_PTS,ELBOW_TOUCH_PASSES,ELBOW_TOUCH_PASSES_PCT,ELBOW_TOUCH_AST,ELBOW_TOUCH_AST_PCT,ELBOW_TOUCH_TOV,ELBOW_TOUCH_FOULS,ELBOW_TOUCH_FOULS_PCT,POST_TOUCH_FGM,POST_TOUCH_FGA,POST_TOUCH_FG_PCT,POST_TOUCH_PTS,POST_TOUCH_PASSES_PCT,POST_TOUCH_AST,POST_TOUCH_AST_PCT,POST_TOUCH_FOULS_PCT,PAINT_TOUCH_FGM,PAINT_TOUCH_FGA,PAINT_TOUCH_FG_PCT,PAINT_TOUCH_FTM,PAINT_TOUCH_FT_PCT,PAINT_TOUCH_PTS,PAINT_TOUCH_PASSES,PAINT_TOUCH_PASSES_PCT,PAINT_TOUCH_AST_PCT,PAINT_TOUCH_TOV,PAINT_TOUCH_TOV_PCT,PAINT_TOUCH_FOULS_PCT,EFF_FG_PCT
0,1610612743,DEN,Denver Nuggets,1,1,0,240.0,215.0,90119,17.1,9.6,7.4,3.91,4.04,3.72,9,4,5,0.444,24,0.375,1,0.391,7.0,33,8,25,0.242,62,0.532,5,0.579,8.7,42,12,30,0.286,83,0.506,6,0.545,8.3,119,403,221,22.2,3.3,2.35,0.295,22,6,24,0.227,0.333,1.042,11,28,0.393,0.4,31,34,9,25,0.36,0.554,10,19,0.526,24,4,7,0.571,0.632,9,6,24,36,0.667,35,6,13,0.462,0,0,0.0,12,0.343,18,0.514,5,0.143,3,0.086,0,0.0,278,29,3,2,52,75,34,0.104,0.122,2,4,0.5,0,0,0.0,5,15,0.682,3,0.2,0,0,0.0,1,5,0.2,2,0.167,1,0.167,0.0,12,20,0.6,0,0.0,25,2,0.083,0.042,1,0.042,0.0,0.604
1,1610612744,GSW,Golden State Warriors,1,0,1,240.0,240.0,98299,18.6,10.3,8.3,4.19,4.58,3.78,18,9,8,0.5,38,0.474,3,0.514,9.2,31,11,20,0.355,57,0.544,3,0.574,6.0,49,20,28,0.408,95,0.516,6,0.551,7.2,104,419,220,20.5,2.94,2.24,0.248,16,6,29,0.5,1.167,0.483,8,29,0.276,0.25,23,23,7,27,0.259,0.397,7,23,0.304,17,3,14,0.214,0.37,11,6,18,27,0.667,40,7,20,0.35,3,4,0.75,19,0.475,15,0.375,2,0.05,1,0.025,2,0.05,286,19,2,3,54,50,24,0.066,0.084,3,6,0.5,2,2,1.0,8,9,0.563,2,0.222,0,1,0.063,2,4,0.5,7,0.167,0,0.0,0.167,7,14,0.5,0,0.0,14,14,0.483,0.034,0,0.0,0.034,0.406
2,1610612747,LAL,Los Angeles Lakers,1,0,1,240.0,237.0,89069,16.9,8.7,8.2,3.88,4.38,3.45,13,5,7,0.385,28,0.464,0,0.464,10.4,31,6,25,0.194,48,0.646,1,0.66,7.1,44,11,32,0.25,73,0.603,1,0.611,8.0,107,358,174,18.1,3.03,2.25,0.299,9,3,15,0.444,0.667,0.533,9,23,0.391,0.391,27,27,9,20,0.45,0.587,3,19,0.158,7,1,9,0.111,0.184,5,4,22,34,0.647,45,9,23,0.391,13,16,0.813,31,0.689,11,0.244,2,0.044,2,0.044,8,0.178,240,23,1,1,41,58,25,0.096,0.104,2,6,0.333,0,0,0.0,4,2,0.222,0,0.0,0,0,0.0,1,2,0.5,2,0.333,0,0.0,0.0,4,9,0.444,0,0.0,8,4,0.267,0.067,1,0.067,0.0,0.511
3,1610612756,PHX,Phoenix Suns,1,1,0,240.0,240.0,95962,18.2,9.4,8.8,4.1,4.29,3.92,17,9,8,0.529,31,0.548,3,0.607,11.7,43,16,27,0.372,79,0.544,8,0.606,7.5,60,25,35,0.417,109,0.55,11,0.612,8.7,108,380,192,20.2,3.18,2.42,0.284,17,7,20,0.824,1.0,0.95,10,26,0.385,0.37,29,29,9,24,0.375,0.558,9,30,0.3,20,2,9,0.222,0.333,5,7,20,38,0.526,43,8,24,0.333,3,4,0.75,19,0.442,15,0.349,2,0.047,1,0.023,2,0.047,250,23,2,3,44,62,28,0.092,0.112,6,9,0.667,1,2,0.5,14,6,0.353,2,0.333,0,1,0.059,2,4,0.5,7,0.143,0,0.0,0.143,8,11,0.727,1,0.5,19,7,0.35,0.05,1,0.05,0.05,0.5


In [49]:




df = leaguedashoppptshot.LeagueDashOppPtShot(season='2023-24',
                                    date_from_nullable = '2023-10-24',
                                    date_to_nullable = '2023-10-24').get_data_frames()[0]


non_rank_cols = [x for x in df.columns if 'RANK' not in x]

print(df[non_rank_cols].T.drop_duplicates().T.shape)

df[non_rank_cols].T.drop_duplicates().T



(4, 16)


Unnamed: 0,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,GP,FGM,FGA,FG_PCT,EFG_PCT,FG2A_FREQUENCY,FG2M,FG2A,FG2_PCT,FG3A_FREQUENCY,FG3M,FG3A,FG3_PCT
0,1610612756,Phoenix Suns,PHX,1,36,101,0.356,0.406,0.574,26,58,0.448,0.426,10,43,0.233
1,1610612744,Golden State Warriors,GSW,1,42,95,0.442,0.5,0.653,31,62,0.5,0.347,11,33,0.333
2,1610612747,Los Angeles Lakers,LAL,1,48,91,0.527,0.604,0.626,34,57,0.596,0.374,14,34,0.412
3,1610612743,Denver Nuggets,DEN,1,41,90,0.456,0.511,0.678,31,61,0.508,0.322,10,29,0.345
