In [1]:
from nba_api.stats.static import players, teams
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.endpoints import boxscoreadvancedv2
from nba_api.stats.endpoints import boxscorescoringv2
from nba_api.stats.endpoints import boxscoreplayertrackv2
from nba_api.stats.endpoints import boxscoretraditionalv2
import random
import pandas as pd
import numpy as np
from tqdm import tqdm
import time as time
from time import sleep
from IPython.display import clear_output
import sqlite3

In [2]:
# Show up to 60 columns when a DataFrame is rendered in the notebook.
pd.set_option('display.max_columns', 60)

In [3]:
def season_string(season):
    """Format a season's starting year as a 'YYYY-YY' label.

    The label is the starting year, a dash, and the last two digits of the
    following year, e.g. 2013 -> '2013-14' and 1999 -> '1999-00'.
    """
    next_year_suffix = str(season + 1)[-2:]
    return '{}-{}'.format(season, next_year_suffix)


### Full Game Stats

In [11]:
def add_basic_boxscores(conn, start_season, end_season, if_exists='append'):
    """Pull basic team boxscores from nba_api and store them in ``team_basic_boxscores``.

    For every season in the range, the league game log is requested once per
    season type ('Regular Season', 'PlayIn', 'Playoffs'), the frames are
    concatenated, tagged with a SEASON column and appended to the table.
    After all seasons are loaded, duplicate (TEAM_ID, GAME_ID) rows are removed,
    keeping the row with the lowest rowid.

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    start_season, end_season : int
        First and last season to pull, inclusive, given as the starting year.
    if_exists : str, default 'append'
        'replace' drops the existing table before loading; any other value
        keeps the existing table and appends to it.

    Returns
    -------
    None
    """

    table_name = 'team_basic_boxscores'

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + table_name)
        conn.execute('VACUUM')

    conn.execute("""CREATE TABLE IF NOT EXISTS {} (SEASON TEXT, TEAM_ID INTEGER, TEAM_ABBREVIATION TEXT, 
        TEAM_NAME TEXT, GAME_ID TEXT, GAME_DATE DATE, MATCHUP TEXT, WL TEXT, MIN INTEGER, FGM INTEGER, FGA INTEGER, 
        FG_PCT FLOAT, FG3M INTEGER, FG3A  INTEGER, FG3_PCT FLOAT, FTM INTEGER, FTA INTEGER, FT_PCT FLOAT, OREB INTEGER,
        DREB INTEGER, REB INTEGER, AST INTEGER, STL INTEGER, BLK INTEGER, TOV INTEGER, PF INTEGER, PTS INTEGER, 
        PLUS_MINUS INTEGER)""".format(table_name))

    for season in range(start_season, end_season+1):
        season_str = season_string(season)

        # Fresh accumulator for each season. The list was previously never
        # created, so the first append raised NameError.
        season_boxscores = []

        for season_type in ['Regular Season', 'PlayIn',  'Playoffs']:
            boxscores = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            season_boxscores.append(boxscores)
            # Pause between requests (presumably to stay under the API's rate limits).
            sleep(2)

        season_df = pd.concat(season_boxscores)
        season_df['SEASON'] = season_str
        # These two columns are not part of the table schema above.
        season_df.drop(columns = ['SEASON_ID', 'VIDEO_AVAILABLE'], inplace=True)

        season_df.to_sql(table_name, conn, if_exists='append', index=False)

        sleep(3)

    # Remove duplicate team-game rows, keeping the earliest inserted copy.
    cur = conn.cursor()
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT min(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(table_name, table_name))
    conn.commit()

    return None

In [12]:
def add_advanced_boxscores(conn, start_season, end_season, if_exists='append'):
    """
    Pull advanced team boxscores from nba_api, one game at a time, and append
    them to the ``team_advanced_boxscores`` table (created if it does not exist).

    Note: Because of timeout errors and that we have to pull each game
    individually, each season takes 1-2 hours. If some games were not pulled in
    certain seasons, you can use the update functions to gather those
    individual games.

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    start_season, end_season : int
        First and last season to pull, inclusive, given as the starting year.
    if_exists : str, default 'append'
        'replace' drops the existing table before loading; any other value
        keeps the existing table and appends to it.

    Returns
    -------
    list
        GAME_IDs whose request or insert raised an exception and were skipped.
        (This list used to be collected and then discarded.)
    """

    table_name = 'team_advanced_boxscores'
    game_ids_not_added = []

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + table_name)
        conn.execute('VACUUM')

    conn.execute('''CREATE TABLE IF NOT EXISTS {} (GAME_ID TEXT, TEAM_ID INTEGER, TEAM_NAME TEXT, 
        TEAM_ABBREVIATION TEXT, TEAM_CITY TEXT, MIN TEXT, E_OFF_RATING FLOAT, OFF_RATING FLOAT, E_DEF_RATING FLOAT, 
        DEF_RATING FLOAT, E_NET_RATING FLOAT, NET_RATING FLOAT, AST_PCT FLOAT, AST_TOV FLOAT, 
        AST_RATIO FLOAT, OREB_PCT FLOAT, DREB_PCT FLOAT, REB_PCT FLOAT, E_TM_TOV_PCT FLOAT, 
        TM_TOV_PCT FLOAT, EFG_PCT FLOAT, TS_PCT FLOAT, USG_PCT FLOAT, E_USG_PCT FLOAT, E_PACE FLOAT, 
        PACE FLOAT, PACE_PER40 FLOAT, POSS FLOAT, PIE FLOAT)'''.format(table_name))

    for season in range(start_season, end_season+1):
        # Use the 'YYYY-YY' label, as the basic and update functions in this notebook do.
        season_str = season_string(season)

        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            logs = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            game_ids = logs['GAME_ID'].unique()

            # Work through the games in batches of 100 with a long pause after each batch.
            for i in range(0, len(game_ids), 100):
                print('games {} to {}'.format(i, i+100))
                for game_id in tqdm(game_ids[i:i+100], desc='progress'):
                    try:
                        # Index 1 is the frame written to the team table.
                        team_boxscores = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id).get_data_frames()[1]
                        team_boxscores.to_sql(table_name, conn, if_exists='append', index=False)
                    except Exception:
                        # Best effort: remember the game and move on. Catching Exception
                        # rather than a bare except keeps Ctrl-C / kernel interrupt working.
                        game_ids_not_added.append(game_id)
                    sleep(2)
                sleep(120)
                clear_output(wait=True)

        sleep(120)

    # Remove duplicate team-game rows, keeping the earliest inserted copy.
    cur = conn.cursor()
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT min(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(table_name, table_name))
    conn.commit()

    return game_ids_not_added


In [13]:
def add_scoring_boxscores(conn, start_season, end_season, if_exists='append'):
    """
    Pull scoring team boxscores from nba_api, one game at a time, and append
    them to the ``team_scoring_boxscores`` table (created if it does not exist).

    Note: Because of timeout errors and that we have to pull each game
    individually, each season takes 1-2 hours. If some games were not pulled in
    certain seasons, you can use the update functions to gather those
    individual games.

    NOTE(review): a later cell in this notebook defines a function with this
    same name; when both cells are run, the later definition is the one that
    stays bound.

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    start_season, end_season : int
        First and last season to pull, inclusive, given as the starting year.
    if_exists : str, default 'append'
        'replace' drops the existing table before loading; any other value
        keeps the existing table and appends to it.

    Returns
    -------
    list
        GAME_IDs whose request or insert raised an exception and were skipped.
    """

    table_name = 'team_scoring_boxscores'
    game_ids_not_added = []

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + table_name)
        conn.execute('VACUUM')

    # The statement previously started at the column list, with no CREATE TABLE
    # clause, so SQLite rejected it as invalid SQL.
    conn.execute('''CREATE TABLE IF NOT EXISTS {} (GAME_ID TEXT, TEAM_ID INTEGER, TEAM_NAME TEXT, TEAM_ABBREVIATION TEXT, TEAM_CITY TEXT,
       MIN TEXT, PCT_FGA_2PT FLOAT, PCT_FGA_3PT FLOAT, PCT_PTS_2PT FLOAT, PCT_PTS_2PT_MR FLOAT,
       PCT_PTS_3PT FLOAT, PCT_PTS_FB FLOAT, PCT_PTS_FT FLOAT, PCT_PTS_OFF_TOV FLOAT,
       PCT_PTS_PAINT FLOAT, PCT_AST_2PM FLOAT, PCT_UAST_2PM FLOAT, PCT_AST_3PM FLOAT,
       PCT_UAST_3PM FLOAT, PCT_AST_FGM FLOAT, PCT_UAST_FGM FLOAT)'''.format(table_name))

    for season in range(start_season, end_season+1):
        # Use the 'YYYY-YY' label, as the basic and update functions in this notebook do.
        season_str = season_string(season)

        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            logs = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            game_ids = logs['GAME_ID'].unique()

            # Work through the games in batches of 100 with a long pause after each batch.
            for i in range(0, len(game_ids), 100):
                print('games {} to {}'.format(i, i+100))
                for game_id in tqdm(game_ids[i:i+100], desc='progress'):
                    try:
                        # Fixed: the endpoint class is BoxScoreScoringV2, and the frame
                        # fetched here is the one that must be written (the old code
                        # wrote an undefined name, hidden by the bare except).
                        scoring_boxscores = boxscorescoringv2.BoxScoreScoringV2(game_id).get_data_frames()[1]
                        scoring_boxscores.to_sql(table_name, conn, if_exists='append', index=False)
                    except Exception:
                        # Best effort: remember the game and move on, but let
                        # KeyboardInterrupt propagate so the run can be stopped.
                        game_ids_not_added.append(game_id)
                    sleep(2)
                sleep(120)
                clear_output(wait=True)

        sleep(120)

    # Remove duplicate team-game rows, keeping the earliest inserted copy.
    cur = conn.cursor()
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT min(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(table_name, table_name))
    conn.commit()

    return game_ids_not_added


In [14]:
def add_scoring_boxscores(conn, start_season, end_season, if_exists='append'):
    """
    Pull scoring team boxscores from nba_api, one game at a time, and append
    them to the ``team_scoring_boxscores`` table (created if it does not exist).

    Note: Because of timeout errors and that we have to pull each game
    individually, each season takes 1-2 hours. If some games were not pulled in
    certain seasons, you can use the update functions to gather those
    individual games.

    NOTE(review): this redefines the function of the same name from the
    previous cell; this definition is the one left bound after both run.

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    start_season, end_season : int
        First and last season to pull, inclusive, given as the starting year.
    if_exists : str, default 'append'
        'replace' drops the existing table before loading; any other value
        keeps the existing table and appends to it.

    Returns
    -------
    list
        GAME_IDs whose request or insert raised an exception and were skipped.
    """

    table_name = 'team_scoring_boxscores'
    game_ids_not_added = []

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + table_name)
        conn.execute('VACUUM')

    # The statement previously started at the column list, with no CREATE TABLE
    # clause, so SQLite rejected it as invalid SQL.
    conn.execute('''CREATE TABLE IF NOT EXISTS {} (GAME_ID TEXT, TEAM_ID INTEGER, TEAM_NAME TEXT, TEAM_ABBREVIATION TEXT, TEAM_CITY TEXT,
       MIN TEXT, PCT_FGA_2PT FLOAT, PCT_FGA_3PT FLOAT, PCT_PTS_2PT FLOAT, PCT_PTS_2PT_MR FLOAT,
       PCT_PTS_3PT FLOAT, PCT_PTS_FB FLOAT, PCT_PTS_FT FLOAT, PCT_PTS_OFF_TOV FLOAT,
       PCT_PTS_PAINT FLOAT, PCT_AST_2PM FLOAT, PCT_UAST_2PM FLOAT, PCT_AST_3PM FLOAT,
       PCT_UAST_3PM FLOAT, PCT_AST_FGM FLOAT, PCT_UAST_FGM FLOAT)'''.format(table_name))

    for season in range(start_season, end_season+1):
        # Use the 'YYYY-YY' label, as the basic and update functions in this notebook do.
        season_str = season_string(season)

        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            logs = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            game_ids = logs['GAME_ID'].unique()

            # Work through the games in batches of 100 with a long pause after each batch.
            for i in range(0, len(game_ids), 100):
                print('games {} to {}'.format(i, i+100))
                for game_id in tqdm(game_ids[i:i+100], desc='progress'):
                    try:
                        # Index 1 is the frame written to the team table.
                        scoring_boxscores = boxscorescoringv2.BoxScoreScoringV2(game_id).get_data_frames()[1]
                        scoring_boxscores.to_sql(table_name, conn, if_exists='append', index=False)
                    except Exception:
                        # Best effort: remember the game and move on, but let
                        # KeyboardInterrupt propagate so the run can be stopped.
                        game_ids_not_added.append(game_id)
                    sleep(2)
                sleep(120)
                clear_output(wait=True)

        sleep(120)

    # Remove duplicate team-game rows, keeping the earliest inserted copy.
    cur = conn.cursor()
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT min(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(table_name, table_name))
    conn.commit()

    return game_ids_not_added


In [10]:
# Take the first GAME_ID listed in the 2013 league game log ...
game_id = (
    leaguegamelog.LeagueGameLog(season=2013)
    .get_data_frames()[0]['GAME_ID']
    .values[0]
)

# ... and fetch frame 0 of the player-tracking boxscore for that game
# (later cells treat frame 0 as the player-level rows).
df = (
    boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id)
    .get_data_frames()[0]
)

In [36]:
# Exploratory pull of tracking boxscores for a small sample of games.
start_season = 2013
end_season = 2013
game_ids_not_added = []

for season in range(start_season, end_season+1):
    season_str = season_string(season)
    # NOTE: these accumulators are reset for every season, so the concat below
    # only sees the last season of the range (start == end here, so nothing is lost).
    season_team_tracking_boxscores = []
    season_player_tracking_boxscores = []

    for season_type in ['Regular Season', 'PlayIn', 'Playoffs'] :
        logs = leaguegamelog.LeagueGameLog(season=season, season_type_all_star=season_type).get_data_frames()[0]
        # Sample run: only the first 10 games of each season type.
        game_ids = logs['GAME_ID'].unique()[:10]

        for i in range(0, len(game_ids), 100):
            print('games {} to {}'.format(i, i+100))
            for game_id in tqdm(game_ids[i:i+100], desc='progress'):
                try:
                    tracking_boxscores = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id).get_data_frames()

                    # Frame 1 is collected as team rows, frame 0 as player rows.
                    season_team_tracking_boxscores.append(tracking_boxscores[1])
                    season_player_tracking_boxscores.append(tracking_boxscores[0])

                except Exception:
                    # Catch Exception rather than using a bare except so the cell
                    # can still be interrupted from the keyboard / kernel.
                    game_ids_not_added.append(game_id)
                sleep(1)
    clear_output(wait=True)


team_tracking_df = pd.concat(season_team_tracking_boxscores)
player_tracking_df = pd.concat(season_player_tracking_boxscores)

games 0 to 100


progress: 100%|██████████| 10/10 [00:18<00:00,  1.84s/it]


games 0 to 100


progress: 100%|██████████| 10/10 [00:19<00:00,  1.99s/it]


In [45]:
def add_tracking_boxscores(conn, start_season, end_season, if_exists='append'):
    """
    Pull tracking boxscores from nba_api, one game at a time, and append the
    player-level frame to ``player_tracking_boxscores`` and the team-level frame
    to ``team_tracking_boxscores`` (both tables are created if they do not exist).

    Note: Because of timeout errors and that we have to pull each game
    individually, each season takes 1-2 hours. If some games were not pulled in
    certain seasons, you can use the update functions to gather those
    individual games.

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    start_season, end_season : int
        First and last season to pull, inclusive, given as the starting year.
    if_exists : str, default 'append'
        'replace' drops both existing tables before loading; any other value
        keeps the existing tables and appends to them.

    Returns
    -------
    list
        GAME_IDs whose request or insert raised an exception. If the player
        insert succeeded before the team insert failed, the player rows for
        that game are already in the database.
    """

    player_table_name = 'player_tracking_boxscores'
    team_table_name = 'team_tracking_boxscores'

    game_ids_not_added = []

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + player_table_name)
        conn.execute('DROP TABLE IF EXISTS ' + team_table_name)
        conn.execute('VACUUM')

    conn.execute('''CREATE TABLE IF NOT EXISTS {} (
                GAME_ID TEXT, TEAM_ID TEXT, TEAM_ABBREVIATION TEXT, 
                TEAM_CITY TEXT, PLAYER_ID TEXT, PLAYER_NAME TEXT,
                START_POSITION TEXT, COMMENT TEXT, MIN INTEGER, SPD FLOAT, 
                DIST FLOAT, ORBC INTEGER, DRBC INTEGER, RBC INTEGER, 
                TCHS INTEGER, SAST INTEGER, FTAST INTEGER, PASS INTEGER,
                AST INTEGER, CFGM INTEGER, CFGA INTEGER, CFG_PCT FLOAT,
                UFGM INTEGER, UFGA INTEGER, UFG_PCT FLOAT, FG_PCT FLOAT, 
                DFGM INTEGER, DFGA INTEGER, DFG_PCT FLOAT)'''.format(player_table_name))

    conn.execute('''CREATE TABLE IF NOT EXISTS {} (
                GAME_ID TEXT, TEAM_ID TEXT, TEAM_NAME TEXT, TEAM_ABBREVIATION TEXT, 
                TEAM_CITY TEXT, MIN INTEGER, DIST FLOAT, ORBC INTEGER, DRBC INTEGER, RBC INTEGER, 
                TCHS INTEGER, SAST INTEGER, FTAST INTEGER, PASS INTEGER,
                AST INTEGER, CFGM INTEGER, CFGA INTEGER, CFG_PCT FLOAT,
                UFGM INTEGER, UFGA INTEGER, UFG_PCT FLOAT, FG_PCT FLOAT, 
                DFGM INTEGER, DFGA INTEGER, DFG_PCT FLOAT)'''.format(team_table_name))

    for season in range(start_season, end_season+1):
        # Use the 'YYYY-YY' label, as the basic and update functions in this notebook do.
        season_str = season_string(season)

        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            logs = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            # Pull every game. A leftover [:10] slice from the exploratory cell
            # used to cap each season type at ten games.
            game_ids = logs['GAME_ID'].unique()

            # Work through the games in batches of 100 with a long pause after each batch.
            for i in range(0, len(game_ids), 100):
                print('games {} to {}'.format(i, i+100))
                for game_id in tqdm(game_ids[i:i+100], desc='progress'):
                    try:
                        tracking_boxscores = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id).get_data_frames()

                        # Frame 0 goes to the player table, frame 1 to the team table.
                        player_tracking_boxscores = tracking_boxscores[0]
                        team_tracking_boxscores = tracking_boxscores[1]

                        player_tracking_boxscores.to_sql(player_table_name, conn, if_exists='append', index=False)
                        team_tracking_boxscores.to_sql(team_table_name, conn, if_exists='append', index=False)

                    except Exception:
                        # Best effort: remember the game and move on, but let
                        # KeyboardInterrupt propagate so the run can be stopped.
                        game_ids_not_added.append(game_id)
                    sleep(2)
                sleep(120)
                clear_output(wait=True)

        sleep(120)

    # Remove duplicate rows from both tables, keeping the earliest inserted copy.
    cur = conn.cursor()

    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT min(rowid) FROM {} GROUP BY PLAYER_ID, GAME_ID)'.format(player_table_name, player_table_name))
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT min(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(team_table_name, team_table_name))

    conn.commit()

    return game_ids_not_added


## Update Data

In [20]:
## Update basic team gamelogs and player gamelogs

def update_team_basic_boxscores(conn, season):
    """Re-pull one season's league game logs and merge them into ``team_basic_boxscores``.

    The game log is requested for each season type, the frames are combined,
    tagged with the season label and appended to the table. Duplicate
    (TEAM_ID, GAME_ID) rows are then deleted, keeping the lowest rowid, so
    rows that were already stored win over the freshly appended copies.

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    season : int
        Season to refresh, given as its starting year.

    Returns
    -------
    None
    """
    table_name = 'team_basic_boxscores'
    season_str = season_string(season)

    # One frame per season type, in the same request order as before.
    frames_by_type = [
        leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=kind).get_data_frames()[0]
        for kind in ['Regular Season', 'PlayIn', 'Playoffs']
    ]

    team_gamelogs_updated = pd.concat(frames_by_type)
    team_gamelogs_updated['SEASON'] = season_str
    # Drop the columns that the table does not store.
    team_gamelogs_updated = team_gamelogs_updated.drop(columns=['SEASON_ID', 'VIDEO_AVAILABLE'])

    team_gamelogs_updated.to_sql(table_name, conn, if_exists='append', index=False)

    dedupe_statement = 'DELETE FROM {} WHERE rowid NOT IN (SELECT min(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(table_name, table_name)
    conn.cursor().execute(dedupe_statement)
    conn.commit()

    return None


In [27]:
def update_team_advanced_boxscores(conn, season, dates):
    """Fetch advanced team boxscores for games missing from ``team_advanced_boxscores``.

    Two modes, chosen by ``dates``:

    * non-empty ``dates``: every game played on those dates (all season types)
      is pulled again, whether or not it is already stored;
    * empty or ``None``: the season's full game list is compared with the games
      already present in both the basic and advanced tables, and only the
      difference is pulled.

    After inserting, duplicate (TEAM_ID, GAME_ID) rows are removed, keeping the
    highest rowid (the most recently inserted copy).

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    season : int
        Season to update, given as its starting year.
    dates : sequence of str or None
        Dates passed to the game log's ``date_from_nullable`` /
        ``date_to_nullable`` filters (the notebook uses 'YYYY-MM-DD').

    Returns
    -------
    list or None
        GAME_IDs that could not be added, or None when there was nothing to update.
    """
    table_name = 'team_advanced_boxscores'

    season_str = season_string(season)

    game_ids_not_added = []

    # Pull the GAME_IDs from my data. The season is bound as a parameter rather
    # than formatted into the SQL text inside double quotes.
    game_ids_in_db = pd.read_sql('''SELECT DISTINCT team_basic_boxscores.GAME_ID FROM team_basic_boxscores
                INNER JOIN team_advanced_boxscores 
                ON team_basic_boxscores.GAME_ID = team_advanced_boxscores.GAME_ID
                AND team_basic_boxscores.TEAM_ID = team_advanced_boxscores.TEAM_ID
                WHERE SEASON = ? ''', conn, params=(season_str,))

    game_ids_in_db = game_ids_in_db['GAME_ID'].tolist()

    missing_game_ids = []
    # Treat None like an empty list instead of failing on len(None).
    if dates is not None and len(dates) != 0:
        for date in dates:
            for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
                gamelogs = leaguegamelog.LeagueGameLog(
                    season=season_str, season_type_all_star=season_type, date_from_nullable=date, date_to_nullable=date).get_data_frames()[0]
                missing_game_ids.extend(gamelogs['GAME_ID'].unique())

    else:
        # get up to date GAME_IDs
        to_date_game_ids = []
        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            to_date_gamelogs = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            to_date_game_ids.extend(to_date_gamelogs['GAME_ID'].unique())

        # See which game_ids are missing
        missing_game_ids = set(to_date_game_ids) - set(game_ids_in_db)

    num_games_updated = len(missing_game_ids)
    print("num_games_updated:", num_games_updated)

    if num_games_updated == 0:
        print("All team advanced boxscores up to date in season {}".format(season_str))
        return None

    for game_id in tqdm(missing_game_ids, desc='progress'):
        try:
            # Index 1 is the frame written to the team table.
            boxscores = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id).get_data_frames()[1]
            boxscores.to_sql(table_name, conn, if_exists='append', index=False)
        except Exception:
            # Best effort: remember the game and move on, but let
            # KeyboardInterrupt propagate so the run can be stopped.
            game_ids_not_added.append(game_id)
        # Pause after every request, failed ones included, as the add_* loaders do.
        sleep(2)

    # Keep only the newest copy of each team-game row.
    cur = conn.cursor()
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT max(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(table_name, table_name))
    conn.commit()

    return game_ids_not_added


In [28]:
def update_team_scoring_boxscores(conn, season, dates):
    """Fetch scoring team boxscores for games missing from ``team_scoring_boxscores``.

    Two modes, chosen by ``dates``:

    * non-empty ``dates``: every game played on those dates (all season types)
      is pulled again, whether or not it is already stored;
    * empty or ``None``: the season's full game list is compared with the games
      already present in both the basic and scoring tables, and only the
      difference is pulled.

    After inserting, duplicate (TEAM_ID, GAME_ID) rows are removed, keeping the
    highest rowid (the most recently inserted copy).

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    season : int
        Season to update, given as its starting year.
    dates : sequence of str or None
        Dates passed to the game log's ``date_from_nullable`` /
        ``date_to_nullable`` filters (the notebook uses 'YYYY-MM-DD').

    Returns
    -------
    list or None
        GAME_IDs that could not be added, or None when there was nothing to update.
    """
    table_name = 'team_scoring_boxscores'

    season_str = season_string(season)

    game_ids_not_added = []

    # Pull the GAME_IDs from my data. The season is bound as a parameter rather
    # than interpolated into the SQL text inside double quotes.
    game_ids_in_db = pd.read_sql('''SELECT DISTINCT team_scoring_boxscores.GAME_ID FROM team_basic_boxscores
                INNER JOIN team_scoring_boxscores 
                ON team_basic_boxscores.GAME_ID = team_scoring_boxscores.GAME_ID
                AND team_basic_boxscores.TEAM_ID = team_scoring_boxscores.TEAM_ID
                WHERE SEASON = ? ''', conn, params=(season_str,))

    game_ids_in_db = game_ids_in_db['GAME_ID'].tolist()

    missing_game_ids = []
    # Treat None like an empty list instead of failing on len(None).
    if dates is not None and len(dates) != 0:
        for date in dates:
            for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
                gamelogs = leaguegamelog.LeagueGameLog(
                    season=season_str, season_type_all_star=season_type, date_from_nullable=date, date_to_nullable=date).get_data_frames()[0]
                missing_game_ids.extend(gamelogs['GAME_ID'].unique())

    else:
        # get up to date GAME_IDs
        to_date_game_ids = []
        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            to_date_gamelogs = leaguegamelog.LeagueGameLog(
                season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            to_date_game_ids.extend(to_date_gamelogs['GAME_ID'].unique())

        # See which game_ids are missing
        missing_game_ids = set(to_date_game_ids) - set(game_ids_in_db)

    num_games_updated = len(missing_game_ids)
    print("num_games_updated:", num_games_updated)

    if num_games_updated == 0:
        # Message fixed: it used to say "advanced" (copied from the sibling function).
        print("All team scoring boxscores up to date in season {}".format(season_str))
        return None

    for game_id in tqdm(missing_game_ids, desc='progress'):
        try:
            # Index 1 is the frame written to the team table.
            boxscores = boxscorescoringv2.BoxScoreScoringV2(
                game_id).get_data_frames()[1]
            boxscores.to_sql(table_name, conn,
                             if_exists='append', index=False)
        except Exception:
            # Best effort: remember the game and move on, but let
            # KeyboardInterrupt propagate so the run can be stopped.
            game_ids_not_added.append(game_id)
        # Pause after every request, failed ones included, as the add_* loaders do.
        sleep(2)

    # Keep only the newest copy of each team-game row.
    cur = conn.cursor()
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT max(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(
        table_name, table_name))
    conn.commit()

    return game_ids_not_added



In [23]:
connection = sqlite3.connect('../data/nba.db')

# Close the connection even if the update raises, so the database file is not left open.
try:
    update_team_advanced_boxscores(connection, season=2021, dates=['2022-02-09'])
finally:
    connection.close()

num_games_updated: 6


progress: 100%|██████████| 6/6 [00:26<00:00,  4.39s/it]


In [29]:
def update_team_tracking_boxscores(conn, season, dates):
    """Fetch tracking boxscores for games missing from the tracking tables.

    Team-level frames go to ``team_tracking_boxscores`` and player-level frames
    to ``player_tracking_boxscores``. Two modes, chosen by ``dates``:

    * non-empty ``dates``: every game played on those dates (all season types)
      is pulled again, whether or not it is already stored;
    * empty or ``None``: the season's full game list is compared with the games
      already present in both the basic and team tracking tables, and only the
      difference is pulled.

    After inserting, duplicates are removed from both tables, keeping the
    highest rowid (the most recently inserted copy).

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    season : int
        Season to update, given as its starting year.
    dates : sequence of str or None
        Dates passed to the game log's ``date_from_nullable`` /
        ``date_to_nullable`` filters (the notebook uses 'YYYY-MM-DD').

    Returns
    -------
    list or None
        GAME_IDs that could not be added, or None when there was nothing to
        update. If the team insert succeeded before the player insert failed,
        the team rows for that game are already in the database.
    """
    team_table_name = 'team_tracking_boxscores'
    player_table_name = 'player_tracking_boxscores'

    season_str = season_string(season)

    game_ids_not_added = []

    # Pull the GAME_IDs from my data. The season is bound as a parameter rather
    # than interpolated into the SQL text inside double quotes.
    game_ids_in_db = pd.read_sql('''SELECT DISTINCT team_tracking_boxscores.GAME_ID 
                                FROM team_basic_boxscores
                                INNER JOIN team_tracking_boxscores 
                                ON team_basic_boxscores.GAME_ID = team_tracking_boxscores.GAME_ID
                                AND team_basic_boxscores.TEAM_ID = team_tracking_boxscores.TEAM_ID
                                WHERE SEASON = ? ''', conn, params=(season_str,))

    game_ids_in_db = game_ids_in_db['GAME_ID'].tolist()

    missing_game_ids = []

    # Treat None like an empty list instead of failing on len(None).
    if dates is not None and len(dates) != 0:
        for date in dates:
            for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
                gamelogs = leaguegamelog.LeagueGameLog(
                    season=season_str, season_type_all_star=season_type, date_from_nullable=date, date_to_nullable=date).get_data_frames()[0]
                missing_game_ids.extend(gamelogs['GAME_ID'].unique())

    else:
        # get up to date GAME_IDs
        to_date_game_ids = []
        for season_type in ['Regular Season', 'PlayIn', 'Playoffs']:
            to_date_gamelogs = leaguegamelog.LeagueGameLog(
                season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            to_date_game_ids.extend(to_date_gamelogs['GAME_ID'].unique())

        # See which game_ids are missing
        missing_game_ids = set(to_date_game_ids) - set(game_ids_in_db)

    num_games_updated = len(missing_game_ids)
    print("num_games_updated:", num_games_updated)

    if num_games_updated == 0:
        print("All team tracking boxscores up to date in season {}".format(season_str))
        return None

    for game_id in tqdm(missing_game_ids, desc='progress'):
        try:
            boxscores = boxscoreplayertrackv2.BoxScorePlayerTrackV2(
                game_id).get_data_frames()

            # Frame 1 goes to the team table, frame 0 to the player table.
            boxscores[1].to_sql(team_table_name, conn,
                             if_exists='append', index=False)

            boxscores[0].to_sql(player_table_name, conn,
                    if_exists='append', index=False)
        except Exception:
            # Best effort: remember the game and move on, but let
            # KeyboardInterrupt propagate so the run can be stopped.
            game_ids_not_added.append(game_id)
        # Pause after every request, failed ones included, as the add_* loaders do.
        sleep(2)

    # Keep only the newest copy of each row in both tables.
    cur = conn.cursor()

    cur.execute(f"""DELETE FROM {team_table_name} 
                WHERE rowid NOT IN (SELECT max(rowid) 
                FROM {team_table_name} 
                GROUP BY TEAM_ID, GAME_ID)""")

    cur.execute(f"""DELETE FROM {player_table_name}
                WHERE rowid NOT IN (SELECT max(rowid) 
                FROM {player_table_name} 
                GROUP BY PLAYER_ID, GAME_ID)""")
    conn.commit()

    return game_ids_not_added



In [26]:
date = '2023-04-11'

# League game log restricted to the single day above.
gamelogs = leaguegamelog.LeagueGameLog(date_from_nullable=date, date_to_nullable=date).get_data_frames()[0]

# Frame 1 of the scoring boxscore for one fixed game
# (the update functions store frame 1 in the team scoring table).
boxscores = boxscorescoringv2.BoxScoreScoringV2('0022100147').get_data_frames()[1]

boxscores

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,PCT_FGA_2PT,PCT_FGA_3PT,PCT_PTS_2PT,PCT_PTS_2PT_MR,PCT_PTS_3PT,PCT_PTS_FB,PCT_PTS_FT,PCT_PTS_OFF_TOV,PCT_PTS_PAINT,PCT_AST_2PM,PCT_UAST_2PM,PCT_AST_3PM,PCT_UAST_3PM,PCT_AST_FGM,PCT_UAST_FGM
0,22100147,1610612755,76ers,PHI,Philadelphia,240:00,0.56,0.44,0.458,0.021,0.406,0.052,0.135,0.052,0.438,0.364,0.636,0.923,0.077,0.571,0.429
1,22100147,1610612752,Knicks,NYK,New York,240:00,0.56,0.44,0.524,0.097,0.379,0.223,0.097,0.097,0.427,0.444,0.556,1.0,0.0,0.625,0.375


In [30]:
def update_all_data(conn, season, dates=None):
    """Run every update function in turn: basic, advanced, scoring, then tracking boxscores.

    Parameters
    ----------
    conn : database connection
        Open SQLite connection (e.g. from ``sqlite3.connect``).
    season : int
        Season to update, given as its starting year.
    dates : sequence of str, optional
        Dates forwarded to the advanced, scoring and tracking updaters.
        When omitted, the three dates that used to be hard-coded here are used,
        so existing calls behave as before. Pass an empty list to have each
        updater work out the missing games for the whole season instead.

    Returns
    -------
    None
    """
    if dates is None:
        # Previous hard-coded behaviour, kept as the default.
        dates = ['2023-04-11', '2023-04-12', '2023-04-14']

    print("updating basic team boxscores")
    update_team_basic_boxscores(conn = conn, season=season)
    print("updating advanced team/player boxscores")
    # Each updater gets its own copy of the list so none can affect the others.
    update_team_advanced_boxscores(conn = conn, season=season, dates=list(dates))
    print("updating scoring boxscores")
    update_team_scoring_boxscores(conn = conn, season=season, dates=list(dates))
    print("update team tracking boxscores")
    update_team_tracking_boxscores(conn, season, dates=list(dates))

In [31]:
connection = sqlite3.connect('../data/nba.db')

# Close the connection even if one of the updates raises.
try:
    update_all_data(connection, 2022)
finally:
    connection.close()

updating basic team boxscores
updating advanced team/player boxscores
num_games_updated: 6


progress: 100%|██████████| 6/6 [00:20<00:00,  3.48s/it]


updating scoring boxscores
num_games_updated: 6


progress: 100%|██████████| 6/6 [00:19<00:00,  3.23s/it]


update team tracking boxscores
num_games_updated: 6


progress: 100%|██████████| 6/6 [00:20<00:00,  3.43s/it]


In [16]:
connection = sqlite3.connect('../data/nba.db')

# Load the whole basic boxscore table; close the connection even if the read fails.
try:
    df = pd.read_sql("SELECT * FROM team_basic_boxscores", con=connection)
finally:
    connection.close()

Unnamed: 0,SEASON,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS
0,2000-01,1610612739,CLE,Cleveland Cavaliers,0020000002,2000-10-31,CLE @ NJN,W,240,32,...,11,41,52,16,5,8,19,27,86,4
1,2000-01,1610612751,NJN,New Jersey Nets,0020000002,2000-10-31,NJN vs. CLE,L,240,31,...,12,35,47,24,9,8,15,31,82,-4
2,2000-01,1610612745,HOU,Houston Rockets,0020000008,2000-10-31,HOU vs. MIN,L,240,34,...,11,27,38,18,9,7,18,17,98,-8
3,2000-01,1610612750,MIN,Minnesota Timberwolves,0020000008,2000-10-31,MIN @ HOU,W,240,43,...,13,31,44,29,7,1,16,25,106,8
4,2000-01,1610612759,SAS,San Antonio Spurs,0020000009,2000-10-31,SAS vs. IND,W,240,33,...,5,32,37,17,6,4,18,22,98,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56295,2021-22,1610612738,BOS,Boston Celtics,0042100306,2022-05-27,BOS vs. MIA,L,240,32,...,6,28,34,22,10,4,18,28,103,-8
56296,2021-22,1610612748,MIA,Miami Heat,0042100307,2022-05-29,MIA vs. BOS,L,240,37,...,7,37,44,16,4,5,10,18,96,-4
56297,2021-22,1610612738,BOS,Boston Celtics,0042100307,2022-05-29,BOS @ MIA,W,240,35,...,10,41,51,22,4,6,13,26,100,4
56298,2021-22,1610612744,GSW,Golden State Warriors,0042100401,2022-06-02,GSW vs. BOS,L,240,39,...,12,27,39,24,8,6,14,16,108,-12
