In [2]:
from nba_api.stats.static import players, teams
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.endpoints import boxscoreadvancedv2
from nba_api.stats.endpoints import boxscorescoringv2

import random
import pandas as pd
import numpy as np
from tqdm import tqdm
import time as time
from time import sleep
from datetime import date
from IPython.core.display import clear_output
import sqlite3

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait       
from selenium.webdriver.common.by import By       
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.headless = True

import random

In [3]:
def season_string(season):
    return str(season) + '-' + str(season+1)[-2:]

In [4]:
def update_team_basic_boxscores(conn, season):
    table_name = 'team_basic_boxscores'
    season_str = season_string(season)
        
    dfs = []
    for season_type in ['Regular Season', 'Playoffs']:
        team_gamelogs = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
        dfs.append(team_gamelogs)
        
    team_gamelogs_updated = pd.concat(dfs)
    team_gamelogs_updated['SEASON'] = season_str
    team_gamelogs_updated.drop(columns = ['SEASON_ID', 'VIDEO_AVAILABLE'], inplace=True)
    
    team_gamelogs_updated.to_sql(table_name, conn, if_exists='append', index=False)

    cur = conn.cursor()
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT min(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(table_name, table_name))
    conn.commit()
    
    return None

In [5]:
def update_team_advanced_boxscores(conn, season, dates):
    table_name = 'team_advanced_boxscores'
    
    season_str = season_string(season)
    
    game_ids_not_added = []
    
    # Pull the GAME_IDs from my data
    game_ids_in_db = pd.read_sql('''SELECT DISTINCT team_basic_boxscores.GAME_ID FROM team_basic_boxscores
                INNER JOIN team_advanced_boxscores 
                ON team_basic_boxscores.GAME_ID = team_advanced_boxscores.GAME_ID
                AND team_basic_boxscores.TEAM_ID = team_advanced_boxscores.TEAM_ID
                WHERE SEASON = "{}" '''.format(season_str), conn)

    game_ids_in_db = game_ids_in_db['GAME_ID'].tolist()
    
    missing_game_ids = []
    if len(dates) != 0:
        for date in dates:
            gamelogs = leaguegamelog.LeagueGameLog(
                season=season_str, date_from_nullable=date, date_to_nullable=date).get_data_frames()[0]
            missing_game_ids.extend(gamelogs['GAME_ID'].unique())
            
    else:        
        # get up to date GAME_IDs
        to_date_game_ids = []
        for season_type in ['Regular Season', 'Playoffs']:
            to_date_gamelogs = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
            to_date_game_ids.extend(to_date_gamelogs['GAME_ID'].unique())
        
        # See which game_ids are missing
        missing_game_ids = set(to_date_game_ids) - set(game_ids_in_db)
        
    num_games_updated = len(missing_game_ids)
    print("num_games_updated:", num_games_updated)
    
    if num_games_updated == 0:
        print("All team advanced boxscores up to date in season {}".format(season_str))
        return None
    
    for game_id in tqdm(missing_game_ids, desc='progress'):
        try:
            boxscores = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id).get_data_frames()[1]
            boxscores.to_sql(table_name, conn, if_exists='append', index=False)
            sleep(2)
        except:
            game_ids_not_added.append(game_id)  
    
    cur = conn.cursor()
    cur.execute('DELETE FROM {} WHERE rowid NOT IN (SELECT max(rowid) FROM {} GROUP BY TEAM_ID, GAME_ID)'.format(table_name, table_name))
    conn.commit()
    
    return game_ids_not_added

In [16]:
#update boxscores with any missing data
date = date.today()
year = date.year
month = date.month

if month >= 9:
    season = year
else:
    season = year-1
    
con = sqlite3.connect('nba.db')
update_team_basic_boxscores(con, season)
missing_game_ids = update_team_advanced_boxscores(con, season, [])
print('Number of games missing: ', len(missing_game_ids))

progress:   0%|          | 0/1 [00:00<?, ?it/s]

num_games_updated: 1


progress: 100%|██████████| 1/1 [00:03<00:00,  3.84s/it]

Number of games missing:  0





In [6]:
#SCRATCH
con = sqlite3.connect('nba.db')

team_basic_boxscores_df = pd.read_sql('select * from team_basic_boxscores', con)
team_advanced_boxscores_df = pd.read_sql('select * from team_advanced_boxscores', con)

In [7]:
#SCRATCH
team_boxscores_df = team_basic_boxscores_df.merge(team_advanced_boxscores_df, how='inner', on=['GAME_ID', 'TEAM_ID'])
print(max(team_boxscores_df['GAME_DATE']))
team_boxscores_df.head()

2022-06-16


Unnamed: 0,SEASON,TEAM_ID,TEAM_ABBREVIATION_x,TEAM_NAME_x,GAME_ID,GAME_DATE,MATCHUP,WL,MIN_x,FGM,...,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,2021-22,1610612747,LAL,Los Angeles Lakers,22100002,2021-10-19,LAL vs. GSW,L,240,45,...,16.1,0.553,0.551,1.0,0.198,115.28,112.5,93.75,112.0,0.422
1,2021-22,1610612744,GSW,Golden State Warriors,22100002,2021-10-19,GSW @ LAL,W,240,41,...,15.0,0.516,0.57,1.0,0.2,115.28,112.5,93.75,113.0,0.578
2,2021-22,1610612751,BKN,Brooklyn Nets,22100001,2021-10-19,BKN @ MIL,L,240,37,...,12.7,0.542,0.552,1.0,0.2,105.02,102.0,85.0,102.0,0.407
3,2021-22,1610612749,MIL,Milwaukee Bucks,22100001,2021-10-19,MIL vs. BKN,W,240,48,...,7.8,0.538,0.562,1.0,0.194,105.02,102.0,85.0,102.0,0.593
4,2021-22,1610612754,IND,Indiana Pacers,22100003,2021-10-20,IND @ CHA,L,240,42,...,15.9,0.561,0.607,1.0,0.199,112.22,106.5,88.75,107.0,0.52


In [8]:
#SCRATCH
team_boxscores_df[team_boxscores_df['GAME_DATE']=='2022-06-16']

Unnamed: 0,SEASON,TEAM_ID,TEAM_ABBREVIATION_x,TEAM_NAME_x,GAME_ID,GAME_DATE,MATCHUP,WL,MIN_x,FGM,...,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
56306,2021-22,1610612744,GSW,Golden State Warriors,42100406,2022-06-16,GSW @ BOS,W,240,38,...,18.1,0.516,0.539,1.0,0.197,97.4,93.5,77.92,94.0,0.54
56307,2021-22,1610612738,BOS,Boston Celtics,42100406,2022-06-16,BOS vs. GSW,L,240,34,...,24.7,0.494,0.528,1.0,0.198,97.4,93.5,77.92,93.0,0.46


In [None]:
#possibly add code to remove data that is more that 20 seasons old

In [14]:
#pull spreads and moneylines for the day
def pull_spreads(date):
    
    dates_with_no_data = []
    
    seasons = []
    gm_dates = []
    away_teams = []
    home_teams = []
    away_scoreboards = []
    home_scoreboards = []
    away_spreads = []
    home_spreads = []
    
    web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/?date={}'.format(date)
    path = '../Downloads/chromedriver'
    driver = webdriver.Chrome(path)
    driver.get(web)
    sleep(random.randint(1,2))

    try:
        single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

    except:
        print("No Data for {}".format(date))
        dates_with_no_data.append(date)

    num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

    num_listed_events = len(single_row_events)
    cutoff = num_listed_events - num_postponed_events

    for event in single_row_events[:cutoff]:

        away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
        home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
        away_teams.append(away_team)
        home_teams.append(home_team)
        gm_dates.append(date)

        scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

        home_score = []
        away_score = []

        for score in scoreboard:
            quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
            for i in range(len(quarters)):
                scores = quarters[i].text.split('\n')
                away_score.append(scores[0])
                home_score.append(scores[1])

            home_score = ",".join(home_score)
            away_score = ",".join(away_score)

            away_scoreboards.append(away_score)
            home_scoreboards.append(home_score)


        if len(away_scoreboards) != len(away_teams):
            num_to_add = len(away_teams) - len(away_scoreboards)
            for i in range(num_to_add):
                away_scoreboards.append('')
                home_scoreboards.append('')

        spreads = event.find_elements_by_class_name('pointer-2j4Dk')
        away_lines = []
        home_lines = []
        for i in range(len(spreads)):    
            if i % 2 == 0:
                away_lines.append(spreads[i].text)
            else:
                home_lines.append(spreads[i].text)

        away_lines = ",".join(away_lines)
        home_lines = ",".join(home_lines)

        away_spreads.append(away_lines)
        home_spreads.append(home_lines)

        if len(away_spreads) != len(away_teams):
            num_to_add = len(away_teams) - len(away_spreads)
            for i in range(num_to_add):
                away_scoreboards.append('')
                home_scoreboards.append('')

    driver.quit()
    clear_output(wait=True)

    df = pd.DataFrame({'GM_DATE':gm_dates,
                      'AWAY_TEAM':away_teams,
                      'HOME_TEAM':home_teams,
                      'AWAY_SCOREBOARD':away_scoreboards,
                      'HOME_SCOREBOARD':home_scoreboards,
                      'AWAY_SPREAD':away_spreads,
                      'HOME_SPREAD':home_spreads})

    df = df.sort_values(['GM_DATE']).reset_index(drop=True)

    
    return df

In [19]:
spreads_df = pull_spreads(date)
#spreads_df = pull_spreads('2022-06-16')

  driver = webdriver.Chrome(path)
  single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')
  num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))


In [20]:
spreads_df

Unnamed: 0,GM_DATE,AWAY_TEAM,HOME_TEAM,AWAY_SCOREBOARD,HOME_SCOREBOARD,AWAY_SPREAD,HOME_SPREAD
0,2022-06-16,Golden State,Boston,,,"-,+4-113,-,+3½-118,-","-,-4-109,-,-3½-118,-"


In [None]:
#compute weighted average stats for each team

num_games = 50

non_feature_cols = {'SEASON', 'TEAM_ID', 'GAME_ID', 'GAME_DATE', 'HOME_TEAM', 'TEAM_CITY'}
feature_cols = set(team_boxscores_df.columns) - non_feature_cols

for i, row in spreads_df.iterrows():
    away_team = row['AWAY_TEAM']
    home_team = row['HOME_TEAM']
    
    if away_team == 'LA':
        away_team = 'L.A. Clippers'
    elif away_team == 'Los Angeles':
        away_team = 'L.A. Lakers'
    if home_team == 'LA':
        home_team = 'L.A. Clippers'
    elif home_team == 'Los Angeles':
        home_team = 'L.A. Lakers'
    
    away_team_df = team_boxscores_df[team_boxscores_df['TEAM_CITY']==away_team]
    home_team_df = team_boxscores_df[team_boxscores_df['TEAM_CITY']==home_team]
    
    away_team_df.sort_values(by=['GAME_DATE'], ascending=True, inplace=True)
    away_team_df = away_team_df.tail(num_games)
    
    home_team_df.sort_values(by=['GAME_DATE'], ascending=True, inplace=True)
    home_team_df = home_team_df.tail(num_games)
    
    
    

In [22]:
team_boxscores_df['TEAM_CITY']

0         Los Angeles
1        Golden State
2            Brooklyn
3           Milwaukee
4             Indiana
             ...     
56303       Milwaukee
56304    Golden State
56305          Boston
56306    Golden State
56307          Boston
Name: TEAM_CITY, Length: 56308, dtype: object

In [None]:
#evaluate available bets

In [None]:
#send email notification