In [10]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait       
from selenium.webdriver.common.by import By       
from selenium.webdriver.support import expected_conditions as EC

# Chrome options object with headless mode requested.
# NOTE(review): none of the visible cells pass `options` to webdriver.Chrome,
# so this setting appears to have no effect as written — confirm intent.
options = Options()
options.headless = True

import numpy as np
import pandas as pd
import time as time
from time import sleep
import random
from tqdm import tqdm
import sqlite3
from IPython.display import clear_output

from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.static import teams

In [11]:
def season_string(season):
    """Format a season's starting year as a 'YYYY-YY' label.

    Args:
        season: Starting year of the season (e.g. 2021).

    Returns:
        The starting year, a hyphen, and the last two characters of the
        following year, e.g. 2021 -> '2021-22'.
    """
    following_year = str(season + 1)
    suffix = following_year[-2:]
    return "{}-{}".format(str(season), suffix)

def get_game_dates(season):
    """Collect the game dates of one season from the league game log.

    Queries the LeagueGameLog endpoint once for 'Regular Season' and once for
    'Playoffs', pausing one second after each request.

    Args:
        season: Starting year of the season; converted with season_string().

    Returns:
        list of the unique GAME_DATE values of the regular season followed by
        the unique GAME_DATE values of the playoffs.
    """
    label = season_string(season)
    collected = []
    for phase in ['Regular Season', 'Playoffs']:
        endpoint = leaguegamelog.LeagueGameLog(season=label, season_type_all_star=phase)
        game_log = endpoint.get_data_frames()[0]
        unique_dates = game_log['GAME_DATE'].unique()
        collected.extend(unique_dates)
        # Pause between the two endpoint requests.
        sleep(1)
    return collected

In [15]:
def _parse_spread_event(event, season_label, date):
    """Build one ``spreads`` row (as a dict) from a single event element.

    Args:
        event: Element for one listed game; queried by CSS class name.
        season_label: Value stored in the ``SEASON`` column.
        date: Value stored in the ``GM_DATE`` column.

    Returns:
        dict keyed by the ``spreads`` column names. Scoreboard and spread
        values are comma-joined strings, empty when the element has none.
    """
    # The first participant box is taken as the away team, the second as home.
    participants = event.find_elements_by_class_name('participantBox-3ar9Y')
    away_team = participants[0].text
    home_team = participants[1].text

    # Accumulate across every scoreboard element and join exactly once, so an
    # event always yields a single scoreboard value per side.
    away_score = []
    home_score = []
    for board in event.find_elements_by_class_name('scoreboard-1TXQV'):
        for column in board.find_elements_by_class_name('scoreboardColumn-2OtpR'):
            # Each column's text holds the away value on line 1, home on line 2.
            cells = column.text.split('\n')
            away_score.append(cells[0])
            home_score.append(cells[1])

    # Line elements alternate: even positions are away, odd positions are home.
    lines = [line.text for line in event.find_elements_by_class_name('pointer-2j4Dk')]

    return {
        'SEASON': season_label,
        'GM_DATE': date,
        'AWAY_TEAM': away_team,
        'HOME_TEAM': home_team,
        'AWAY_SCOREBOARD': ",".join(away_score),
        'HOME_SCOREBOARD': ",".join(home_score),
        'AWAY_SPREAD': ",".join(lines[0::2]),
        'HOME_SPREAD': ",".join(lines[1::2]),
    }


def add_spreads(conn, start_season, end_season, if_exists='append'):
    """Scrape per-game spreads and scoreboards and store them in ``spreads``.

    For every season in ``[start_season, end_season]`` the game dates come
    from get_game_dates(); each date's odds page is opened in a fresh Chrome
    driver, every non-postponed event is parsed into one row, and all rows are
    appended to the ``spreads`` table. Afterwards duplicate rows (same
    GM_DATE, AWAY_TEAM, HOME_TEAM) are deleted, keeping the lowest rowid.

    Args:
        conn: Open sqlite3 connection.
        start_season: First season (starting year), inclusive.
        end_season: Last season (starting year), inclusive.
        if_exists: 'replace' drops the table before scraping; any other value
            appends to the existing table.

    Returns:
        pandas.DataFrame of the rows scraped by this call, sorted by GM_DATE.

    Raises:
        Errors raised while loading or parsing a page propagate to the caller;
        the driver for that date is still closed first.
    """
    table_name = 'spreads'
    columns = ['SEASON', 'GM_DATE', 'AWAY_TEAM', 'HOME_TEAM',
               'AWAY_SCOREBOARD', 'HOME_SCOREBOARD', 'AWAY_SPREAD', 'HOME_SPREAD']

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + table_name)

    conn.execute("""CREATE TABLE IF NOT EXISTS {} (SEASON TEXT, GM_DATE DATE, HOME_TEAM TEXT,
            AWAY_TEAM TEXT, AWAY_SCOREBOARD TEXT, HOME_SCOREBOARD TEXT, AWAY_SPREAD TEXT,
            HOME_SPREAD TEXT)""".format(table_name))

    # NOTE(review): the notebook-level headless `options` object is not passed
    # to webdriver.Chrome here (same as before) — confirm whether it should be.
    driver_path = '../Downloads/chromedriver'

    dates_with_no_data = []
    records = []  # one dict per event keeps all columns aligned by construction

    for season in range(start_season, end_season + 1):
        season_label = season_string(season)
        print("scraping season: {}".format(season_label))

        for date in tqdm(get_game_dates(season), desc='progress'):
            web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/?date={}'.format(date)
            driver = webdriver.Chrome(driver_path)
            try:
                driver.get(web)
                sleep(random.randint(1, 2))

                try:
                    single_row_events = driver.find_elements_by_class_name(
                        'eventMarketGridContainer-3QipG')
                except Exception:
                    # Narrower than a bare except: Ctrl-C still interrupts.
                    single_row_events = []

                if single_row_events:
                    num_postponed_events = len(
                        driver.find_elements_by_class_name('eventStatus-3EHqw'))
                    # Clamp so a surplus of postponed markers cannot turn into
                    # a negative slice index.
                    cutoff = max(len(single_row_events) - num_postponed_events, 0)
                    for event in single_row_events[:cutoff]:
                        records.append(_parse_spread_event(event, season_label, date))
                else:
                    # find_elements_* returns an empty list when nothing
                    # matches, so an empty result is the usual "no data" case.
                    dates_with_no_data.append(date)
            finally:
                # Always release the browser, including on the no-data path
                # and when parsing raises.
                driver.quit()
            clear_output(wait=True)

    # Reported once at the end because clear_output() wipes per-date messages.
    if dates_with_no_data:
        print("No Data for {}".format(", ".join(str(d) for d in dates_with_no_data)))

    df = pd.DataFrame(records, columns=columns)
    # Stable sort keeps the page order of games that share a date.
    df = df.sort_values(['GM_DATE'], kind='mergesort').reset_index(drop=True)

    df.to_sql(table_name, conn, if_exists='append', index=False)

    # Drop duplicates created by re-scraping, keeping the earliest inserted row.
    conn.execute('''DELETE FROM {table}
                    WHERE rowid NOT IN (SELECT MIN(rowid) FROM {table}
                                        GROUP BY GM_DATE, AWAY_TEAM, HOME_TEAM)'''.format(table=table_name))
    conn.commit()

    return df

In [16]:
# Open (or create) the SQLite database file that holds the `spreads` table.
con = sqlite3.connect("nba.db")

# Scrape the single season starting in 2021 and append its rows to the table;
# the returned frame holds the rows scraped by this call.
spreads_df = add_spreads(con, 2021, 2021, if_exists='append')

# Last expression of the cell: display the scraped frame.
spreads_df

progress: 100%|██████████| 210/210 [44:51<00:00, 12.82s/it]


Unnamed: 0,SEASON,GM_DATE,AWAY_TEAM,HOME_TEAM,AWAY_SCOREBOARD,HOME_SCOREBOARD,AWAY_SPREAD,HOME_SPREAD
0,2021-22,2021-10-19,Brooklyn,Milwaukee,25342619104,37293130127,"+1½-110,+2-113,+2-113,+2½-135,-","-1½-110,-2-108,-2-108,-2½-105,-"
1,2021-22,2021-10-19,Golden State,L.A. Lakers,32213038121,34252629114,"+3-110,+3-112,+3-112,+4-135,-","-3-110,-3-109,-3-109,-4-105,-"
2,2021-22,2021-10-20,Indiana,Charlotte,38371334122,27323331123,"-1+100,+½-110,+1½-110,-1½-110,-","+1-120,-½-110,-1½-112,+1½-110,-"
3,2021-22,2021-10-20,Chicago,Detroit,1426312394,2024251988,"-5-110,-5-112,-5-112,-5-110,-","+5-110,+5-110,+5-110,+5-110,-"
4,2021-22,2021-10-20,Washington,Toronto,2631241798,1819222483,"+2½-110,+2½-109,+3-109,+4½-160,-","-2½-110,-2½-112,-3-112,-4½+115,-"
...,...,...,...,...,...,...,...,...
1298,2021-22,2022-06-05,Boston,Golden State,3020142488,31213520107,"+5-110,+5-113,-,+4½-118,-","-5-110,-5-108,-,-5½+100,-"
1299,2021-22,2022-06-08,Golden State,Boston,22343311100,33352523116,"+3½-110,+3½-113,-,+3½-125,-","-3½-110,-3½-109,-,-3½-111,-"
1300,2021-22,2022-06-10,Golden State,Boston,27223028107,2826241997,"+4-110,+4-113,-,+3½-111,-","-4-110,-4-109,-,-3½-125,-"
1301,2021-22,2022-06-13,Boston,Golden State,1623352094,27242429104,"+4-110,+4-110,-,+3½-111,-","-4-110,-4-110,-,-3½-125,-"
