In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait       
from selenium.webdriver.common.by import By       
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.headless = True

import numpy as np
import pandas as pd
import time as time
from time import sleep
import random
from tqdm import tqdm
import sqlite3
from IPython.display import clear_output

from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.static import teams

In [3]:
def season_string(season):
    return str(season) + '-' + str(season+1)[-2:]

def get_game_dates(season):
    season_str = season_string(season)
    dates = []
    for season_type in ['Regular Season', 'Playoffs']:
        games = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
        dates.extend(games['GAME_DATE'].unique())
        sleep(1)
    return dates


get_game_dates(2021)

['2021-10-19',
 '2021-10-20',
 '2021-10-21',
 '2021-10-22',
 '2021-10-23',
 '2021-10-24',
 '2021-10-25',
 '2021-10-26',
 '2021-10-27',
 '2021-10-28',
 '2021-10-29',
 '2021-10-30',
 '2021-10-31',
 '2021-11-01',
 '2021-11-02']

In [115]:
# Get Moneylines
def add_moneylines(conn, start_season, end_season, if_exists='append'):
    
    table_name = 'moneylines'

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + table_name)
        
    conn.execute("""CREATE TABLE IF NOT EXISTS {} (SEASON TEXT, GM_DATE DATE, HOME_TEAM TEXT,
            AWAY_TEAM TEXT, AWAY_ML TEXT, HOME_ML TEXT)""".format(table_name))
    
    dates_with_no_data = []
    
    seasons = []
    gm_dates = []
    away_teams = []
    home_teams = []
    away_mls = []
    home_mls = []

    for season in range(start_season, end_season+1):
        print("scraping season: {}".format(season_string(season)))
        dates = get_game_dates(season)
        
        for date in tqdm(dates, desc='progress'):
            web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/money-line/?date={}'.format(date)
            path = '../chromedriver.exe'
            driver = webdriver.Chrome(path)
            driver.get(web)
            sleep(random.randint(1,2))

            try:
                single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')
            
            except:
                print("No Data for {}".format(date))
                dates_with_no_data.append(date)
                continue
                      
            num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

            num_listed_events = len(single_row_events)
            cutoff = num_listed_events - num_postponed_events

            for event in single_row_events[:cutoff]:
                seasons.append(season_string(season))

                away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
                home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text

                away_teams.append(away_team)
                home_teams.append(home_team)

                gm_dates.append(date)

                mls = event.find_elements_by_class_name('pointer-2j4Dk')
                
                away_moneyline = []
                home_moneyline = []
                
                
                for i, ml in enumerate(mls):
                    if i%2==0:
                        away_moneyline.append(ml.text)
                    else:
                        home_moneyline.append(ml.text)
                
                away_moneyline = ",".join(away_moneyline)
                home_moneyline = ",".join(home_moneyline)

                away_mls.append(away_moneyline)
                home_mls.append(home_moneyline)
                
            driver.quit()
            
        clear_output(wait=True)
        
    df = pd.DataFrame({'SEASON':seasons,
                       'GM_DATE':gm_dates,
                       'AWAY_TEAM':away_teams, 
                      'HOME_TEAM':home_teams,
                      'AWAY_ML':away_mls,
                      'HOME_ML':home_mls,
                                         })
    
    df = df.sort_values(['GM_DATE']).reset_index(drop=True)
    
    df.to_sql(table_name, conn, if_exists='append', index=False)

    cur = connection.cursor()
    cur.execute('''DELETE FROM moneylines 
                    WHERE rowid NOT IN (SELECT MIN(rowid) FROM moneylines
                                        GROUP BY GM_DATE, AWAY_TEAM, HOME_TEAM)''')
    conn.commit()
    
    return df

In [4]:
connection = sqlite3.connect('../data/nba.db')

mls = pd.read_sql("SELECT * FROM moneylines WHERE AWAY_ML == '-,-,-,-'", connection)
mls

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_ML,HOME_ML
0,2006-07,2006-11-01,Memphis,New York,"-,-,-,-","-,-,-,-"
1,2006-07,2006-11-01,Utah,Houston,"-,-,-,-","-,-,-,-"
2,2006-07,2006-11-01,Brooklyn,Toronto,"-,-,-,-","-,-,-,-"
3,2006-07,2006-11-01,Orlando,Chicago,"-,-,-,-","-,-,-,-"
4,2007-08,2007-10-30,San Antonio,Portland,"-,-,-,-","-,-,-,-"
...,...,...,...,...,...,...
1632,2010-11,2011-03-04,New York,Cleveland,"-,-,-,-","-,-,-,-"
1633,2010-11,2011-03-23,Milwaukee,Sacramento,"-,-,-,-","-,-,-,-"
1634,2010-11,2011-03-31,San Antonio,Boston,"-,-,-,-","-,-,-,-"
1635,2013-14,2013-11-23,Milwaukee,Charlotte,"-,-,-,-","-,-,-,-"


In [7]:
seasons = []
gm_dates = []
away_teams = []
home_teams = []
away_scoreboards = []
home_scoreboards = []
away_spreads = []
home_spreads = []

# for season in range(start_season, end_season+1):
#     print("scraping season: {}".format(season_string(season)))
#     dates = get_game_dates(season)    

dates = get_game_dates(2021)
season=2021
for date in tqdm(dates, desc='progress'):
    web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/?date={}'.format(date)
    path = '../chromedriver.exe'
    driver = webdriver.Chrome(path)
    driver.get(web)
#   sleep(random.randint(1,2))

    try:
        single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

    except:
        print("No Data for {}".format(date))
        dates_with_no_data.append(date)
        continue

    num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

    num_listed_events = len(single_row_events)
    cutoff = num_listed_events - num_postponed_events

    for event in single_row_events[:cutoff]:

        away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
        home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
        away_teams.append(away_team)
        home_teams.append(home_team)
        gm_dates.append(date)

        seasons.append(season_string(season))

        scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

        home_score = []
        away_score = []

        for score in scoreboard:
            quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
            for i in range(len(quarters)):
                scores = quarters[i].text.split('\n')
                away_score.append(scores[0])
                home_score.append(scores[1])

            home_score = ",".join(home_score)
            away_score = ",".join(away_score)

            away_scoreboards.append(away_score)
            home_scoreboards.append(home_score)


        if len(away_scoreboards) != len(away_teams):
            num_to_add = len(away_teams) - len(away_scoreboards)
            for i in range(num_to_add):
                away_scoreboards.append('')
                home_scoreboards.append('')

        spreads = event.find_elements_by_class_name('pointer-2j4Dk')
        away_lines = []
        home_lines = []
        for i in range(len(spreads)):    
            if i % 2 == 0:
                away_lines.append(spreads[i].text)
            else:
                home_lines.append(spreads[i].text)

        away_lines = ",".join(away_lines)
        home_lines = ",".join(home_lines)

        away_spreads.append(away_lines)
        home_spreads.append(home_lines)

        if len(away_spreads) != len(away_teams):
            num_to_add = len(away_teams) - len(away_spreads)
            for i in range(num_to_add):
                away_scoreboards.append('')
                home_scoreboards.append('')

    driver.quit()
    clear_output(wait=True)

df = pd.DataFrame({'SEASON':seasons, 
                  'GM_DATE':gm_dates,
                  'AWAY_TEAM':away_teams,
                  'HOME_TEAM':home_teams,
                  'AWAY_SCOREBOARD':away_scoreboards,
                  'HOME_SCOREBOARD':home_scoreboards,
                  'AWAY_SPREAD':away_spreads,
                  'HOME_SPREAD':home_spreads})

df

progress: 100%|██████████████████████████████████████████████████████████████████████████| 6/6 [00:46<00:00,  7.76s/it]


Unnamed: 0,SEASON,GM_DATE,AWAY_TEAM,HOME_TEAM,AWAY_SCOREBOARD,HOME_SCOREBOARD,AWAY_SPREAD,HOME_SPREAD
0,2021-22,2021-10-19,Brooklyn,Milwaukee,25342619104.0,37293130127.0,"+1½-110,+2-113,+2-113,+2½-135","-1½-110,-2-108,-2-108,-2½-105"
1,2021-22,2021-10-19,Golden State,L.A. Lakers,32213038121.0,34252629114.0,"+3-110,+3-112,+3-112,+4-135","-3-110,-3-109,-3-109,-4-105"
2,2021-22,2021-10-20,Indiana,Charlotte,38371334122.0,27323331123.0,"-1+100,+½-110,+1½-110,-1½-110","+1-120,-½-110,-1½-112,+1½-110"
3,2021-22,2021-10-20,Chicago,Detroit,1426312394.0,2024251988.0,"-5-110,-5-112,-5-112,-5-110","+5-110,+5-110,+5-110,+5-110"
4,2021-22,2021-10-20,Washington,Toronto,2631241798.0,1819222483.0,"+2½-110,+2½-109,+3-109,+4½-160","-2½-110,-2½-112,-3-112,-4½+115"
5,2021-22,2021-10-20,Boston,New York,35232434126134.0,292532301210138.0,"+2-110,+1½-107,+2-109,+3½-140","-2-110,-1½-114,-2-112,-3½+100"
6,2021-22,2021-10-20,Cleveland,Memphis,32292931121.0,32412138132.0,"+7-105,+7½-110,+7½-110,+8-150","-7-115,-7½-110,-7½-110,-8+110"
7,2021-22,2021-10-20,Philadelphia,New Orleans,29242836117.0,2528172797.0,"-4-110,-4½-107,-4½-107,-2½-140","+4-110,+4½-114,+4½-114,+2½+100"
8,2021-22,2021-10-20,Houston,Minnesota,21242635106.0,32402725124.0,"+6½-110,+6½-112,+5½-110,+7½-145","-6½-110,-6½-109,-5½-110,-7½+105"
9,2021-22,2021-10-20,Orlando,San Antonio,2821222697.0,32293329123.0,"+6½-110,+6½-110,+6½-110,+4½+125","-6½-110,-6½-110,-6½-110,-4½-175"


In [114]:
def add_spreads(conn, start_season, end_season, if_exists='append'):
    
    table_name = 'spreads'

    if if_exists == 'replace':
        conn.execute('DROP TABLE IF EXISTS ' + table_name)
        
    conn.execute("""CREATE TABLE IF NOT EXISTS {} (SEASON TEXT, GM_DATE DATE, HOME_TEAM TEXT,
            AWAY_TEAM TEXT, AWAY_SCOREBOARD TEXT, HOME_SCOREBOARD TEXT, AWAY_SPREAD TEXT,
            HOME_SPREAD TEXT)""".format(table_name))
    
    dates_with_no_data = []
    
    seasons = []
    gm_dates = []
    away_teams = []
    home_teams = []
    away_scoreboards = []
    home_scoreboards = []
    away_spreads = []
    home_spreads = []
    
    for season in range(start_season, end_season+1):
        print("scraping season: {}".format(season_string(season)))
        dates = get_game_dates(season)    
        
        for date in tqdm(dates, desc='progress'):
            web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/?date={}'.format(date)
            path = '../chromedriver.exe'
            driver = webdriver.Chrome(path)
            driver.get(web)
            sleep(random.randint(1,2))

            try:
                single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')
                
            except:
                print("No Data for {}".format(date))
                dates_with_no_data.append(date)
                continue
                
            num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

            num_listed_events = len(single_row_events)
            cutoff = num_listed_events - num_postponed_events

            for event in single_row_events[:cutoff]:

                away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
                home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
                away_teams.append(away_team)
                home_teams.append(home_team)
                gm_dates.append(date)

                seasons.append(season_string(season))
                
                scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

                home_score = []
                away_score = []

                for score in scoreboard:
                    quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
                    for i in range(len(quarters)):
                        scores = quarters[i].text.split('\n')
                        away_score.append(scores[0])
                        home_score.append(scores[1])
                        
                    home_score = ",".join(home_score)
                    away_score = ",".join(away_score)
                    
                    away_scoreboards.append(away_score)
                    home_scoreboards.append(home_score)


                if len(away_scoreboards) != len(away_teams):
                    num_to_add = len(away_teams) - len(away_scoreboards)
                    for i in range(num_to_add):
                        away_scoreboards.append('')
                        home_scoreboards.append('')

                spreads = event.find_elements_by_class_name('pointer-2j4Dk')
                away_lines = []
                home_lines = []
                for i in range(len(spreads)):    
                    if i % 2 == 0:
                        away_lines.append(spreads[i].text)
                    else:
                        home_lines.append(spreads[i].text)
                
                away_lines = ",".join(away_lines)
                home_lines = ",".join(home_lines)
                
                away_spreads.append(away_lines)
                home_spreads.append(home_lines)

                if len(away_spreads) != len(away_teams):
                    num_to_add = len(away_teams) - len(away_spreads)
                    for i in range(num_to_add):
                        away_scoreboards.append('')
                        home_scoreboards.append('')

            driver.quit()
            clear_output(wait=True)

    df = pd.DataFrame({'SEASON':seasons, 
                      'GM_DATE':gm_dates,
                      'AWAY_TEAM':away_teams,
                      'HOME_TEAM':home_teams,
                      'AWAY_SCOREBOARD':away_scoreboards,
                      'HOME_SCOREBOARD':home_scoreboards,
                      'AWAY_SPREAD':away_spreads,
                      'HOME_SPREAD':home_spreads})

    df = df.sort_values(['GM_DATE']).reset_index(drop=True)
    
    df.to_sql(table_name, conn, if_exists='append', index=False)
    
    cur = connection.cursor()
    cur.execute('''DELETE FROM spreads 
                    WHERE rowid NOT IN (SELECT MIN(rowid) FROM spreads 
                                        GROUP BY GM_DATE, AWAY_TEAM, HOME_TEAM)''')
    conn.commit()
    
    return df


In [117]:
spreads_df = add_spreads(connection, 2007, 2020, if_exists='append')

progress: 100%|██████████████████████████████████████████████████████████████████████| 187/187 [30:04<00:00,  9.65s/it]


In [118]:
connection = sqlite3.connect("../data/nba.db")

spreads = pd.read_sql("SELECT * FROM spreads", connection)

# spreads = spreads.loc[(spreads['AWAY_SPREAD']!='') & (spreads['HOME_SPREAD'] != '')]


# spreads.to_sql("spreads", connection, if_exists='replace', index=False)

spreads

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_SCOREBOARD,HOME_SCOREBOARD,AWAY_SPREAD,HOME_SPREAD
0,2006-07,2006-10-31,Miami,Chicago,22372128108,1614211566,"-,-,+4½-110,-","-,-,-4½-110,-"
1,2006-07,2006-10-31,L.A. Lakers,Phoenix,41172127106,26273427114,"-,-,-8½-110,-","-,-,+8½-110,-"
2,2006-07,2006-11-02,Dallas,San Antonio,2619272597,2724241691,"-,-,+3-110,-","-,-,-3-110,-"
3,2006-07,2006-11-02,L.A. Clippers,Denver,2424222595,1831163196,"-,-,+4½-110,-","-,-,-4½-110,-"
4,2006-07,2006-11-03,Memphis,Charlotte,2126231383,2420163696,"-,-,+5-110,-","-,-,-5-110,-"
...,...,...,...,...,...,...,...,...
18771,2020-21,2021-07-08,Phoenix,Milwaukee,29163330108,26303230118,"+4-108,+4½-105,+4½-105,-","-4-108,-4½-115,-4½-115,-"
18772,2020-21,2021-07-11,Milwaukee,Phoenix,28173124100,25353822120,"+5-108,+4½-105,+4½-110,-","-5-108,-4½-115,-4½-110,-"
18773,2020-21,2021-07-14,Milwaukee,Phoenix,23293021103,20322433109,"+5-108,+4½-113,+4½-115,-","-5-108,-4½-107,-4½-105,-"
18774,2020-21,2021-07-17,Phoenix,Milwaukee,21433623123,37242929119,"+4-108,+4-111,+4½-115,-","-4-108,-4-109,-4½-105,-"


In [5]:
def tables_in_sqlite_db(conn):
    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = [
        v[0] for v in cursor.fetchall()
        if v[0] != "sqlite_sequence"
    ]
    cursor.close()
    return tables

connection = sqlite3.connect("../data/nba.db")

tables_in_sqlite_db(connection)

['moneylines',
 'team_basic_boxscores',
 'team_advanced_boxscores',
 'team_scoring_boxscores',
 'spreads']

In [23]:
df = leaguegamelog.LeagueGameLog(season=2020, season_type_all_star='Playoffs').get_data_frames()[0]

df['SEASON_ID'].str[-4:].astype(int).apply(season_string)
# df

0      2020-21
1      2020-21
2      2020-21
3      2020-21
4      2020-21
        ...   
165    2020-21
166    2020-21
167    2020-21
168    2020-21
169    2020-21
Name: SEASON_ID, Length: 170, dtype: object

In [6]:
connection = sqlite3.connect("../data/nba.db")

pd.read_sql("SELECT * FROM team_basic_boxscores", connection)

Unnamed: 0,SEASON,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS
0,2000-01,1610612739,CLE,Cleveland Cavaliers,0020000002,2000-10-31,CLE @ NJN,W,240,32,...,11,41,52,16,5,8,19,27,86,4
1,2000-01,1610612751,NJN,New Jersey Nets,0020000002,2000-10-31,NJN vs. CLE,L,240,31,...,12,35,47,24,9,8,15,31,82,-4
2,2000-01,1610612745,HOU,Houston Rockets,0020000008,2000-10-31,HOU vs. MIN,L,240,34,...,11,27,38,18,9,7,18,17,98,-8
3,2000-01,1610612750,MIN,Minnesota Timberwolves,0020000008,2000-10-31,MIN @ HOU,W,240,43,...,13,31,44,29,7,1,16,25,106,8
4,2000-01,1610612759,SAS,San Antonio Spurs,0020000009,2000-10-31,SAS vs. IND,W,240,33,...,5,32,37,17,6,4,18,22,98,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53671,2020-21,1610612749,MIL,Milwaukee Bucks,0042000404,2021-07-14,MIL vs. PHX,W,240,39,...,17,31,48,22,11,3,5,18,109,6
53672,2020-21,1610612756,PHX,Phoenix Suns,0042000405,2021-07-17,PHX vs. MIL,L,240,48,...,8,27,35,23,9,5,9,20,119,-4
53673,2020-21,1610612749,MIL,Milwaukee Bucks,0042000405,2021-07-17,MIL @ PHX,W,240,50,...,11,26,37,26,7,1,12,17,123,4
53674,2020-21,1610612749,MIL,Milwaukee Bucks,0042000406,2021-07-20,MIL vs. PHX,W,240,37,...,11,42,53,20,10,6,19,17,105,7


In [5]:
connection.commit()
connection.close()

In [8]:
moneylines.sort_values('GM_DATE')

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_ML,HOME_ML
15,2006-07,2006-10-31,L.A. Lakers,Phoenix,"-,-,-380,-","-,-,+290,-"
14,2006-07,2006-10-31,Miami,Chicago,"-,-,+180,-","-,-,-220,-"
21,2006-07,2006-11-01,Utah,Houston,"-,-,-,-","-,-,-,-"
24,2006-07,2006-11-01,Philadelphia,Atlanta,"-,-,+200,-","-,-,-240,-"
20,2006-07,2006-11-01,Minnesota,Sacramento,"-,-,+190,-","-,-,-230,-"
...,...,...,...,...,...,...
15320,2020-21,2021-07-08,Phoenix,Milwaukee,"+160,+170,+175,-","-185,-195,-210,-"
16238,2020-21,2021-07-11,Milwaukee,Phoenix,"+165,+175,+170,-","-190,-205,-200,-"
15321,2020-21,2021-07-14,Milwaukee,Phoenix,"+175,+162,+160,-","-205,-182,-180,-"
15322,2020-21,2021-07-17,Phoenix,Milwaukee,"+160,+152,+155,-","-185,-172,-175,-"


In [31]:
pd.read_sql_query(
    "SELECT * FROM moneylines", conn)

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_ML,HOME_ML
0,2020-21,2020-12-22,Brooklyn,Golden State,"+290,+275,+275,-","-360,-330,-350,-"
1,2020-21,2020-12-22,L.A. Lakers,L.A. Clippers,"+110,+110,+120,-","-130,-130,-140,-"
2,2020-21,2020-12-23,Cleveland,Charlotte,"-155,-156,-155,-","+135,+136,+135,-"
3,2020-21,2020-12-23,Indiana,New York,"+270,+300,+265,-","-330,-370,-330,-"
4,2020-21,2020-12-23,Orlando,Miami,"-195,-195,-210,-","+170,+170,+175,-"
...,...,...,...,...,...,...
16234,2020-21,2021-06-17,Milwaukee,Brooklyn,"+185,+163,+155,-","-220,-183,-175,-"
16235,2020-21,2021-06-27,Atlanta,Milwaukee,"-210,-190,-200,-","+180,+165,+170,-"
16236,2020-21,2021-07-03,Atlanta,Milwaukee,"+150,+135,+130,-","-170,-155,-150,-"
16237,2020-21,2021-07-06,Phoenix,Milwaukee,"+185,+190,+190,-","-215,-220,-230,-"


In [39]:
season_string(2016)

'2016-17'

In [58]:
def get_season_games(season):
    season_str = season_string(season)
    gamelogs = []
    for season_type in ['Regular Season', 'Playoffs']:
        games = leaguegamelog.LeagueGameLog(season=season_str, season_type_all_star=season_type).get_data_frames()[0]
        gamelogs.append(games)
        
    df = pd.concat(gamelogs)
    
    df['HOME_TEAM'] = df['MATCHUP'].apply(
    lambda x: x[:3] if 'vs' in x else x[-3:])
    
    df['AWAY_TEAM'] = df['MATCHUP'].apply(
    lambda x: x[:3] if '@' in x else x[-3:])
    
    return df

get_season_games(season=2020)

def get_current_spreads_or_mls(conn, table_name='moneylines'):

    connection = sqlite3.connect('../data/nba.db')

    df = pd.read_sql("SELECT * FROM {}".format(table), connection)




    return df


games_2020 = get_season_games(2020)


merged_df = pd.merge(games_2020, moneylines_df, how='left', left_on=['HOME_TEAM', 'AWAY_TEAM', 'GAME_DATE'], right_on=['HOME_TEAM', 'AWAY_TEAM', 'GM_DATE'])

merged_df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,HOME_TEAM,AWAY_TEAM,SEASON,GM_DATE,AWAY_ML,HOME_ML
0,22020,1610612751,BKN,Brooklyn Nets,0022000001,2020-12-22,BKN vs. GSW,W,240,42,...,22,125,26,1,BKN,GSW,2020-21,2020-12-22,"+290,+275,+275,-","-360,-330,-350,-"
1,22020,1610612744,GSW,Golden State Warriors,0022000001,2020-12-22,GSW @ BKN,L,240,37,...,24,99,-26,1,BKN,GSW,2020-21,2020-12-22,"+290,+275,+275,-","-360,-330,-350,-"
2,22020,1610612746,LAC,LA Clippers,0022000002,2020-12-22,LAC @ LAL,W,240,44,...,29,116,7,1,LAL,LAC,2020-21,2020-12-22,"+110,+110,+120,-","-130,-130,-140,-"
3,22020,1610612747,LAL,Los Angeles Lakers,0022000002,2020-12-22,LAL vs. LAC,L,240,38,...,20,109,-7,1,LAL,LAC,2020-21,2020-12-22,"+110,+110,+120,-","-130,-130,-140,-"
4,22020,1610612738,BOS,Boston Celtics,0022000003,2020-12-23,BOS vs. MIL,W,240,48,...,17,122,1,1,BOS,MIL,2020-21,2020-12-23,"-190,-195,-200,-","+165,+170,+170,-"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2325,42020,1610612749,MIL,Milwaukee Bucks,0042000404,2021-07-14,MIL vs. PHX,W,240,39,...,18,109,6,1,MIL,PHX,2020-21,2021-07-14,"+175,+162,+160,-","-205,-182,-180,-"
2326,42020,1610612756,PHX,Phoenix Suns,0042000405,2021-07-17,PHX vs. MIL,L,240,48,...,20,119,-4,1,PHX,MIL,2020-21,2021-07-17,"+160,+152,+155,-","-185,-172,-175,-"
2327,42020,1610612749,MIL,Milwaukee Bucks,0042000405,2021-07-17,MIL @ PHX,W,240,50,...,17,123,4,1,PHX,MIL,2020-21,2021-07-17,"+160,+152,+155,-","-185,-172,-175,-"
2328,42020,1610612749,MIL,Milwaukee Bucks,0042000406,2021-07-20,MIL vs. PHX,W,240,37,...,17,105,7,1,MIL,PHX,2020-21,2021-07-20,"+165,+159,+155,-","-190,-179,-175,-"


In [8]:
def update_moneylines(conn, season=2021, custom_dates=[]):
    table_name = 'moneylines'
    # Get current moneyline data

    if len(custom_dates) == 0:
        current_ml_data = pd.read_sql_query(
            "SELECT * FROM moneylines", conn)

        abbr_mapping = {'Boston': 'BOS', 'Portland': 'POR',
                        'L.A. Lakers': 'LAL', 'Brooklyn': 'BKN',
                        'Cleveland': 'CLE', 'Toronto': 'TOR',
                        'Philadelphia': 'PHI', 'Memphis': 'MEM',
                        'Minnesota': 'MIN', 'New Orleans': 'NOP',
                        'Oklahoma City': 'OKC', 'Dallas': 'DAL',
                        'San Antonio': 'SAS', 'Denver': 'DEN',
                        'Golden State': 'GSW', 'L.A. Clippers': 'LAC',
                        'Orlando': 'ORL', 'Utah': 'UTA',
                        'Charlotte': 'CHA', 'Detroit': 'DET',
                        'Miami': 'MIA', 'Phoenix': 'PHX',
                        'Atlanta': 'ATL', 'New York': 'NYK',
                        'Indiana': 'IND', 'Chicago': 'CHI',
                        'Houston': 'HOU', 'Milwaukee': 'MIL',
                        'Sacramento': 'SAC', 'Washington': 'WAS'}

        current_ml_data['HOME_TEAM'] = current_ml_data['HOME_TEAM'].replace(
            abbr_mapping)
        current_ml_data['AWAY_TEAM'] = current_ml_data['AWAY_TEAM'].replace(
            abbr_mapping)

        up_to_date_games = get_season_games(season)

        merged_df = pd.merge(up_to_date_games, current_ml_data, how='left', left_on=[
                             'HOME_TEAM', 'AWAY_TEAM', 'GAME_DATE'], right_on=['HOME_TEAM', 'AWAY_TEAM', 'GM_DATE'])
        
        
        missing_dates = merged_df.loc[merged_df['AWAY_ML'].isnull(), 'GAME_DATE'].unique().tolist()
        
#         current_dates = current_dates['GM_DATE'].tolist()

#         up_to_date_dates = get_game_dates(season)

#         missing_dates = set(up_to_date_dates) - set(current_dates)

#         print("Updating moneylines for {} days".format(len(missing_dates)))

    else:
        missing_dates = custom_dates

    seasons = []
    gm_dates = []
    away_teams = []
    home_teams = []
    away_mls = []
    home_mls = []

    for date in tqdm(missing_dates, desc='progress'):
        web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/money-line/?date={}'.format(
            date)
        path = '../chromedriver.exe'
        driver = webdriver.Chrome(path)
        driver.get(web)
        sleep(random.randint(1, 2))

        try:
            single_row_events = driver.find_elements_by_class_name(
                'eventMarketGridContainer-3QipG')

        except:
            print("No Data for {}".format(date))
            dates_with_no_data.append(date)
            continue

        num_postponed_events = len(
            driver.find_elements_by_class_name('eventStatus-3EHqw'))

        num_listed_events = len(single_row_events)
        cutoff = num_listed_events - num_postponed_events

        for event in single_row_events[:cutoff]:
            seasons.append(season_string(season))

            away_team = event.find_elements_by_class_name(
                'participantBox-3ar9Y')[0].text
            home_team = event.find_elements_by_class_name(
                'participantBox-3ar9Y')[1].text

            away_teams.append(away_team)
            home_teams.append(home_team)

            gm_dates.append(date)

            mls = event.find_elements_by_class_name('pointer-2j4Dk')

            away_moneyline = []
            home_moneyline = []

            for i, ml in enumerate(mls):
                if i % 2 == 0:
                    away_moneyline.append(ml.text)
                else:
                    home_moneyline.append(ml.text)

            away_moneyline = ",".join(away_moneyline)
            home_moneyline = ",".join(home_moneyline)

            away_mls.append(away_moneyline)
            home_mls.append(home_moneyline)

        driver.quit()
        sleep(random.randint(1, 2))

    clear_output(wait=True)

    df = pd.DataFrame({'SEASON': seasons,
                       'GM_DATE': gm_dates,
                       'AWAY_TEAM': away_teams,
                      'HOME_TEAM': home_teams,
                       'AWAY_ML': away_mls,
                       'HOME_ML': home_mls,
                       })

    df = df.sort_values(['GM_DATE']).reset_index(drop=True)

    df.to_sql(table_name, conn, if_exists='append', index=False)

    cur = connection.cursor()
    cur.execute('''DELETE FROM moneylines 
                    WHERE rowid NOT IN (SELECT MIN(rowid) FROM moneylines
                                        GROUP BY GM_DATE, AWAY_TEAM, HOME_TEAM, AWAY_ML, HOME_ML)''')
    conn.commit()

    return None

In [9]:

connection = sqlite3.connect("../data/nba.db")

update_moneylines(connection, season=2021)

NameError: name 'get_season_games' is not defined

In [67]:
connection = sqlite3.connect("../data/nba.db")

for season in range(2006, 2021):
    update_moneylines(connection, season=season)
    
connection.close()

progress: 0it [00:00, ?it/s]


In [66]:
def update_spreads(conn, season = 2021, custom_dates=[]):
    table_name = 'spreads'
    # Get current spread data
    
    if len(custom_dates) == 0:
        current_spread_data = pd.read_sql_query(
            "SELECT * FROM spreads", conn)

        abbr_mapping = {'Boston': 'BOS', 'Portland': 'POR',
                        'L.A. Lakers': 'LAL', 'Brooklyn': 'BKN',
                        'Cleveland': 'CLE', 'Toronto': 'TOR',
                        'Philadelphia': 'PHI', 'Memphis': 'MEM',
                        'Minnesota': 'MIN', 'New Orleans': 'NOP',
                        'Oklahoma City': 'OKC', 'Dallas': 'DAL',
                        'San Antonio': 'SAS', 'Denver': 'DEN',
                        'Golden State': 'GSW', 'L.A. Clippers': 'LAC',
                        'Orlando': 'ORL', 'Utah': 'UTA',
                        'Charlotte': 'CHA', 'Detroit': 'DET',
                        'Miami': 'MIA', 'Phoenix': 'PHX',
                        'Atlanta': 'ATL', 'New York': 'NYK',
                        'Indiana': 'IND', 'Chicago': 'CHI',
                        'Houston': 'HOU', 'Milwaukee': 'MIL',
                        'Sacramento': 'SAC', 'Washington': 'WAS'}

        current_spread_data['HOME_TEAM'] = current_spread_data['HOME_TEAM'].replace(
            abbr_mapping)
        current_spread_data['AWAY_TEAM'] = current_spread_data['AWAY_TEAM'].replace(
            abbr_mapping)

        up_to_date_games = get_season_games(season)

        merged_df = pd.merge(up_to_date_games, current_spread_data, how='left', left_on=[
                             'HOME_TEAM', 'AWAY_TEAM', 'GAME_DATE'], right_on=['HOME_TEAM', 'AWAY_TEAM', 'GM_DATE'])
        
        
        missing_dates = merged_df.loc[merged_df['AWAY_SPREAD'].isnull(), 'GAME_DATE'].unique().tolist()

        print("Updating spreads for {} days".format(len(missing_dates)))

    else:
        missing_dates = custom_dates
        
    seasons = []
    gm_dates = []
    away_teams = []
    home_teams = []
    away_scoreboards = []
    home_scoreboards = []
    away_spreads = []
    home_spreads = []
    
    
    for date in tqdm(missing_dates, desc='progress'):
            web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/?date={}'.format(date)
            path = '../chromedriver.exe'
            driver = webdriver.Chrome(path)
            driver.get(web)
            sleep(random.randint(1,2))

            try:
                single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')
                
            except:
                print("No Data for {}".format(date))
                dates_with_no_data.append(date)
                continue
                
            num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

            num_listed_events = len(single_row_events)
            cutoff = num_listed_events - num_postponed_events

            for event in single_row_events[:cutoff]:

                away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
                home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
                away_teams.append(away_team)
                home_teams.append(home_team)
                gm_dates.append(date)

                seasons.append(season_string(season))
                
                scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

                home_score = []
                away_score = []

                for score in scoreboard:
                    quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
                    for i in range(len(quarters)):
                        scores = quarters[i].text.split('\n')
                        away_score.append(scores[0])
                        home_score.append(scores[1])
                        
                    home_score = ",".join(home_score)
                    away_score = ",".join(away_score)
                    
                    away_scoreboards.append(away_score)
                    home_scoreboards.append(home_score)


                if len(away_scoreboards) != len(away_teams):
                    num_to_add = len(away_teams) - len(away_scoreboards)
                    for i in range(num_to_add):
                        away_scoreboards.append('')
                        home_scoreboards.append('')

                spreads = event.find_elements_by_class_name('pointer-2j4Dk')
                away_lines = []
                home_lines = []
                for i in range(len(spreads)):    
                    if i % 2 == 0:
                        away_lines.append(spreads[i].text)
                    else:
                        home_lines.append(spreads[i].text)
                
                away_lines = ",".join(away_lines)
                home_lines = ",".join(home_lines)
                
                away_spreads.append(away_lines)
                home_spreads.append(home_lines)

                if len(away_spreads) != len(away_teams):
                    num_to_add = len(away_teams) - len(away_spreads)
                    for i in range(num_to_add):
                        away_scoreboards.append('')
                        home_scoreboards.append('')

            driver.quit()
            clear_output(wait=True)

    df = pd.DataFrame({'SEASON':seasons, 
                      'GM_DATE':gm_dates,
                      'AWAY_TEAM':away_teams,
                      'HOME_TEAM':home_teams,
                      'AWAY_SCOREBOARD':away_scoreboards,
                      'HOME_SCOREBOARD':home_scoreboards,
                      'AWAY_SPREAD':away_spreads,
                      'HOME_SPREAD':home_spreads})


    df.to_sql(table_name, conn, if_exists='append', index=False)
    
    cur = connection.cursor()
    cur.execute('''DELETE FROM spreads 
                    WHERE rowid NOT IN (SELECT MIN(rowid) FROM spreads 
                                        GROUP BY GM_DATE, AWAY_TEAM, HOME_TEAM, AWAY_SPREAD, HOME_SPREAD)''')
    conn.commit()
    
    return None


In [111]:
update_spreads(connection, season=2006)


progress: 100%|████████████████████████████████████████████████████████████████████████| 23/23 [04:17<00:00, 11.21s/it]


In [7]:
connection.close()

In [68]:
connection = sqlite3.connect("../data/nba.db")

for season in range(2006, 2021):
    update_spreads(connection, season=season)
    
connection.close()

progress: 100%|██████████████████████████████████████████████████████████████████████████| 1/1 [00:09<00:00,  9.95s/it]
progress: 0it [00:00, ?it/s]

Updating spreads for 0 days



progress: 0it [00:00, ?it/s]

Updating spreads for 0 days



progress: 0it [00:00, ?it/s]

Updating spreads for 0 days



progress: 0it [00:00, ?it/s]

Updating spreads for 0 days



progress: 0it [00:00, ?it/s]

Updating spreads for 0 days



progress: 0it [00:00, ?it/s]

Updating spreads for 0 days





Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_SCOREBOARD,HOME_SCOREBOARD,AWAY_SPREAD,HOME_SPREAD
0,2006-07,2006-10-31,Miami,Chicago,22372128108,1614211566,"-,-,+4½-110,-","-,-,-4½-110,-"
1,2006-07,2006-10-31,L.A. Lakers,Phoenix,41172127106,26273427114,"-,-,-8½-110,-","-,-,+8½-110,-"
2,2006-07,2006-11-02,Dallas,San Antonio,2619272597,2724241691,"-,-,+3-110,-","-,-,-3-110,-"
3,2006-07,2006-11-02,L.A. Clippers,Denver,2424222595,1831163196,"-,-,+4½-110,-","-,-,-4½-110,-"
4,2006-07,2006-11-03,Memphis,Charlotte,2126231383,2420163696,"-,-,+5-110,-","-,-,-5-110,-"
...,...,...,...,...,...,...,...,...
1097,2006-07,2007-01-06,Cleveland,Brooklyn,2117272691,2719252596,,
1098,2006-07,2007-01-06,New Orleans,Indiana,23272129100,1828242393,"-,-,-10½-105,-","-,-,+10½-115,-"
1099,2006-07,2007-01-06,Chicago,Detroit,2818202389,32232922106,"-,-,+5-110,-","-,-,-5-110,-"
1100,2006-07,2007-01-06,Denver,Utah,2721173196,1616302284,"-,-,-6-110,-","-,-,+6-110,-"


In [71]:
connection = sqlite3.connect("../data/nba.db")

spreads = pd.read_sql("SELECT * FROM spreads", connection)

spreads.sort_values('GM_DATE')

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_SCOREBOARD,HOME_SCOREBOARD,AWAY_SPREAD,HOME_SPREAD
0,2006-07,2006-10-31,Miami,Chicago,22372128108,1614211566,"-,-,+4½-110,-","-,-,-4½-110,-"
1,2006-07,2006-10-31,L.A. Lakers,Phoenix,41172127106,26273427114,"-,-,-8½-110,-","-,-,+8½-110,-"
1004,2006-07,2006-11-01,Philadelphia,Atlanta,1421231775,2920241588,"-,-,+6-115,-","-,-,-6-105,-"
1005,2006-07,2006-11-01,Boston,New Orleans,2628211691,1625242287,"-,-,+3½-110,-","-,-,-3½-110,-"
1006,2006-07,2006-11-01,Brooklyn,Toronto,2722212292,28252128102,"-,-,+7-110,-","-,-,-7-110,-"
...,...,...,...,...,...,...,...,...
18771,2020-21,2021-07-08,Phoenix,Milwaukee,29163330108,26303230118,"+4-108,+4½-105,+4½-105,-","-4-108,-4½-115,-4½-115,-"
18772,2020-21,2021-07-11,Milwaukee,Phoenix,28173124100,25353822120,"+5-108,+4½-105,+4½-110,-","-5-108,-4½-115,-4½-110,-"
18773,2020-21,2021-07-14,Milwaukee,Phoenix,23293021103,20322433109,"+5-108,+4½-113,+4½-115,-","-5-108,-4½-107,-4½-105,-"
18774,2020-21,2021-07-17,Phoenix,Milwaukee,21433623123,37242929119,"+4-108,+4-111,+4½-115,-","-4-108,-4-109,-4½-105,-"


In [73]:
spreads.loc[spreads['SEASON'] == '2020-21']

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_SCOREBOARD,HOME_SCOREBOARD,AWAY_SPREAD,HOME_SPREAD
17611,2020-21,2020-12-22,Brooklyn,Golden State,2520262899,40233626125,"+7½-108,+7-110,+7½-110,-","-7½-108,-7-110,-7½-110,-"
17612,2020-21,2020-12-22,L.A. Lakers,L.A. Clippers,39173327116,19352431109,"+2-108,+2-117,+2½-105,-","-2-108,-2-103,-2½-115,-"
17613,2020-21,2020-12-23,Phoenix,Dallas,22232631102,23302429106,"+2-108,+1½-106,+1½-110,-","-2-108,-1½-114,-1½-110,-"
17614,2020-21,2020-12-23,Portland,Utah,32332926120,25192432100,"-1½-108,-2-105,-1½-110,-","+1½-108,+2-115,+1½-110,-"
17615,2020-21,2020-12-23,Denver,Sacramento,2528362312124,3129242810122,"+7½-108,+7½-108,+7½-110,-","-7½-108,-7½-112,-7½-110,-"
...,...,...,...,...,...,...,...,...
18771,2020-21,2021-07-08,Phoenix,Milwaukee,29163330108,26303230118,"+4-108,+4½-105,+4½-105,-","-4-108,-4½-115,-4½-115,-"
18772,2020-21,2021-07-11,Milwaukee,Phoenix,28173124100,25353822120,"+5-108,+4½-105,+4½-110,-","-5-108,-4½-115,-4½-110,-"
18773,2020-21,2021-07-14,Milwaukee,Phoenix,23293021103,20322433109,"+5-108,+4½-113,+4½-115,-","-5-108,-4½-107,-4½-105,-"
18774,2020-21,2021-07-17,Phoenix,Milwaukee,21433623123,37242929119,"+4-108,+4-111,+4½-115,-","-4-108,-4-109,-4½-105,-"


In [75]:
moneylines = pd.read_sql("SELECT * FROM moneylines", connection)

moneylines.loc[moneylines['SEASON'] == '2020-21']

moneylines

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_ML,HOME_ML
0,2020-21,2020-12-22,Brooklyn,Golden State,"+290,+275,+275,-","-360,-330,-350,-"
1,2020-21,2020-12-22,L.A. Lakers,L.A. Clippers,"+110,+110,+120,-","-130,-130,-140,-"
2,2020-21,2020-12-23,Cleveland,Charlotte,"-155,-156,-155,-","+135,+136,+135,-"
3,2020-21,2020-12-23,Indiana,New York,"+270,+300,+265,-","-330,-370,-330,-"
4,2020-21,2020-12-23,Orlando,Miami,"-195,-195,-210,-","+170,+170,+175,-"
...,...,...,...,...,...,...
18878,2019-20,2020-08-05,Washington,Philadelphia,"-610,-550,-620,-","+460,+425,+410,-"
18879,2019-20,2020-08-05,San Antonio,Denver,"-130,-135,-140,-","+110,+115,+120,-"
18880,2019-20,2020-08-11,San Antonio,Houston,"+155,+163,+145,-","-175,-183,-165,-"
18881,2019-20,2020-08-11,Orlando,Brooklyn,"+160,+162,+165,-","-185,-182,-190,-"


In [98]:
moneylines = moneylines.loc[(spreads['AWAY_ML'] != '') & (moneylines['HOME_ML'] != '')]
moneylines

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_ML,HOME_ML
0,2020-21,2020-12-22,Brooklyn,Golden State,"+290,+275,+275,-","-360,-330,-350,-"
1,2020-21,2020-12-22,L.A. Lakers,L.A. Clippers,"+110,+110,+120,-","-130,-130,-140,-"
2,2020-21,2020-12-23,Cleveland,Charlotte,"-155,-156,-155,-","+135,+136,+135,-"
3,2020-21,2020-12-23,Indiana,New York,"+270,+300,+265,-","-330,-370,-330,-"
4,2020-21,2020-12-23,Orlando,Miami,"-195,-195,-210,-","+170,+170,+175,-"
...,...,...,...,...,...,...
18844,2020-21,2021-07-17,Phoenix,Milwaukee,"+160,+152,+155,-","-185,-172,-175,-"
18845,2020-21,2021-07-20,Milwaukee,Phoenix,"+165,+159,+155,-","-190,-179,-175,-"
18850,2020-21,2021-01-11,Atlanta,Philadelphia,"+180,+188,+185,-","-210,-218,-225,-"
18851,2020-21,2021-01-11,Portland,Toronto,"+150,+162,+160,-","-170,-182,-180,-"


In [93]:
moneylines.to_sql("")

Unnamed: 0,SEASON,GM_DATE,HOME_TEAM,AWAY_TEAM,AWAY_ML,HOME_ML
0,2020-21,2020-12-22,Brooklyn,Golden State,"+290,+275,+275,-","-360,-330,-350,-"
1,2020-21,2020-12-22,L.A. Lakers,L.A. Clippers,"+110,+110,+120,-","-130,-130,-140,-"
2,2020-21,2020-12-23,Cleveland,Charlotte,"-155,-156,-155,-","+135,+136,+135,-"
3,2020-21,2020-12-23,Indiana,New York,"+270,+300,+265,-","-330,-370,-330,-"
4,2020-21,2020-12-23,Orlando,Miami,"-195,-195,-210,-","+170,+170,+175,-"
...,...,...,...,...,...,...
18844,2020-21,2021-07-17,Phoenix,Milwaukee,"+160,+152,+155,-","-185,-172,-175,-"
18845,2020-21,2021-07-20,Milwaukee,Phoenix,"+165,+159,+155,-","-190,-179,-175,-"
18850,2020-21,2021-01-11,Atlanta,Philadelphia,"+180,+188,+185,-","-210,-218,-225,-"
18851,2020-21,2021-01-11,Portland,Toronto,"+150,+162,+160,-","-170,-182,-180,-"


In [133]:
# Get Spreads


gm_date = []
away_teams = []
home_teams = []
away_scoreboards = []
home_scoreboards = []
away_spreads = []
home_spreads = []

for date in tqdm(dates_to_add, desc='progress'):
    web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/?date={}'.format(date)
    path = '../chromedriver.exe'
    driver = webdriver.Chrome(path)
    driver.get(web)

    single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

    num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

    num_listed_events = len(single_row_events)
    cutoff = num_listed_events - num_postponed_events

    for event in single_row_events[:cutoff]:

        away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
        home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
        away_teams.append(away_team)
        home_teams.append(home_team)
        gm_date.append(date)


        scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

        home_score = []
        away_score = []

        for score in scoreboard:
            quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
            for i in range(len(quarters)):
                scores = quarters[i].text.split('\n')
                away_score.append(scores[0])
                home_score.append(scores[1])
            away_scoreboards.append(away_score)
            home_scoreboards.append(home_score)
            
            
        if len(away_scoreboards) != len(away_teams):
            num_to_add = len(away_teams) - len(away_scoreboards)
            for i in range(num_to_add):
                away_scoreboards.append([])
                home_scoreboards.append([])
            
        
        spreads = event.find_elements_by_class_name('pointer-2j4Dk')
        away_lines = []
        home_lines = []
        for i in range(len(spreads)):    
            if i % 2 == 0:
                away_lines.append(spreads[i].text)
            else:
                home_lines.append(spreads[i].text)
        away_spreads.append(away_lines)
        home_spreads.append(home_lines)
        
        if len(away_spreads) != len(away_teams):
            num_to_add = len(away_teams) - len(away_spreads)
            for i in range(num_to_add):
                away_scoreboards.append([])
                home_scoreboards.append([])
            
        
    driver.quit()
    sleep(random.randint(1,2))

spread_df = pd.DataFrame({'away_team':away_teams,
                  'home_team':home_teams,
                   'game_date':gm_date,
                  'away_scoreboard':away_scoreboards,
                  'home_scoreboard':home_scoreboards,
                  'away_spreads':away_spreads,
                  'home_spreads':home_spreads})

spread_df


progress:   0%|                                                                                | 0/627 [00:00<?, ?it/s]

adding: 627 dates


progress: 100%|████████████████████████████████████████████████████████████████████| 627/627 [2:27:34<00:00, 14.12s/it]


Unnamed: 0,away_team,home_team,game_date,away_scoreboard,home_scoreboard,away_moneyline,home_moneyline
0,San Antonio,Houston,2017-05-11,"[31, 30, 26, 27, 114]","[24, 18, 22, 11, 75]","[+9-115, +9-108, +8½-110, +9-105]","[-9-105, -9-112, -8½-110, -9-105]"
1,Atlanta,Boston,2008-05-04,"[16, 10, 17, 22, 65]","[27, 17, 35, 20, 99]","[+14½-105, +15-110, +15-110, -]","[-14½-115, -15-110, -15-110, -]"
2,Utah,L.A. Lakers,2008-05-04,"[24, 17, 31, 26, 98]","[25, 29, 25, 30, 109]","[+8½-110, +8½-105, +8½-110, -]","[-8½-110, -8½-115, -8½-110, -]"
3,L.A. Lakers,Houston,2009-05-10,"[16, 20, 18, 33, 87]","[29, 25, 29, 16, 99]",[],[]
4,Boston,Orlando,2009-05-10,"[25, 23, 31, 16, 95]","[28, 18, 25, 23, 94]","[+5-115, +5½-110, +5½-110, -]","[-5-105, -5½-110, -5½-110, -]"
...,...,...,...,...,...,...,...
1167,Phoenix,San Antonio,2007-05-12,"[30, 23, 19, 29, 101]","[25, 30, 25, 28, 108]","[+4-110, +4½-112, +4-110, -]","[-4-110, -4½-108, -4-110, -]"
1168,Toronto,Orlando,2008-04-28,"[26, 24, 18, 24, 92]","[22, 27, 25, 28, 102]","[+7½-105, +7½-110, +7-110, -]","[-7½-115, -7½-110, -7-110, -]"
1169,Boston,Atlanta,2008-04-28,"[24, 24, 27, 17, 92]","[29, 22, 14, 32, 97]","[-10-105, -9-110, -9-110, -]","[+10-115, +9-110, +9-110, -]"
1170,L.A. Lakers,Denver,2008-04-28,"[32, 32, 15, 28, 107]","[23, 31, 23, 24, 101]","[-5-105, -4-110, -4-110, -]","[+5-115, +4-110, +4-110, -]"


In [135]:
spreads_orig = pd.read_csv("../data/all_spreads_sbr.csv")

updated_spreads = pd.concat([spreads_orig, spread_df])

updated_spreads['game_date'] = pd.to_datetime(updated_spreads['game_date'])
updated_spreads.sort_values(['game_date'], inplace=True)

updated_spreads.rename(columns={'away_moneyline':'away_spread',
                               'home_moneyline':'home_spread'}, inplace=True)
updated_spreads

Unnamed: 0,away_team,home_team,game_date,away_scoreboard,home_scoreboard,away_spread,home_spread
0,Chicago,Miami,2006-10-31,"['22', '37', '21', '28', '108']","['16', '14', '21', '15', '66']","['+4½-110', '-', '+4½-110', '-']","['-4½-110', '-', '-4½-110', '-']"
1129,Phoenix,L.A. Lakers,2006-10-31,"['41', '17', '21', '27', '106']","['26', '27', '34', '27', '114']","['-8½-110', '-', '-8-110', '-']","['+8½-110', '-', '+8-110', '-']"
1128,Chicago,Miami,2006-10-31,"['22', '37', '21', '28', '108']","['16', '14', '21', '15', '66']","['+4½-110', '-', '+4½-110', '-']","['-4½-110', '-', '-4½-110', '-']"
1,Phoenix,L.A. Lakers,2006-10-31,"['41', '17', '21', '27', '106']","['26', '27', '34', '27', '114']","['-8½-110', '-', '-8-110', '-']","['+8½-110', '-', '+8-110', '-']"
3,Chicago,Orlando,2006-11-01,"['20', '25', '25', '24', '94']","['32', '31', '23', '23', '109']","['-2-110', '-', '-1-110', '-']","['+2-110', '-', '+1-110', '-']"
...,...,...,...,...,...,...,...
501,Atlanta,Orlando,2021-03-03,"[27, 15, 36, 37, 115]","[39, 22, 29, 22, 112]","[-3-105, -3-107, -2½-110, -2½-108]","[+3-115, +3-113, +2½-110, +2½-108]"
500,Brooklyn,Houston,2021-03-03,"[30, 37, 34, 31, 132]","[27, 27, 31, 29, 114]","[-10½-115, -10½-110, -11-110, -11-108]","[+10½-105, +10½-110, +11-110, +11-108]"
499,Detroit,Toronto,2021-03-03,"[43, 26, 29, 31, 129]","[37, 23, 20, 25, 105]","[+7½-110, +7½-110, +7½-105, +8-108]","[-7½-110, -7½-110, -7½-115, -8-108]"
506,Golden State,Portland,2021-03-03,"[29, 27, 24, 26, 106]","[28, 27, 27, 26, 108]","[-5½-120, -1-115, -1-115, -1-108]","[+5½-120, +1-105, +1-105, +1-108]"


In [138]:
updated_spreads = updated_spreads.drop_duplicates(['away_team','home_team', 'game_date'])

In [139]:
updated_spreads.to_csv("../data/all_spreads_sbr.csv", index=False)

In [114]:
spread_df

Unnamed: 0,away_team,home_team,game_date,away_scoreboard,home_scoreboard,away_moneyline,home_moneyline
0,Chicago,Miami,2006-10-31,"[22, 37, 21, 28, 108]","[16, 14, 21, 15, 66]","[+4½-110, -, +4½-110, -]","[-4½-110, -, -4½-110, -]"
1,Phoenix,L.A. Lakers,2006-10-31,"[41, 17, 21, 27, 106]","[26, 27, 34, 27, 114]","[-8½-110, -, -8-110, -]","[+8½-110, -, +8-110, -]"
2,Indiana,Charlotte,2006-11-01,"[23, 26, 28, 29, 106]","[27, 23, 23, 26, 99]","[-1-110, -, -1-110, -]","[+1-110, -, +1-110, -]"
3,Chicago,Orlando,2006-11-01,"[20, 25, 25, 24, 94]","[32, 31, 23, 23, 109]","[-2-110, -, -1-110, -]","[+2-110, -, +1-110, -]"
4,Atlanta,Philadelphia,2006-11-01,"[14, 21, 23, 17, 75]","[29, 20, 24, 15, 88]","[+6-115, -, +5½-110, -]","[-6-105, -, -5½-110, -]"
...,...,...,...,...,...,...,...
33579,Atlanta,Miami,2021-03-02,"[23, 21, 19, 31, 94]","[17, 20, 29, 14, 80]","[+2-110, +2½-115, +2½-110, +2-108]","[-2-110, -2½-105, -2½-110, -2-108]"
33580,L.A. Clippers,Boston,2021-03-02,"[32, 31, 26, 23, 112]","[35, 27, 26, 29, 117]","[+8½-120, -5-110, -3-110, -5-108]","[-8½-120, +5-110, +3-110, +5-108]"
33581,New York,San Antonio,2021-03-02,"[23, 24, 21, 25, 93]","[25, 26, 36, 32, 119]","[-2-105, -1½-113, -1½-105, -2-108]","[+2-115, +1½-107, +1½-115, +2-108]"
33582,Denver,Milwaukee,2021-03-02,"[37, 27, 35, 29, 128]","[23, 27, 30, 17, 97]","[+8-110, +8-109, +7½-105, +8-108]","[-8-110, -8-111, -7½-115, -8-108]"


In [115]:
spread_df.rename(columns={'away_moneyline':'away_spread', 'home_moneyline':'home_spread'})
spread_df.to_csv("../data/all_spreads_sbr.csv", index=False)

In [82]:

gm_date = []
away_teams = []
home_teams = []
away_scoreboards = []
home_scoreboards = []
away_moneylines = []
home_moneylines = []

web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/money-line/?date={}'.format(date)
path = '../chromedriver.exe'
driver = webdriver.Chrome(path)
driver.get(web)

single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

num_listed_events = len(single_row_events)
cutoff = num_listed_events - num_postponed_events

for event in single_row_events[:cutoff]:

    away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
    home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
    away_teams.append(away_team)
    home_teams.append(home_team)
    gm_date.append(date)


    scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

    home_score = []
    away_score = []

    for score in scoreboard:
        quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
        for i in range(len(quarters)):
            scores = quarters[i].text.split('\n')
            away_score.append(scores[0])
            home_score.append(scores[1])

        away_scoreboards.append(away_score)
        home_scoreboards.append(home_score)

    mls = event.find_elements_by_class_name('pointer-2j4Dk')
    away_lines = []
    home_lines = []
    for i in range(len(mls)):    
        if i % 2 == 0:
            away_lines.append(mls[i].text)
        else:
            home_lines.append(mls[i].text)
    away_moneylines.append(away_lines)
    home_moneylines.append(home_lines)

    driver.quit()

### 1st Half Spreads

In [4]:

seasons = ['200{}-0{}'.format(x, x+1) if x!=9 else '2009-10' for x in range(6, 10)]
seasons2 = ['20{}-{}'.format(x, x+1) for x in range(10, 20)]
seasons.extend(seasons2)
seasons
dates = []
for season in seasons:
    df = pd.read_csv("../data/basic_team_boxscores/team_gamelogs_{}.csv".format(season))
    season_dates = df['GAME_DATE'].unique()
    dates.extend(season_dates)
    
dates[:10]

['2006-10-31',
 '2006-11-01',
 '2006-11-02',
 '2006-11-03',
 '2006-11-04',
 '2006-11-05',
 '2006-11-06',
 '2006-11-07',
 '2006-11-08',
 '2006-11-09']

In [10]:
dates = []
for season_type in ['Regular Season', 'Playoffs']:
    gamelog = leaguegamelog.LeagueGameLog(season_type_all_star=season_type).get_data_frames()[0]    
    dates.extend(gamelog['GAME_DATE'].tolist())
    
dates[-1]
    
    

'2021-06-07'

In [5]:
current_spreads_1H = pd.read_csv("../data/spreads_1H.csv")
missing_dates = set([])
# spread_df.loc[spread_df['away_1H_spread'] == "-, -, -, -"]
# spread_df.loc[spread_df['home_1H_spread'] == "-, -, -, -"]
dates = current_spreads_1H.loc[current_spreads_1H['home_1H_spread'].isnull(), 'game_date'].astype(str)
missing_dates.update(dates)
# spread_df.loc[274:274]
len(missing_dates)

missing_dates

set()

In [11]:
current_dates = current_spreads_1H.loc[current_spreads_1H['home_1H_spread'].isnull(), 'game_date'].astype(str)
set(dates) - set(current_dates) 

{'2020-12-22',
 '2020-12-23',
 '2020-12-25',
 '2020-12-26',
 '2020-12-27',
 '2020-12-28',
 '2020-12-29',
 '2020-12-30',
 '2020-12-31',
 '2021-01-01',
 '2021-01-02',
 '2021-01-03',
 '2021-01-04',
 '2021-01-05',
 '2021-01-06',
 '2021-01-07',
 '2021-01-08',
 '2021-01-09',
 '2021-01-10',
 '2021-01-11',
 '2021-01-12',
 '2021-01-13',
 '2021-01-14',
 '2021-01-15',
 '2021-01-16',
 '2021-01-17',
 '2021-01-18',
 '2021-01-19',
 '2021-01-20',
 '2021-01-21',
 '2021-01-22',
 '2021-01-23',
 '2021-01-24',
 '2021-01-25',
 '2021-01-26',
 '2021-01-27',
 '2021-01-28',
 '2021-01-29',
 '2021-01-30',
 '2021-01-31',
 '2021-02-01',
 '2021-02-02',
 '2021-02-03',
 '2021-02-04',
 '2021-02-05',
 '2021-02-06',
 '2021-02-07',
 '2021-02-08',
 '2021-02-09',
 '2021-02-10',
 '2021-02-11',
 '2021-02-12',
 '2021-02-13',
 '2021-02-14',
 '2021-02-15',
 '2021-02-16',
 '2021-02-17',
 '2021-02-18',
 '2021-02-19',
 '2021-02-20',
 '2021-02-21',
 '2021-02-22',
 '2021-02-23',
 '2021-02-24',
 '2021-02-25',
 '2021-02-26',
 '2021-02-

In [31]:

gm_date = []
away_teams = []
home_teams = []
away_spreads = []
home_spreads = []


for date in tqdm(missing_dates, desc='progress'):
    web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/pointspread/1st-half/?date={}'.format(date)
    path = '../chromedriver.exe'
    driver = webdriver.Chrome(path)
    driver.get(web)
    
    sleep(random.randint(4,5))
    single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

    num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

    num_listed_events = len(single_row_events)
    cutoff = num_listed_events - num_postponed_events

    for event in single_row_events[:cutoff]:

        away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
        home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
        away_teams.append(away_team)
        home_teams.append(home_team)
        gm_date.append(date)


#         scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

#         home_score = []
#         away_score = []

#         for score in scoreboard:
#             quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
#             for i in range(len(quarters)):
#                 scores = quarters[i].text.split('\n')
#                 away_score.append(scores[0])
#                 home_score.append(scores[1])
#             away_scoreboards.append(away_score)
#             home_scoreboards.append(home_score)
            
            
#         if len(away_scoreboards) != len(away_teams):
#             num_to_add = len(away_teams) - len(away_scoreboards)
#             for i in range(num_to_add):
#                 away_scoreboards.append([])
#                 home_scoreboards.append([])
            
        
        spreads = event.find_elements_by_class_name('pointer-2j4Dk')
        away_lines = []
        home_lines = []
        for i in range(len(spreads)):    
            if i % 2 == 0:
                away_lines.append(spreads[i].text)
            else:
                home_lines.append(spreads[i].text)
        away_spreads.append(away_lines)
        home_spreads.append(home_lines)
        
        if len(away_spreads) != len(away_teams):
            num_to_add = len(away_teams) - len(away_spreads)
            for i in range(num_to_add):
                away_spreads.append([])
                home_spreads.append([])
            
        
    driver.quit()

spreads_1H_df = pd.DataFrame({'away_team':away_teams,
                  'home_team':home_teams,
                   'game_date':gm_date,
                  'away_1H_spread':away_spreads,
                  'home_1H_spread':home_spreads})

spreads_1H_df


progress: 100%|██████████████████████████████████████████████████████████████████████████| 5/5 [01:10<00:00, 14.01s/it]


Unnamed: 0,away_team,home_team,game_date,away_1H_spread,home_1H_spread
0,Utah,Indiana,2019-11-27,"[PK-105, PK+100, +½-110, PK-105]","[PK-115, PK-120, -½-110, PK-115]"
1,Brooklyn,Boston,2019-11-27,"[+4½-105, +4½-115, +4½-115, +4½-110]","[-4½-115, -4½-105, -4½-105, -4½-110]"
2,Sacramento,Philadelphia,2019-11-27,"[+5½-110, +5½-110, +5½-105, +5½-110]","[-5½-110, -5½-110, -5½-115, -5½-110]"
3,Orlando,Cleveland,2019-11-27,"[PK-105, PK+100, +½-120, PK-105]","[PK-115, PK-120, -½+100, PK-115]"
4,Detroit,Charlotte,2019-11-27,"[-1½-115, -1½-110, -1½-105, -1½-110]","[+1½-105, +1½-110, +1½-115, +1½-110]"
5,New York,Toronto,2019-11-27,"[+6-110, +6-115, +5½-115, +6-115]","[-6-110, -6-105, -5½-105, -6-105]"
6,Atlanta,Milwaukee,2019-11-27,"[+8½-110, +8½-110, +8½-110, +8½-110]","[-8½-110, -8½-110, -8½-110, -8½-110]"
7,L.A. Clippers,Memphis,2019-11-27,"[-3-110, -3-101, -2½-110, -3-105]","[+3-110, +3-119, +2½-110, +3-115]"
8,Miami,Houston,2019-11-27,"[+4½-110, +4-105, +4½-110, +4-105]","[-4½-110, -4-115, -4½-110, -4-115]"
9,Minnesota,San Antonio,2019-11-27,"[+1½-110, +1½-110, +1½-110, +1½-105]","[-1½-110, -1½-110, -1½-110, -1½-115]"


In [32]:
current_spreads_1H

Unnamed: 0,away_team,home_team,game_date,away_1H_spread,home_1H_spread
0,Chicago,Miami,2006-10-31,"'-', '-', '+2½-110', '-'","'-', '-', '-2½-110', '-'"
1,Phoenix,L.A. Lakers,2006-10-31,"'-', '-', '-4½-115', '-'","'-', '-', '+4½-105', '-'"
2,Indiana,Charlotte,2006-11-01,"'-', '-', 'PK-110', '-'","'-', '-', 'PK-110', '-'"
3,Chicago,Orlando,2006-11-01,"'-', '-', 'PK-120', '-'","'-', '-', 'PK+100', '-'"
4,Atlanta,Philadelphia,2006-11-01,"'-', '-', '+2½-105', '-'","'-', '-', '-2½-115', '-'"
...,...,...,...,...,...
17702,Utah,Memphis,2019-11-29,,
17703,L.A. Clippers,San Antonio,2019-11-29,,
17704,Dallas,Phoenix,2019-11-29,,
17705,Chicago,Portland,2019-11-29,,


In [33]:
spreads_1H_df
for col in spreads_1H_df.columns[3:]:
    spreads_1H_df[col] = spreads_1H_df[col].astype(str)
    spreads_1H_df[col] = spreads_1H_df[col].str.replace("[", "")
    spreads_1H_df[col] = spreads_1H_df[col].str.replace("]", "")
    spreads_1H_df[col] = spreads_1H_df[col].str.strip()
    
spreads_1H_df

Unnamed: 0,away_team,home_team,game_date,away_1H_spread,home_1H_spread
0,Utah,Indiana,2019-11-27,"'PK-105', 'PK+100', '+½-110', 'PK-105'","'PK-115', 'PK-120', '-½-110', 'PK-115'"
1,Brooklyn,Boston,2019-11-27,"'+4½-105', '+4½-115', '+4½-115', '+4½-110'","'-4½-115', '-4½-105', '-4½-105', '-4½-110'"
2,Sacramento,Philadelphia,2019-11-27,"'+5½-110', '+5½-110', '+5½-105', '+5½-110'","'-5½-110', '-5½-110', '-5½-115', '-5½-110'"
3,Orlando,Cleveland,2019-11-27,"'PK-105', 'PK+100', '+½-120', 'PK-105'","'PK-115', 'PK-120', '-½+100', 'PK-115'"
4,Detroit,Charlotte,2019-11-27,"'-1½-115', '-1½-110', '-1½-105', '-1½-110'","'+1½-105', '+1½-110', '+1½-115', '+1½-110'"
5,New York,Toronto,2019-11-27,"'+6-110', '+6-115', '+5½-115', '+6-115'","'-6-110', '-6-105', '-5½-105', '-6-105'"
6,Atlanta,Milwaukee,2019-11-27,"'+8½-110', '+8½-110', '+8½-110', '+8½-110'","'-8½-110', '-8½-110', '-8½-110', '-8½-110'"
7,L.A. Clippers,Memphis,2019-11-27,"'-3-110', '-3-101', '-2½-110', '-3-105'","'+3-110', '+3-119', '+2½-110', '+3-115'"
8,Miami,Houston,2019-11-27,"'+4½-110', '+4-105', '+4½-110', '+4-105'","'-4½-110', '-4-115', '-4½-110', '-4-115'"
9,Minnesota,San Antonio,2019-11-27,"'+1½-110', '+1½-110', '+1½-110', '+1½-105'","'-1½-110', '-1½-110', '-1½-110', '-1½-115'"


In [34]:
current_spreads_1H = pd.concat([current_spreads_1H, spreads_1H_df])
current_spreads_1H = current_spreads_1H.drop_duplicates(subset = ['away_team', 'home_team', 'game_date'], keep='last')

In [35]:
spreads_1H_df['game_date'] = pd.to_datetime(spreads_1H_df['game_date'])
spreads_1H_df.sort_values('game_date', inplace=True)
current_spreads_1H.to_csv('../data/spreads_1H.csv', index=False)


In [36]:
current_spreads_1H

Unnamed: 0,away_team,home_team,game_date,away_1H_spread,home_1H_spread
0,Chicago,Miami,2006-10-31,"'-', '-', '+2½-110', '-'","'-', '-', '-2½-110', '-'"
1,Phoenix,L.A. Lakers,2006-10-31,"'-', '-', '-4½-115', '-'","'-', '-', '+4½-105', '-'"
2,Indiana,Charlotte,2006-11-01,"'-', '-', 'PK-110', '-'","'-', '-', 'PK-110', '-'"
3,Chicago,Orlando,2006-11-01,"'-', '-', 'PK-120', '-'","'-', '-', 'PK+100', '-'"
4,Atlanta,Philadelphia,2006-11-01,"'-', '-', '+2½-105', '-'","'-', '-', '-2½-115', '-'"
...,...,...,...,...,...
55,Utah,Memphis,2019-11-29,"'-3½-115', '-3-120', '-4-105', '-3½-115'","'+3½-105', '+3+100', '+4-115', '+3½-105'"
56,L.A. Clippers,San Antonio,2019-11-29,"'-2½-110', '-2½-110', '-2½-115', '-2½-115'","'+2½-110', '+2½-110', '+2½-105', '+2½-105'"
57,Dallas,Phoenix,2019-11-29,"'-½-110', 'PK-125', '-½-115', '-1-105'","'+½-110', 'PK+105', '+½-105', '+1-115'"
58,Chicago,Portland,2019-11-29,"'+4-115', '+3½-103', '+3½-105', '+4-110'","'-4-105', '-3½-117', '-3½-115', '-4-110'"


In [18]:
spreads_1H_df
for col in spreads_1H_df.columns[3:]:
    spreads_1H_df[col] = spreads_1H_df[col].astype(str)
    spreads_1H_df[col] = spreads_1H_df[col].str.replace("[", "")
    spreads_1H_df[col] = spreads_1H_df[col].str.replace("]", "")
    spreads_1H_df[col] = spreads_1H_df[col].str.strip()
    
spreads_1H_df['game_date'] = pd.to_datetime(spreads_1H_df['game_date'])
spreads_1H_df.sort_values('game_date', inplace=True)
spreads_1H_df

Unnamed: 0,away_team,home_team,game_date,away_1H_spread,home_1H_spread
0,Chicago,Miami,2006-10-31,"'-', '-', '+2½-110', '-'","'-', '-', '-2½-110', '-'"
1,Phoenix,L.A. Lakers,2006-10-31,"'-', '-', '-4½-115', '-'","'-', '-', '+4½-105', '-'"
2,Indiana,Charlotte,2006-11-01,"'-', '-', 'PK-110', '-'","'-', '-', 'PK-110', '-'"
3,Chicago,Orlando,2006-11-01,"'-', '-', 'PK-120', '-'","'-', '-', 'PK+100', '-'"
4,Atlanta,Philadelphia,2006-11-01,"'-', '-', '+2½-105', '-'","'-', '-', '-2½-115', '-'"
...,...,...,...,...,...
17702,Miami,L.A. Lakers,2020-10-02,"'+6½+100', '+6-105', '+6-103', '+6½-110'","'-6½-120', '-6-115', '-6-118', '-6½-110'"
17703,L.A. Lakers,Miami,2020-10-04,"'-6-105', '-5½-117', '-6-110', '-6-110'","'+6-115', '+5½-103', '+6-110', '+6-110'"
17704,L.A. Lakers,Miami,2020-10-06,"'-5-125', '-5-110', '-4½+111', '-5½-105'","'+5+105', '+5-110', '+4½-133', '+5½-115'"
17705,Miami,L.A. Lakers,2020-10-09,"'+4-105', '+4-105', '+3½-110', '+4½-115'","'-4-115', '-4-115', '-3½-110', '-4½-105'"


In [19]:
spreads_1H_df.to_csv("../data/first_half_spreads.csv", index=False)

### 1st Half Moneylines

In [5]:
dates[:10]

['2006-10-31',
 '2006-11-01',
 '2006-11-02',
 '2006-11-03',
 '2006-11-04',
 '2006-11-05',
 '2006-11-06',
 '2006-11-07',
 '2006-11-08',
 '2006-11-09']

In [8]:
# Get Moneylines

gm_date = []
away_teams = []
home_teams = []
away_scoreboards = []
home_scoreboards = []
away_moneylines_1H = []
home_moneylines_1H = []

for date in tqdm(dates[:2], desc='progress'):
    web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/money-line/1st-half/?date={}'.format(date)
    path = '../chromedriver.exe'
    driver = webdriver.Chrome(path)
    driver.get(web)
    sleep(random.randint(1,2))

    single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

    num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

    num_listed_events = len(single_row_events)
    cutoff = num_listed_events - num_postponed_events

    for event in single_row_events[:cutoff]:

        away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
        home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
        away_teams.append(away_team)
        home_teams.append(home_team)
        gm_date.append(date)

        scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

        home_score = []
        away_score = []

        for score in scoreboard:
            quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
            for i in range(len(quarters)):
                scores = quarters[i].text.split('\n')
                away_score.append(scores[0])
                home_score.append(scores[1])
            away_scoreboards.append(away_score)
            home_scoreboards.append(home_score)            
            
        if len(away_scoreboards) != len(away_teams):
            num_to_add = len(away_teams) - len(away_scoreboards)
            for i in range(num_to_add):
                away_scoreboards.append([])
                home_scoreboards.append([])

        mls = event.find_elements_by_class_name('pointer-2j4Dk')
        away_lines = []
        home_lines = []
        for i in range(len(mls)):    
            if i % 2 == 0:
                away_lines.append(mls[i].text)
            else:
                home_lines.append(mls[i].text)
        away_moneylines_1H.append(away_lines)
        home_moneylines_1H.append(home_lines)

    driver.quit()

mls_1H_df = pd.DataFrame({'away_team':away_teams,
                  'home_team':home_teams,
                   'game_date':gm_date,
                          'away_scoreboard':away_scoreboards,
                          'home_scoreboard':home_scoreboards,
                  'away_1H_ml':away_moneylines_1H,
                  'home_1H_ml':home_moneylines_1H})

mls_1H_df

progress: 100%|██████████████████████████████████████████████████████████████████████████| 2/2 [00:22<00:00, 11.41s/it]


Unnamed: 0,away_team,home_team,game_date,away_scoreboard,home_scoreboard,away_1H_ml,home_1H_ml
0,Chicago,Miami,2006-10-31,"[22, 37, 21, 28, 108]","[16, 14, 21, 15, 66]","[-, -, -, -]","[-, -, -, -]"
1,Phoenix,L.A. Lakers,2006-10-31,"[41, 17, 21, 27, 106]","[26, 27, 34, 27, 114]","[-, -, -, -]","[-, -, -, -]"
2,Indiana,Charlotte,2006-11-01,"[23, 26, 28, 29, 106]","[27, 23, 23, 26, 99]","[-, -, -, -]","[-, -, -, -]"
3,Chicago,Orlando,2006-11-01,"[20, 25, 25, 24, 94]","[32, 31, 23, 23, 109]","[-, -, -, -]","[-, -, -, -]"
4,Atlanta,Philadelphia,2006-11-01,"[14, 21, 23, 17, 75]","[29, 20, 24, 15, 88]","[-, -, -, -]","[-, -, -, -]"
5,New Orleans,Boston,2006-11-01,"[26, 28, 21, 16, 91]","[16, 25, 24, 22, 87]","[-, -, -, -]","[-, -, -, -]"
6,Toronto,Brooklyn,2006-11-01,"[27, 22, 21, 22, 92]","[28, 25, 21, 28, 102]","[-, -, -, -]","[-, -, -, -]"
7,Milwaukee,Detroit,2006-11-01,"[30, 25, 23, 27, 105]","[21, 24, 27, 25, 97]","[-, -, -, -]","[-, -, -, -]"
8,Sacramento,Minnesota,2006-11-01,"[16, 25, 25, 17, 83]","[23, 20, 21, 28, 92]","[-, -, -, -]","[-, -, -, -]"
9,New York,Memphis,2006-11-01,"[20, 31, 20, 18, 89]","[25, 21, 14, 29, 89]","[-, -, -, -]","[-, -, -, -]"


In [12]:
web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/money-line/1st-half/?date=2006-10-31'.format(date)
path = '../chromedriver.exe'
driver = webdriver.Chrome(path)
driver.get(web)

single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

num_listed_events = len(single_row_events)
cutoff = num_listed_events - num_postponed_events

for event in single_row_events[:cutoff]:
    event.find_elements_by_class_name('pointer-2j4dk')

InvalidSelectorException: Message: invalid selector: An invalid or illegal selector was specified
  (Session info: chrome=89.0.4389.114)


## Update Betting Data

### Moneylines

In [6]:
def get_dates_to_scrape(season='2020-21'):
    dates = []
    for season_type in ['Regular Season', 'Playoffs']:
        games = leaguegamelog.LeagueGameLog(season=season, season_type_all_star=season_type).get_data_frames()[0]
        dates.extend(games['GAME_DATE'].unique())
    return dates

dates = get_dates_to_scrape(season='2020-21')


In [7]:
current_moneyline_df = pd.read_csv('../data/all_moneylines_sbr.csv')
missing_lines = current_moneyline_df.loc[(current_moneyline_df['away_moneyline'] == '[]') 
                         | (current_moneyline_df['home_moneyline'] == '[]')]

missing_dates = missing_lines['game_date'].unique()

current_moneyline_dates = set(current_moneyline_df['game_date'].unique().tolist())

In [8]:
dates_to_scrape_ml = set(dates)-current_moneyline_dates
dates_to_scrape_ml

{'2021-05-31',
 '2021-06-01',
 '2021-06-02',
 '2021-06-03',
 '2021-06-04',
 '2021-06-05',
 '2021-06-06',
 '2021-06-07',
 '2021-06-08',
 '2021-06-09',
 '2021-06-10',
 '2021-06-11',
 '2021-06-12',
 '2021-06-13',
 '2021-06-14',
 '2021-06-15',
 '2021-06-16',
 '2021-06-17',
 '2021-06-18',
 '2021-06-19',
 '2021-06-20',
 '2021-06-22',
 '2021-06-23',
 '2021-06-24',
 '2021-06-25',
 '2021-06-26',
 '2021-06-27',
 '2021-06-28',
 '2021-06-29',
 '2021-06-30',
 '2021-07-01',
 '2021-07-03',
 '2021-07-06',
 '2021-07-08',
 '2021-07-11',
 '2021-07-14',
 '2021-07-17',
 '2021-07-20'}

In [9]:
# Get Moneylines

gm_date1 = []
away_teams1 = []
home_teams1 = []
away_moneylines1 = []
home_moneylines1 = []

for date in tqdm(dates_to_scrape_ml, desc='progress'):
    web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/money-line/?date={}'.format(date)
    path = '../chromedriver.exe'
    driver = webdriver.Chrome(path)
    driver.get(web)
    sleep(random.randint(2,3))

    single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

    num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

    num_listed_events = len(single_row_events)
    cutoff = num_listed_events - num_postponed_events

    for event in single_row_events[:cutoff]:

        away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
        home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
        away_teams1.append(away_team)
        home_teams1.append(home_team)
        gm_date1.append(date)


        mls = event.find_elements_by_class_name('pointer-2j4Dk')
        away_lines = []
        home_lines = []
        for i in range(len(mls)):    
            if i % 2 == 0:
                away_lines.append(mls[i].text)
            else:
                home_lines.append(mls[i].text)
        away_moneylines1.append(away_lines)
        home_moneylines1.append(home_lines)

    driver.quit()

moneylines_to_add_df = pd.DataFrame({'away_team':away_teams1,
             'home_team':home_teams1,
             'game_date':gm_date1,
             'away_moneyline':away_moneylines1,
             'home_moneyline':home_moneylines1})

moneylines_to_add_df

progress: 100%|████████████████████████████████████████████████████████████████████████| 38/38 [05:34<00:00,  8.79s/it]


Unnamed: 0,away_team,home_team,game_date,away_moneyline,home_moneyline
0,Atlanta,Milwaukee,2021-06-23,"[+275, +269, +170, +300]","[-340, -374, -200, -380]"
1,Philadelphia,Atlanta,2021-06-18,"[-150, -150, -165, -155]","[+130, +130, +145, +135]"
2,Utah,L.A. Clippers,2021-06-18,"[-150, -145, -150, -155]","[+130, +125, +130, +135]"
3,L.A. Clippers,Phoenix,2021-06-20,"[+155, +148, +150, +150]","[-175, -170, -170, -170]"
4,Atlanta,Philadelphia,2021-06-20,"[+230, +140, +225, +240]","[-270, -180, -275, -300]"
5,Phoenix,L.A. Clippers,2021-06-24,"[-118, -105, -120, -115]","[-102, -115, +100, -105]"
6,Milwaukee,Brooklyn,2021-06-07,"[-124, -125, -125, -135]","[+104, +105, +105, +115]"
7,Denver,Phoenix,2021-06-07,"[+195, +185, +200, +205]","[-225, -221, -240, -245]"
8,Washington,Philadelphia,2021-06-02,"[+205, +203, +195, +195]","[-235, -245, -235, -230]"
9,Atlanta,New York,2021-06-02,"[-105, -105, -105, +105]","[-115, -115, -115, -125]"


In [10]:
current_moneyline_df = pd.read_csv('../data/all_moneylines_sbr.csv')

updated_moneylines = pd.concat([current_moneyline_df, moneylines_to_add_df])

updated_moneylines = updated_moneylines.drop_duplicates(['away_team', 'home_team', 'game_date'], keep='last')
updated_moneylines['game_date'] = pd.to_datetime(updated_moneylines['game_date'])
updated_moneylines = updated_moneylines.sort_values('game_date')
updated_moneylines



Unnamed: 0,away_team,home_team,game_date,away_moneyline,home_moneyline
0,Chicago,Miami,2006-10-31,"'+180', '-', '+180', '-'","'-220', '-', '-210', '-'"
1,Phoenix,L.A. Lakers,2006-10-31,"'-380', '-', '-350', '-'","'+290', '-', '+290', '-'"
13,Chicago,Orlando,2006-11-01,"'-', '-', '-120', '-'","'-', '-', '+100', '-'"
12,L.A. Clippers,Phoenix,2006-11-01,"'+165', '-', '+155', '-'","'-190', '-', '-175', '-'"
11,L.A. Lakers,Golden State,2006-11-01,"'+245', '-', '+250', '-'","'-290', '-', '-300', '-'"
...,...,...,...,...,...
24,Milwaukee,Phoenix,2021-07-08,"[+170, +170, +175, +160]","[-195, -200, -210, -185]"
13,Phoenix,Milwaukee,2021-07-11,"[+175, +177, +170, +165]","[-205, -208, -200, -190]"
18,Phoenix,Milwaukee,2021-07-14,"[+162, +166, +160, +175]","[-182, -195, -180, -205]"
27,Milwaukee,Phoenix,2021-07-17,"[+152, -, +155, +160]","[-172, -, -175, -185]"


In [11]:
for col in updated_moneylines.columns[3:]:
    updated_moneylines[col] = updated_moneylines[col].astype(str)
    updated_moneylines[col] = updated_moneylines[col].str.replace("[", "")
    updated_moneylines[col] = updated_moneylines[col].str.replace("]", "")
    updated_moneylines[col] = updated_moneylines[col].str.strip()
    
updated_moneylines

Unnamed: 0,away_team,home_team,game_date,away_moneyline,home_moneyline
0,Chicago,Miami,2006-10-31,"'+180', '-', '+180', '-'","'-220', '-', '-210', '-'"
1,Phoenix,L.A. Lakers,2006-10-31,"'-380', '-', '-350', '-'","'+290', '-', '+290', '-'"
13,Chicago,Orlando,2006-11-01,"'-', '-', '-120', '-'","'-', '-', '+100', '-'"
12,L.A. Clippers,Phoenix,2006-11-01,"'+165', '-', '+155', '-'","'-190', '-', '-175', '-'"
11,L.A. Lakers,Golden State,2006-11-01,"'+245', '-', '+250', '-'","'-290', '-', '-300', '-'"
...,...,...,...,...,...
24,Milwaukee,Phoenix,2021-07-08,"'+170', '+170', '+175', '+160'","'-195', '-200', '-210', '-185'"
13,Phoenix,Milwaukee,2021-07-11,"'+175', '+177', '+170', '+165'","'-205', '-208', '-200', '-190'"
18,Phoenix,Milwaukee,2021-07-14,"'+162', '+166', '+160', '+175'","'-182', '-195', '-180', '-205'"
27,Milwaukee,Phoenix,2021-07-17,"'+152', '-', '+155', '+160'","'-172', '-', '-175', '-185'"


In [12]:
updated_moneylines.to_csv("../data/all_moneylines_sbr.csv", index=False)

### Update Spreads

In [13]:
current_spread_df = pd.read_csv('../data/all_spreads_sbr.csv')
current_spread_df.loc[(current_spread_df['away_spread'] == '') | (current_spread_df['home_spread'] == '')]

Unnamed: 0,away_team,home_team,game_date,away_scoreboard,home_scoreboard,away_spread,home_spread


In [14]:
current_spread_df = pd.read_csv('../data/all_spreads_sbr.csv')
missing_dates = current_spread_df.loc[(current_spread_df['away_spread'] == '[]') | (current_spread_df['home_spread'] == '[]'), 'game_date'].unique()
len(missing_dates)

0

In [19]:
current_spread_df.sort_values('game_date')

Unnamed: 0,away_team,home_team,game_date,away_scoreboard,home_scoreboard,away_spread,home_spread
0,Chicago,Miami,2006-10-31,"'22', '37', '21', '28', '108'","'16', '14', '21', '15', '66'","'+4½-110', '-', '+4½-110', '-'","'-4½-110', '-', '-4½-110', '-'"
1,Phoenix,L.A. Lakers,2006-10-31,"'41', '17', '21', '27', '106'","'26', '27', '34', '27', '114'","'-8½-110', '-', '-8-110', '-'","'+8½-110', '-', '+8-110', '-'"
13,L.A. Clippers,Phoenix,2006-11-01,"'20', '25', '29', '30', '104'","'24', '24', '29', '35', '112'","'+5-110', '-', '+4½-110', '-'","'-5-110', '-', '-4½-110', '-'"
12,L.A. Lakers,Golden State,2006-11-01,"'22', '27', '38', '23', '110'","'22', '26', '25', '25', '98'","'+7½-110', '-', '+7½-110', '-'","'-7½-110', '-', '-7½-110', '-'"
11,Indiana,Charlotte,2006-11-01,"'23', '26', '28', '29', '106'","'27', '23', '23', '26', '99'","'-1-110', '-', '-1-110', '-'","'+1-110', '-', '+1-110', '-'"
...,...,...,...,...,...,...,...
18764,Brooklyn,Milwaukee,2021-06-13,"'26', '22', '21', '27', '96'","'23', '30', '28', '26', '107'","'-2-120', '-2-110', '-2½-115', '-2½-108'","'+2+100', '+2-110', '+2½-105', '+2½-108'"
18765,Phoenix,Denver,2021-06-13,"'28', '35', '33', '29', '125'","'22', '33', '28', '35', '118'","'-3-115', '-6-110', '-3½-115', '-3½-108'","'+3-105', '+6-116', '+3½-105', '+3½-108'"
18681,Philadelphia,Atlanta,2021-06-14,"'28', '34', '20', '18', '100'","'20', '29', '31', '23', '103'","'-2½-110', '-2½-115', '-3-110', '-2½-108'","'+2½-110', '+2½-105', '+3-110', '+2½-108'"
18682,Utah,L.A. Clippers,2021-06-14,"'13', '31', '29', '31', '104'","'30', '38', '26', '24', '118'","'+5-115', '+5-110', '+5-105', '+5½-108'","'-5-105', '-5-110', '-5-115', '-5½-108'"


In [17]:
dates_to_scrape = set(dates) - set(current_spread_df['game_date'].unique().tolist()) 
dates_to_scrape

{'2021-06-16',
 '2021-06-17',
 '2021-06-18',
 '2021-06-19',
 '2021-06-20',
 '2021-06-22',
 '2021-06-23',
 '2021-06-24',
 '2021-06-25',
 '2021-06-26',
 '2021-06-27',
 '2021-06-28',
 '2021-06-29',
 '2021-06-30',
 '2021-07-01',
 '2021-07-03',
 '2021-07-06',
 '2021-07-08',
 '2021-07-11',
 '2021-07-14',
 '2021-07-17',
 '2021-07-20'}

In [20]:
# Get Spreads

gm_date = []
away_teams = []
home_teams = []
away_scoreboards = []
home_scoreboards = []
away_spreads = []
home_spreads = []

for date in tqdm(dates_to_scrape, desc='progress'):
    web = 'https://www.sportsbookreview.com/betting-odds/nba-basketball/pointspread/?date={}'.format(date)
    path = '../chromedriver.exe'
    driver = webdriver.Chrome(path)
    driver.get(web)
    sleep(random.randint(1,2))


    single_row_events = driver.find_elements_by_class_name('eventMarketGridContainer-3QipG')

    num_postponed_events = len(driver.find_elements_by_class_name('eventStatus-3EHqw'))

    num_listed_events = len(single_row_events)
    cutoff = num_listed_events - num_postponed_events

    for event in single_row_events[:cutoff]:

        away_team = event.find_elements_by_class_name('participantBox-3ar9Y')[0].text
        home_team = event.find_elements_by_class_name('participantBox-3ar9Y')[1].text
        away_teams.append(away_team)
        home_teams.append(home_team)
        gm_date.append(date)


        scoreboard = event.find_elements_by_class_name('scoreboard-1TXQV')

        home_score = []
        away_score = []

        for score in scoreboard:
            quarters = score.find_elements_by_class_name('scoreboardColumn-2OtpR')
            for i in range(len(quarters)):
                scores = quarters[i].text.split('\n')
                away_score.append(scores[0])
                home_score.append(scores[1])
            away_scoreboards.append(away_score)
            home_scoreboards.append(home_score)
            
            
        if len(away_scoreboards) != len(away_teams):
            num_to_add = len(away_teams) - len(away_scoreboards)
            for i in range(num_to_add):
                away_scoreboards.append([])
                home_scoreboards.append([])
            
        
        spreads = event.find_elements_by_class_name('pointer-2j4Dk')
        away_lines = []
        home_lines = []
        for i in range(len(spreads)):    
            if i % 2 == 0:
                away_lines.append(spreads[i].text)
            else:
                home_lines.append(spreads[i].text)
        away_spreads.append(away_lines)
        home_spreads.append(home_lines)
        
        if len(away_spreads) != len(away_teams):
            num_to_add = len(away_teams) - len(away_spreads)
            for i in range(num_to_add):
                away_scoreboards.append([])
                home_scoreboards.append([])
            
        
    driver.quit()

spreads_to_add_df = pd.DataFrame({'away_team':away_teams,
                  'home_team':home_teams,
                   'game_date':gm_date,
                  'away_scoreboard':away_scoreboards,
                  'home_scoreboard':home_scoreboards,
                  'away_spread':away_spreads,
                  'home_spread':home_spreads})

spreads_to_add_df


progress: 100%|████████████████████████████████████████████████████████████████████████| 22/22 [02:48<00:00,  7.64s/it]


Unnamed: 0,away_team,home_team,game_date,away_scoreboard,home_scoreboard,away_spread,home_spread
0,Atlanta,Milwaukee,2021-06-23,"[25, 29, 34, 28, 116]","[28, 31, 26, 28, 113]","[+8-112, +8½-118, +8-115, +8-108]","[-8-108, -8½-108, -8-105, -8-108]"
1,Philadelphia,Atlanta,2021-06-18,"[22, 25, 33, 24, 104]","[29, 22, 25, 23, 99]","[-3-118, -2½-110, -3½-105, -2½-108]","[+3-102, +2½-110, +3½-115, +2½-108]"
2,Utah,L.A. Clippers,2021-06-18,"[33, 39, 22, 25, 119]","[31, 19, 41, 40, 131]","[-3-115, -2½-115, -3-105, -3-108]","[+3-105, +2½-105, +3-115, +3-108]"
3,L.A. Clippers,Phoenix,2021-06-20,"[21, 33, 39, 21, 114]","[21, 36, 36, 27, 120]","[+4-105, +4-110, +4-110, +4-108]","[-4-115, -4-110, -4-110, -4-108]"
4,Atlanta,Philadelphia,2021-06-20,"[25, 23, 28, 27, 103]","[28, 18, 25, 25, 96]","[+6½-109, +4-112, +6½-110, +6½-108]","[-6½-111, -4-114, -6½-110, -6½-108]"
5,Phoenix,L.A. Clippers,2021-06-24,"[21, 27, 21, 23, 92]","[29, 17, 34, 26, 106]","[-1½-113, -1-103, -1½-110, -1-108]","[+1½-107, +1-117, +1½-110, +1-108]"
6,Milwaukee,Brooklyn,2021-06-19,"[25, 22, 35, 27, 6, 115]","[28, 25, 28, 28, 2, 111]","[+2-110, +2-110, +2-105, +1½-108]","[-2-110, -2-110, -2-115, -1½-108]"
7,Phoenix,Milwaukee,2021-07-11,"[28, 17, 31, 24, 100]","[25, 35, 38, 22, 120]","[+4½-105, +4½-110, +4½-110, +5-108]","[-4½-115, -4½-110, -4½-110, -5-108]"
8,Milwaukee,Phoenix,2021-07-06,"[26, 23, 27, 29, 105]","[30, 27, 35, 26, 118]","[+5-110, +5-110, +5½-115, +5½-108]","[-5-110, -5-110, -5½-105, -5½-108]"
9,Phoenix,Milwaukee,2021-07-14,"[23, 29, 30, 21, 103]","[20, 32, 24, 33, 109]","[+4½-113, +4½-115, +4½-115, +5-108]","[-4½-107, -4½-105, -4½-105, -5-108]"


In [21]:
for col in spreads_to_add_df.columns[3:]:
    spreads_to_add_df[col] = spreads_to_add_df[col].astype(str)
    spreads_to_add_df[col] = spreads_to_add_df[col].str.replace("[", "")
    spreads_to_add_df[col] = spreads_to_add_df[col].str.replace("]", "")
    spreads_to_add_df[col] = spreads_to_add_df[col].str.strip()

spreads_to_add_df

Unnamed: 0,away_team,home_team,game_date,away_scoreboard,home_scoreboard,away_spread,home_spread
0,Atlanta,Milwaukee,2021-06-23,"'25', '29', '34', '28', '116'","'28', '31', '26', '28', '113'","'+8-112', '+8½-118', '+8-115', '+8-108'","'-8-108', '-8½-108', '-8-105', '-8-108'"
1,Philadelphia,Atlanta,2021-06-18,"'22', '25', '33', '24', '104'","'29', '22', '25', '23', '99'","'-3-118', '-2½-110', '-3½-105', '-2½-108'","'+3-102', '+2½-110', '+3½-115', '+2½-108'"
2,Utah,L.A. Clippers,2021-06-18,"'33', '39', '22', '25', '119'","'31', '19', '41', '40', '131'","'-3-115', '-2½-115', '-3-105', '-3-108'","'+3-105', '+2½-105', '+3-115', '+3-108'"
3,L.A. Clippers,Phoenix,2021-06-20,"'21', '33', '39', '21', '114'","'21', '36', '36', '27', '120'","'+4-105', '+4-110', '+4-110', '+4-108'","'-4-115', '-4-110', '-4-110', '-4-108'"
4,Atlanta,Philadelphia,2021-06-20,"'25', '23', '28', '27', '103'","'28', '18', '25', '25', '96'","'+6½-109', '+4-112', '+6½-110', '+6½-108'","'-6½-111', '-4-114', '-6½-110', '-6½-108'"
5,Phoenix,L.A. Clippers,2021-06-24,"'21', '27', '21', '23', '92'","'29', '17', '34', '26', '106'","'-1½-113', '-1-103', '-1½-110', '-1-108'","'+1½-107', '+1-117', '+1½-110', '+1-108'"
6,Milwaukee,Brooklyn,2021-06-19,"'25', '22', '35', '27', '6', '115'","'28', '25', '28', '28', '2', '111'","'+2-110', '+2-110', '+2-105', '+1½-108'","'-2-110', '-2-110', '-2-115', '-1½-108'"
7,Phoenix,Milwaukee,2021-07-11,"'28', '17', '31', '24', '100'","'25', '35', '38', '22', '120'","'+4½-105', '+4½-110', '+4½-110', '+5-108'","'-4½-115', '-4½-110', '-4½-110', '-5-108'"
8,Milwaukee,Phoenix,2021-07-06,"'26', '23', '27', '29', '105'","'30', '27', '35', '26', '118'","'+5-110', '+5-110', '+5½-115', '+5½-108'","'-5-110', '-5-110', '-5½-105', '-5½-108'"
9,Phoenix,Milwaukee,2021-07-14,"'23', '29', '30', '21', '103'","'20', '32', '24', '33', '109'","'+4½-113', '+4½-115', '+4½-115', '+5-108'","'-4½-107', '-4½-105', '-4½-105', '-5-108'"


In [22]:
current_spread_df = pd.read_csv('../data/all_spreads_sbr.csv')

current_spread_df
# current_spread_df.loc[current_spread_df['away_spread'] == '[]']

new_current_spreads = pd.concat([current_spread_df, spreads_to_add_df])
new_current_spreads = new_current_spreads.drop_duplicates(subset=['away_team', 'home_team', 'game_date'], keep='last')
new_current_spreads['game_date'] = pd.to_datetime(new_current_spreads['game_date'])
new_current_spreads.sort_values('game_date', inplace=True)
new_current_spreads.to_csv("../data/all_spreads_sbr.csv", index=False)

In [37]:
def get_draftking_lines(date):
    """
    INPUTS
    date: "yyyy-mm-dd"
    OUPUTS 
    dataframe with game spreads
    """
    gm_dates = []
    away_teams = []
    home_teams = []
    away_spreads = []
    home_spreads = []
    away_moneylines = []
    home_moneylines = []

    web = 'https://sportsbook.draftkings.com/leagues/basketball/88670846?category=game-lines&subcategory=game'
    path = '../chromedriver.exe'
    driver = webdriver.Chrome(path)
    driver.get(web)
    sleep(2)

    teams = driver.find_elements_by_xpath('//*[@id="root"]/section/section[2]/section/div[4]/div/div[3]/div/div/div[2]/div/div[2]/div[1]/table/tbody/tr/th/a/div/div[2]/span/div/div')
    spreads = driver.find_elements_by_xpath('//*[@id="root"]/section/section[2]/section/div[4]/div/div[3]/div/div/div[2]/div/div[2]/div[1]/table/tbody/tr/td[1]/div/div/div/div[1]/span')
    moneylines = driver.find_elements_by_xpath('//*[@id="root"]/section/section[2]/section/div[4]/div/div[3]/div/div/div[2]/div/div[2]/div[1]/table/tbody/tr/td[3]/div/div/div/div/div[2]/span')

    for i in range(len(teams)):
        if i%2==0:
            away_teams.append(teams[i].text)
            away_spreads.append(spreads[i].text)
            away_moneylines.append(moneylines[i].text)
            gm_dates.append(date)
        else:
            home_teams.append(teams[i].text)
            home_spreads.append(spreads[i].text)
            home_moneylines.append(moneylines[i].text)    

    driver.quit()

    todays_lines = pd.DataFrame({"game_date":gm_dates,
                'away_team':away_teams,
                'home_team':home_teams,
                'away_spread':away_spreads,
                'home_spread':home_spreads,
                'away_moneyline':away_moneylines,
                'home_moneyline':home_moneylines})
    
    return todays_lines

todays_lines = get_draftking_lines(date='2021-11-03')
todays_lines

Unnamed: 0,game_date,away_team,home_team,away_spread,home_spread,away_moneyline,home_moneyline
0,2021-11-03,POR Trail Blazers,CLE Cavaliers,-5.5,5.5,-235,190
1,2021-11-03,BOS Celtics,ORL Magic,-7.0,7.0,-280,225
2,2021-11-03,NY Knicks,IND Pacers,1.0,-1.0,-105,-115
3,2021-11-03,TOR Raptors,WAS Wizards,3.5,-3.5,145,-165
4,2021-11-03,CHI Bulls,PHI 76ers,2.0,-2.0,105,-125
5,2021-11-03,ATL Hawks,BKN Nets,4.5,-4.5,160,-190
6,2021-11-03,DEN Nuggets,MEM Grizzlies,-1.0,1.0,-110,-110
7,2021-11-03,LA Clippers,MIN Timberwolves,-2.5,2.5,-135,115
8,2021-11-03,DAL Mavericks,SA Spurs,-2.0,2.0,-130,110
9,2021-11-03,CHA Hornets,GS Warriors,6.0,-6.0,190,-235


In [40]:
def convert_american_to_decimal(x):
    x = x.astype(int)
    return np.where(x<0, (-x+100)/x, (x+100)/100)

convert_american_to_decimal(todays_lines['home_moneyline'])

array([ 2.9       ,  3.25      , -1.86956522, -1.60606061, -1.8       ,
       -1.52631579, -1.90909091,  2.15      ,  2.1       , -1.42553191,
       -1.55555556])

In [41]:
todays_lines['away_team'].str[:3].str.strip()


0     POR
1     BOS
2      NY
3     TOR
4     CHI
5     ATL
6     DEN
7      LA
8     DAL
9     CHA
10     NO
Name: away_team, dtype: object

In [47]:
def clean_draftking_lines(df):
    
    abbr_mapping = {'Celtics': 'BOS', 'Trail Blazers': 'POR',
                    'Lakers': 'LAL', 'Nets': 'BKN',
                    'Cavaliers': 'CLE', 'Raptors': 'TOR',
                    '76ers': 'PHI', 'Grizzlies': 'MEM',
                    'Timberwolves': 'MIN', 'Pelicans': 'NOP',
                    'Thunder': 'OKC', 'Mavericks': 'DAL',
                    'Spurs': 'SAS', 'Nuggets': 'DEN',
                    'Warriors': 'GSW', 'Clippers': 'LAC',
                    'Magic': 'ORL', 'Jazz': 'UTA',
                    'Hornets': 'CHA', 'Pistons': 'DET',
                    'Heat': 'MIA', 'Suns': 'PHX',
                    'Hawks': 'ATL', 'Knicks': 'NYK',
                    'Pacers': 'IND', 'Bulls': 'CHI',
                    'Rockets': 'HOU', 'Bucks': 'MIL',
                    'Kings': 'SAC', 'Wizards': 'WAS'}
    
    
    df['away_team'] = df['away_team'].str[3:].str.strip()
    df['home_team'] = df['home_team'].str[3:].str.strip()
    df['away_team'] = df['away_team'].replace(abbr_mapping)
    df['home_team'] = df['home_team'].replace(abbr_mapping)

    df['away_spread'] = df['away_spread'].str.replace('pk', '0', regex=False).astype(float)
    df['home_spread'] = df['home_spread'].str.replace('pk', '0', regex=False).astype(float)
    df['away_moneyline'] = convert_american_to_decimal(df['away_moneyline'].astype(int))
    df['home_moneyline'] = convert_american_to_decimal(df['home_moneyline'].astype(int))
    
    return df

clean_draftking_lines(todays_lines)


Unnamed: 0,game_date,away_team,home_team,away_spread,home_spread,away_moneyline,home_moneyline
0,2021-11-03,,,-5.5,5.5,-1.425532,2.9
1,2021-11-03,,,-7.0,7.0,-1.357143,3.25
2,2021-11-03,,,1.0,-1.0,-1.952381,-1.869565
3,2021-11-03,,,3.5,-3.5,2.45,-1.606061
4,2021-11-03,,,2.0,-2.0,2.05,-1.8
5,2021-11-03,,,4.5,-4.5,2.6,-1.526316
6,2021-11-03,,,-1.0,1.0,-1.909091,-1.909091
7,2021-11-03,,,-2.5,2.5,-1.740741,2.15
8,2021-11-03,,,-2.0,2.0,-1.769231,2.1
9,2021-11-03,,,6.0,-6.0,2.9,-1.425532
