In [157]:
import numpy           as np
import pandas          as pd
import requests
import bs4
# from nba_api.stats.static import teams
# from nba_api.stats.endpoints import scoreboard
#import datetime
#import statsmodels.api as sm
# import os

***

#### Scrape from Vegas Insider

In [3]:
def get_game_html(league, away, home, date):

    """Helper function: returns page content for provided matchup."""

    url = 'https://www.vegasinsider.com/{}/odds/las-vegas/line-movement/{}-@-{}.cfm/date/{}'.format(league, away, home, date)
    html = requests.get(url).content
    
    return html

In [152]:
def get_odds_data(league : str, away : str, home : str, date : str, odds_type : str ="money line"):
    
    """
    Returns VI odds consensus history dataframe and time of game for provided matchup.
    - arguments:
        - league     : sports league of matchup
        - away, home : away team and home team of the game of interest
        - date       : date of game expressed as 'mm-dd-yy'
        - odds_type  : type of odds to return, choose from "money line" (default), "spread", "total", "first half", "second half", or "all"
    """

    # obtain VI consensus line movements
    html = get_game_html(league, away, home, date)
    soup = bs4.BeautifulSoup(html, "html.parser")

    try:

        odds_hist = pd.read_html(soup.find("a", attrs={"name":"BT"}).find_parent("table").find_next_sibling("table").prettify(), header=[0,1])[0]

        odds_hist.columns = ['_'.join(col).strip() for col in odds_hist.columns.values]
        odds_hist["Date"] = pd.to_datetime(odds_hist["Unnamed: 0_level_0_Date"] + "/" + date[-2:] + " " + odds_hist["Unnamed: 1_level_0_Time"])
        odds_hist.drop(columns=["Unnamed: 0_level_0_Date", "Unnamed: 1_level_0_Time"], inplace=True)

        odds_hist.set_index("Date", inplace=True)

        if odds_type == "money line":

            # money line dataframe
            money_line = odds_hist[["Money Line_Fav", "Money Line_Dog"]].dropna().drop_duplicates(subset=["Money Line_Fav", "Money Line_Dog"])

            money_line[["fav", "fav_odds"]] = money_line["Money Line_Fav"].str.split("-", expand=True)
            money_line["fav"] = money_line["fav"].str.strip()
            money_line["fav_odds"] = -pd.to_numeric(money_line["fav_odds"])

            money_line[["dog", "dog_odds"]] = money_line["Money Line_Dog"].str.split("+", expand=True)
            money_line["dog"] = money_line["dog"].str.strip()
            money_line["dog_odds"] = pd.to_numeric(money_line["dog_odds"])

            money_line.drop(columns=["Money Line_Fav", "Money Line_Dog"], inplace=True)

            return money_line

        else:

            return "odds_type not supported"

    # spread dataframe
    # spread = odds_hist[["Spread_Fav", "Spread_Dog"]].dropna().drop_duplicates(subset=["Spread_Fav", "Spread_Dog"])
    # spread[["fav", "spread", "fav_odds"]] = spread["Spread_Fav"].str.split(r"\+|-", expand=True)

    except:
        odds_hist = None

    # try:
    #     game_time = pd.to_datetime(date + " " + soup.find("b",text="\xa0Game Time:").find_parent().text[14:-1])
    # except:
    #     game_time = None
    
    # package outputs
    # output = {'money_line':money_line, 'game_time':game_time}
    
    #return money_line

In [155]:
get_odds_data("nba", "jazz", "raptors","01-01-19").head()

Unnamed: 0_level_0,fav,fav_odds,dog,dog_odds
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-31 15:10:00,TOR,-153,UTH,138
2019-12-31 15:10:00,TOR,-153,UTH,138
2019-12-31 15:15:00,TOR,-160,UTH,140
2019-12-31 16:00:00,TOR,-160,UTH,140
2019-12-31 17:35:00,TOR,-150,UTH,130


#### Get game schedule data

In [143]:
schedule = pd.read_csv("2019nbaschedule.csv")
schedule["Datetime"] = pd.to_datetime(schedule["Date"] + " " + schedule["Start (ET)"])

#### Map NBA team names to VI team nicknames

In [144]:
nicknames = pd.read_csv("teamnames.csv")

In [145]:
schedule = schedule.merge(nicknames, left_on="Visitor/Neutral", right_on="full").merge(nicknames, left_on="Home/Neutral", right_on="full")

In [146]:
schedule.drop(columns=["Date","Start (ET)","Box Score","Visitor/Neutral", "Home/Neutral"], inplace=True)
schedule.rename(columns={"Datetime":"Date", "full_x":"away","PTS":"away_pts","full_y":"home","PTS.1":"home_pts", "nickname_x":"away_nick", "nickname_y":"home_nick"}, inplace=True)
schedule = schedule.sort_values(by="Date").reset_index(drop=True)

In [154]:
schedule.head()

Unnamed: 0,away_pts,home_pts,OT,Attendance,Notes,Date,away,away_nick,home,home_nick
0,87,105,,18624,,2018-10-16 20:00:00,Philadelphia 76ers,76ers,Boston Celtics,celtics
1,100,108,,19596,,2018-10-16 22:30:00,Oklahoma City Thunder,thunder,Golden State Warriors,warriors
2,113,112,,17889,,2018-10-17 19:00:00,Milwaukee Bucks,bucks,Charlotte Hornets,hornets
3,100,103,,20332,,2018-10-17 19:00:00,Brooklyn Nets,nets,Detroit Pistons,pistons
4,83,111,,17923,,2018-10-17 19:00:00,Memphis Grizzlies,grizzlies,Indiana Pacers,pacers


#### Lookup money line history for every NBA game

In [147]:
ml2019 = get_odds_data()

Unnamed: 0,away_pts,home_pts,OT,Attendance,Notes,Date,away,away_nick,home,home_nick
0,87,105,,18624,,2018-10-16 20:00:00,Philadelphia 76ers,76ers,Boston Celtics,celtics
1,100,108,,19596,,2018-10-16 22:30:00,Oklahoma City Thunder,thunder,Golden State Warriors,warriors
2,113,112,,17889,,2018-10-17 19:00:00,Milwaukee Bucks,bucks,Charlotte Hornets,hornets
3,100,103,,20332,,2018-10-17 19:00:00,Brooklyn Nets,nets,Detroit Pistons,pistons
4,83,111,,17923,,2018-10-17 19:00:00,Memphis Grizzlies,grizzlies,Indiana Pacers,pacers
...,...,...,...,...,...,...,...,...,...,...
1307,109,104,,20014,,2019-06-02 20:00:00,Golden State Warriors,warriors,Toronto Raptors,raptors
1308,123,109,,19596,,2019-06-05 21:00:00,Toronto Raptors,raptors,Golden State Warriors,warriors
1309,105,92,,19596,,2019-06-07 21:00:00,Toronto Raptors,raptors,Golden State Warriors,warriors
1310,106,105,,20144,,2019-06-10 21:00:00,Golden State Warriors,warriors,Toronto Raptors,raptors


#### Spread History

In [16]:
def get_spread(odds_table):
    
    # arguments:
        # odds_table: dataframe of line and spread movements generated by odds_scraper      
    # returns:
        # spread    : cleaned dataframe of all historical changes in the spread
        
    ###########################################################################################
    
    # conditions to filter odds table
    fav_changed = (odds_table['spread_fav'] != odds_table['spread_fav'].shift(1))
    dog_changed = (odds_table['spread_dog'] != odds_table['spread_dog'].shift(1))
    fav_notna   = (odds_table['spread_fav'].notna())
    dog_notna   = (odds_table['spread_dog'].notna())
    
    spread      = odds_table[(fav_changed | dog_changed) & fav_notna & dog_notna][['spread_fav','spread_dog']]
    
    # clean fav and dog columns
    spread['fav'] = spread['spread_fav'].str[:3]
    spread['dog'] = spread['spread_dog'].str[:3]
    
    spread['fav_odds'] = spread['spread_fav'].str[-4:]
    spread['dog_odds'] = spread['spread_dog'].str[-4:]
    
    spread['spread'] = spread['spread_fav'].str[3:-4]

    return spread.iloc[:,2:]

#### Over/Under History

In [17]:
def get_over_under(odds_table):
    
    # arguments:
        # odds_table: dataframe of line and spread movements generated by odds_scraper      
    # returns:
        # over_under: cleaned dataframe of all historical changes in the over/under (total)
        
    ###########################################################################################
    
    # conditions to filter odds table
    over_changed  = (odds_table['over'] != odds_table['over'].shift(1))
    under_changed = (odds_table['under'] != odds_table['under'].shift(1))
    over_notna    = (odds_table['over'].notna())
    under_notna   = (odds_table['under'].notna())
    
    over_under    = odds_table[(over_changed | under_changed) & over_notna & under_notna][['over','under']]
    
    # clean over and under columns
    over_under['total'] = over_under['over'].str[:-4]
    over_under['over']  = over_under['over'].str[-4:]
    over_under['under'] = over_under['under'].str[-4:]

    return over_under.iloc[:,:]

#### Game Results

In [18]:
def get_game_score(away, home, date):
    
    # arguments:
        # away, home: away team and home team of the game of interest
        # date      : date of game expressed as 'mm-dd-yy'        
    # returns:
        # score     : dataframe of both teams' final scores and team stats
            
    ###########################################################################################
    
    # obtain team abbreviations
    away_abbrev = [t for t in teams.get_teams() if t['nickname']==away.capitalize()][0]['abbreviation']
    home_abbrev = [t for t in teams.get_teams() if t['nickname']==home.capitalize()][0]['abbreviation']
    
    # get scoreboard for game
    all_games     = scoreboard.Scoreboard(game_date=date,league_id='00').get_data_frames()[1]
    score         = all_games[all_games['TEAM_ABBREVIATION'].isin([away_abbrev,home_abbrev])].copy()
    score.drop(columns=['GAME_DATE_EST','GAME_SEQUENCE','GAME_ID','TEAM_ID'], inplace=True)
    score.columns = map(str.lower, score.columns)
    score.set_index('team_abbreviation',inplace=True)
    
    return score

#### Get all history for any game

In [19]:
def get_game_info(away, home, date):
    
    # arguments:
        # away, home: away team and home team of the game of interest
        # date      : date of game expressed as 'mm-dd-yy'        
    # returns:
        # game_dict : dictionary of following four cleaned dataframes:
            # money_line: all money line changes
            # spread    : all spread changes
            # over_under: all over/under changes
            # score     : final score and stats for both teams
            # as well as the matchup and game time
            
    ###########################################################################################
    
    # get odds data
    all_data  = get_odds_data(away, home, date)
    raw_data  = all_data['odds_hist']
    matchup   = all_data['matchup']
    game_time = all_data['game_time']
    
    # get money line, spread, and over/under
    money_line = get_money_line(raw_data)
    spread     = get_spread(raw_data)
    over_under = get_over_under(raw_data)
    
    # get game result
    #score = get_game_score(away, home, date)
    
    game_dict  = {'matchup':matchup, 'game_time':game_time, 
                  'money_line':money_line, 'spread':spread, 'over_under':over_under}
    
    return game_dict

In [20]:
g = get_game_info('jazz','timberwolves','12-11-19')

KeyError: 1

In [None]:
g

{'matchup': 'Utah Jazz @ Minnesota Timberwolves',
 'game_time': Timestamp('2019-12-11 20:05:00'),
 'score':                   team_city_name team_wins_losses  pts_qtr1  pts_qtr2  \
 team_abbreviation                                                       
 UTA                         Utah            14-11        25        34   
 MIN                    Minnesota            10-14        23        36   
 
                    pts_qtr3  pts_qtr4  pts_ot1  pts_ot2  pts_ot3  pts_ot4  \
 team_abbreviation                                                           
 UTA                      38        30        0        0        0        0   
 MIN                      24        33        0        0        0        0   
 
                   ...   pts_ot8  pts_ot9  pts_ot10  pts  fg_pct  ft_pct  \
 team_abbreviation ...                                                     
 UTA               ...         0        0         0  127   0.566   0.773   
 MIN               ...         0        0         0  