# NBA Bet Prop Analysis

## Part 2 - Updates 

This script is meant to handle the daily data updates to the SQLite database (PropAnalysis.db). Just like in Part 1, this script will scrape DraftKings for NBA bet odds, SportsLine for the NBA projections, and RapidAPI's API-NBA feed for the actual game stats. However, in this script we will not be replacing the table, but appending to it. 

During this update process we will also be able to "score" our bets to see if they won or lost. After collecting a good sample size of results, we will be able to compare our win rate to our estimated edge to see if their is a good correlation that would imply a profitable betting model. 

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import json
from datetime import date, datetime, timedelta
from pytz import timezone
import sqlite3
from sqlalchemy import create_engine
import time

### Helper Functions 

In [2]:
def american2DecimalOdds(americanBetOdds):
    """
    @betOdds (str) the American odds of the bet. Must be prefaced with a '+' or '-'
    
    Returns the bet odds in decimal format. 
    """
    try:
        if (americanBetOdds[0] == '+'):
            americanBetOdds = int(americanBetOdds[1:])
            decimalOdds = (americanBetOdds/100) + 1
            # Return the decimalOdds in a clean format
            return(round(decimalOdds,2))
        elif americanBetOdds[0] == '-':
            americanBetOdds = int(americanBetOdds[1:])
            decimalOdds = (100/americanBetOdds) + 1
            # Return the decimalOdds in a clean format
            return(round(decimalOdds,2))
        else:
            print("Bet odds must begin with a '+' or '-'")
    except:
        return(None)

In [3]:
def impliedOdds(betOdds):
    """
    @betOdds (str) the American odds of the bet. Must be prefaced with a '+' or '-'
    
    Takes in the betOdds and returns the implied probability of the bet
    """
    try:
        # First need to convert the American odds to decimal odds
        decimalOdds = american2DecimalOdds(betOdds)

        # Use the decimal odds to return the implied probability
        probability = 1/decimalOdds * 100

        # Return the probability rounded to the nearest whole number
        return(round(probability))
    except:
        return(None)

In [4]:
def findTeamIds(Game):
    """
    @Game str: the DraftKings listed game
    Example: "GS Warriors @ NO Pelicans"
    
    Return: @homeTeamId int,
            @awayTeamId int
    """
    # Replace certain names from DraftKings to Match our NBA_Teams table
    Game = Game.replace("GS Warriors", "GSW Warriors").replace("SA Spurs", "SAS Spurs")
    Game = Game.replace("NO Pelicans", "NOP Pelicans").replace("LA Clippers", "LAC Clippers")
    Game = Game.replace("NY Knicks", "NYK Knicks").replace("LA Lakers", "LAL Lakers")
    Game = Game.replace("Trail Blazers", "TrailBlazers").replace("PHO Suns", "PHX Suns")
    # Split the game into home and away team
    aTeam, hTeam = Game.split("@")
    # Clean the whitespaces from either end
    hTeam = hTeam.strip()
    aTeam = aTeam.strip()
    # Split the teams in the City and Nickname
    hShortName, hNickname = hTeam.split(" ")
    aShortName, aNickname = aTeam.split(" ")
    # Replace certain names from DraftKings to Match our NBA_Teams table
    hNickname = hNickname.replace("TrailBlazers", "Trail Blazers")
    aNickname = aNickname.replace("TrailBlazers", "Trail Blazers")
    # Use the City and Nickname to find the teamId
    hTeamId = NBA_Teams.loc[(NBA_Teams['shortName'] == hShortName) & (NBA_Teams['Nickname'] == hNickname), "teamId"].values[0]
    aTeamId = NBA_Teams.loc[(NBA_Teams['shortName'] == aShortName) & (NBA_Teams['Nickname'] == aNickname), "teamId"].values[0]
    # Return the results
    return(hTeamId, aTeamId)

In [5]:
def findGameId(Game, startDate, startTime):
    """
    @Game str: the DraftKings listed game. Example: 'GS Warriors @ NO Pelicans'
    @startDate str: the DraftKings listed game date. Example: 'Fri, Jan 14'
    @startTime str: the DraftKings listed game start time. Example: '07:30 PM'
    """
    # Get the home and awaw team ids
    hTeamId, aTeamId = findTeamIds(Game)
    # Get the current Year
    currentDateTime = datetime.now()
    date = currentDateTime.date()
    year = date.strftime("%Y")
    # Format startDate as yyyy-mm-dd format
    startDate = datetime.strptime(startDate, '%a, %b %d')
    startDate = startDate.strftime(f'{year}-%m-%d')
    # Format startTime hh:mm
    startTime = datetime.strptime(startTime, '%I:%M %p')
    startTime = startTime.strftime('%H:%M')
    # Use the startDate and startTime to find the gameId
    gameId = NBA_Games.loc[(NBA_Games['startDate'] == startDate) & (NBA_Games['homeTeamId'] == hTeamId) &
                           (NBA_Games['awayTeamId'] == aTeamId), "gameId"].values[0]
    # Return the results
    return(gameId)

In [6]:
def betGrade(line2ProjDiff):
    """
    @line2ProjDiff float: the absolute difference between the DK line and the SportLine projection
    """
    if line2ProjDiff > 6:
        grade = 'A'
    elif line2ProjDiff > 4:
        grade = 'B'
    elif line2ProjDiff > 2:
        grade = 'C'
    elif line2ProjDiff > 1:
        grade = 'D'
    else:
        grade = 'F'
    return(grade)

## Pull in SQLite Tables 

In [7]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Query NBA_Teams table into dataframe
NBA_Teams = pd.read_sql_query("SELECT * FROM NBA_Teams", conn)

# Query NBA_Players table into dataframe
NBA_Players = pd.read_sql_query("SELECT * FROM NBA_Players", conn)

# Close connection when done
conn.close()

## Pull Today's Data 

In [8]:
# Returns the current local date
today = date.today()
print("Today date is: ", today)

Today date is:  2022-01-16


### DraftKings - NBA

In [9]:
# Create a dataframe to append results to
bet_offers_dailydf = pd.DataFrame(columns=['League', 'Game', 'StartDate', 'StartTime', 'Player', 'BetLabel', 
                                      'SportsLine_Projection', 'DK_Line', 'Outcome1_Label', 'Outcome1_Odds', 
                                      'Outcome2_Label', 'Outcome2_Odds'])

# Create a list of the DraftKings URLs to parse
url = "https://sportsbook.draftkings.com/leagues/basketball/88670846"
    
# Get the BeautifulSoup data from the DraftKings website
dk_response = requests.get(url)
dk_soup = BeautifulSoup(dk_response.text, "html.parser")

# Narrow the Beautiful Soup extract to just the field of interest "window.__INITIAL_STATE__"

# Filter out opening and closing <script> tags
dk_scrape = str(list(list(list(dk_soup.children)[2])[3])[11]).replace("<script>","").replace("</script>", "")
# Remove leading and trailing whitespace
dk_scrape = dk_scrape.strip()
# Split sections
dk_scrape = dk_scrape.split(";\n")
# Isolate to json of interest
dk_scrape = dk_scrape[6].strip()
# Format as json dictionary
dk_scrape = dk_scrape.replace("window.__INITIAL_STATE__ = ","")
dk_scrape = json.loads(dk_scrape)

# Grab the sport ID from the scrape
sportId = list(dk_scrape['eventGroups'].keys())[0]

# From the full scrape of the page, pull a list of the games to loop over and extract data from
games = dk_scrape['eventGroups'][sportId]['events'].keys()

# Loop over games
for index, game in enumerate(games):

    # Set the game JSON as variable
    game_details = dk_scrape['eventGroups'][sportId]['events'][game]
    # Get the eventId so I can scrape the actual props
    eventId = game_details['eventId']

    # Web scrape Draft Kings for player props
    props_url = f"https://sportsbook.draftkings.com/event/{eventId}"
    response = requests.get(props_url)
    soup = BeautifulSoup(response.text, "html.parser")

    # Clean BeautifulSoup response
    # Filter out opening and closing <script> tags
    scrape = str(list(list(list(soup.children)[2])[3])[11]).replace("<script>","").replace("</script>", "")
    # Remove leading and trailing whitespace
    scrape = scrape.strip()
    # Split sections
    scrape = scrape.split(";\n")
    # Isolate to json of interest
    scrape = scrape[6].strip()
    # Format as json dictionary
    scrape = scrape.replace("window.__INITIAL_STATE__ = ","")
    scrape = json.loads(scrape)

    # Parse the scrape results
    eventGroupId = list(scrape['eventGroups'].keys())[0]
    providerEventId = list(scrape['eventGroups'][eventGroupId]['events'].keys())[0]
    providerOfferId = list(scrape['offers'][eventGroupId].keys())[0]
    eventId = scrape['eventGroups'][eventGroupId]['events'][providerEventId]['eventId']
    game_details = scrape['eventGroups'][eventGroupId]['events'][providerEventId]
    game = game_details['name']
    eventGroup = game_details['eventGroupName']
    teamName1 = game_details['teamName1']
    teamName1 = game_details['teamName2']
    startDate = game_details['startDate']
    date, dtime = startDate.split('T')
    dt = date + ' ' + dtime[:8]
    dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
    # Format as right timezone (-5 hours)
    dt = dt - timedelta(hours=5)
    date = dt.strftime('%a, %b %d')
    dtime = dt.strftime('%I:%M %p')

    # Isolate the bet offers from the beautiful soup scrape
    offers = scrape['offers'][eventGroupId]

    # Loop over offers and add to dataframe
    for index, offer in enumerate(offers):

        # Create a list for parsing the offers 
        offers_parsed = [eventGroup, game, date, dtime, '', '', '', '', '', '', '', '']

        # Parse the betting offer
        offer_dict = scrape['offers'][eventGroupId][offer]
        try:
            providerOfferId = offer_dict['providerOfferId']
            providerId = offer_dict['providerId']
            providerEventId = offer_dict['providerEventId']
            bet_label = offer_dict['label']
            isOpen = offer_dict['isOpen']
            outcomes = offer_dict['outcomes']
        except:
            continue

        # Assign to list
        offers_parsed[5] = bet_label    

        # Extract outcomes
        if len(outcomes) == 1:

            # Parse the outcome
            outcome_label = outcomes[0]['label']
            outcome_odds = outcomes[0]['oddsAmerican']
            offers_parsed[8] = outcome_label
            offers_parsed[9] = outcome_odds

            # Append the list to the dataframe 
            bet_offers_dailydf.loc[len(bet_offers_dailydf)] = offers_parsed

        elif len(outcomes) == 2:

            # Parse the outcomes
            for i, x in enumerate(outcomes):
                if i == 0:
                    outcome_label = outcomes[i]['label']
                    try:
                        outcome_line = outcomes[i]['line']
                    except:
                        outcome_line = ''
                    outcome_odds = outcomes[i]['oddsAmerican']
                    offers_parsed[7] = outcome_line
                    offers_parsed[8] = outcome_label
                    offers_parsed[9] = outcome_odds
                else:
                    outcome_label = outcomes[i]['label']
                    try:
                        outcome_line = outcomes[i]['line']
                    except:
                        continue
                    outcome_odds = outcomes[i]['oddsAmerican']
                    offers_parsed[10] = outcome_label
                    offers_parsed[11] = outcome_odds

            # Append the list to the dataframe 
            bet_offers_dailydf.loc[len(bet_offers_dailydf)] = offers_parsed

        else:

            continue

# Preview output
bet_offers_dailydf.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,Outcome1_Odds,Outcome2_Label,Outcome2_Odds
0,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,,Total Points Odd/Even,,,Odd,-120,,
1,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,,Hamidou Diallo Points + Assists + Rebounds,,21.5,Over,105,Under,-140.0
2,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,,Hamidou Diallo Assists + Rebounds,,6.5,Over,125,Under,-165.0
3,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,,Cameron Payne Points,,12.5,Over,105,Under,-135.0
4,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,,Hamidou Diallo Points + Assists,,15.5,Over,-135,Under,100.0


### SportsLine Projections - NBA 

In [10]:
# Web scrape Fantasy Pros for relevant information
url = 'https://www.sportsline.com/nba/expert-projections/simulation/'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

# Isolate soup to the table of interest
output = str(list(list(list(soup.children)[1])[1])[1])
# Format as json dictionary
output = output.replace("""<script id="__NEXT_DATA__" type="application/json">""","").replace("</script>","")
output = json.loads(output)
projections = output['props']['initialState']['fantasyState']['projectionsPageState']['data']['projections']

# Create a dataframe to append results to
NBA_SportsLine_Projections = pd.DataFrame(columns=['LEAGUE', 'PLAYER', 'POS', 'TEAM', 'GAME', 'FP', 'PTS', 'MIN', 'FG',
                                                   'FGA', 'AST', 'TRB', 'DRB', 'ORB', 'BK', 'ST', 'TO', 'FT', 'FTP', 
                                                   'FGP', 'DATE'])
                                                   
# Loop over the projections and parse
for i, projection in enumerate(projections):
    # Create an empty list to update the data for
    player_projections = [0] * 21
    # Set LEAGUE columns
    player_projections[0] = 'NBA'
    # Loop over list of projections to scrape
    projectionFields = projection['projectionFields']
    # Create a counter to add additional fields to correct list position
    counter = 1
    for x in projectionFields:
        field = x['field']
        if field in NBA_SportsLine_Projections.columns:
            try:
                value = x['value']
                player_projections[counter] = value
            except KeyError:
                player_projections[counter] = 0
            counter += 1
    # Set DATE column with today's date
    player_projections[20] = today
    # Append the list to the dataframe         
    NBA_SportsLine_Projections.loc[len(NBA_SportsLine_Projections)] = player_projections

# Preview output
NBA_SportsLine_Projections.head()

Unnamed: 0,LEAGUE,PLAYER,POS,TEAM,GAME,FP,PTS,MIN,FG,FGA,...,TRB,DRB,ORB,BK,ST,TO,FT,FTP,FGP,DATE
0,NBA,Nikola Jokic,C,DEN,UTA@DEN,52.1,25.3,33,9.7,18.5,...,12.2,10.0,2.2,0.8,1.3,2.6,4.2,0.82,0.52,2022-01-16
1,NBA,Karl-Anthony Towns,C,MIN,GS@MIN,38.8,22.1,34,7.7,16.0,...,10.8,7.7,3.1,1.4,0.9,3.3,4.1,0.82,0.48,2022-01-16
2,NBA,De`Aaron Fox,PG,SAC,HOU@SAC,36.9,21.7,34,8.1,16.1,...,3.5,3.1,0.4,0.4,1.4,2.7,4.4,0.73,0.5,2022-01-16
3,NBA,Devin Booker,SG,PHO,PHO@DET,35.3,23.2,34,8.2,18.3,...,4.9,4.3,0.6,0.3,1.0,3.1,4.5,0.88,0.45,2022-01-16
4,NBA,Chris Paul,PG,PHO,PHO@DET,35.2,14.4,32,5.4,11.5,...,4.5,4.2,0.4,0.2,2.1,2.3,2.3,0.88,0.47,2022-01-16


In [50]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Create the connection to the SQLite database
engine = create_engine('sqlite:///Data/PropAnalysis.db', echo=True)
sqlite_connection = engine.connect()

# Update and replace the table in SQLite3
sqlite_table = "SportsLine_Projections"
NBA_SportsLine_Projections.to_sql(sqlite_table, sqlite_connection, if_exists='append')

# Close connection when done
sqlite_connection.close()
conn.close()

2022-01-16 14:00:53,416 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("SportsLine_Projections")
2022-01-16 14:00:53,416 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-01-16 14:00:53,421 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-01-16 14:00:53,424 INFO sqlalchemy.engine.Engine INSERT INTO "SportsLine_Projections" ("index", "LEAGUE", "PLAYER", "POS", "TEAM", "GAME", "FP", "PTS", "MIN", "FG", "FGA", "AST", "TRB", "DRB", "ORB", "BK", "ST", "TO", "FT", "FTP", "FGP", "DATE") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2022-01-16 14:00:53,425 INFO sqlalchemy.engine.Engine [generated in 0.00214s] ((0, 'NBA', 'Nikola Jokic', 'C', 'DEN', 'UTA@DEN', 52.1, 25.3, 33, 9.7, 18.5, 7.5, 12.2, 10, 2.2, 0.8, 1.3, 2.6, 4.2, 0.82, 0.52, '2022-01-16'), (1, 'NBA', 'Karl-Anthony Towns', 'C', 'MIN', 'GS@MIN', 38.8, 22.1, 34, 7.7, 16.0, 3.5, 10.8, 7.7, 3.1, 1.4, 0.9, 3.3, 4.1, 0.82, 0.48, '2022-01-16'), (2, 'NBA', 'De`Aaron Fox', 'PG', 'SAC', 'HOU@SAC', 36.9, 21.7

### Connect Bet Lines to Projections 

In [13]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Query NBA_Players table into dataframe
SportsLine_Projections = pd.read_sql_query("SELECT * FROM SportsLine_Projections", conn)
# Drop duplicates from SporsLine_Projections table
SportsLine_Projections = SportsLine_Projections.drop_duplicates(keep='last')

# Close connection when done
conn.close()

In [14]:
# Get a list of the players in the projections tables
SportsLine_players = SportsLine_Projections['PLAYER'].to_list()

In [15]:
# For every row in bef_offers_df, check if the label contains the name of a player, if so add it to the Player column
for i, row in bet_offers_dailydf.iterrows():
    betLabel = row["BetLabel"]
    player = [x for x in SportsLine_players if x in betLabel]
    if len(player) > 0:
        # Extract the player name from the list
        player = player[0]
        # Update the Player field in the df
        bet_offers_dailydf.loc[i, "Player"] = player
        # Update the BetLabel to remove the player name to isolate the stat
        bet_offers_dailydf.loc[i, "BetLabel"] = betLabel.replace(player, "").strip()

In [22]:
# Link the SportsLine projection fields with the bet label tracked
stat_label_map = {
    'Assists': 'AST',
    'Points': 'PTS',
    'Rebounds': 'TRB'
}

stat_label_keys = list(stat_label_map.keys())

# Create a column for the SportsLine Projection
for i, row in bet_offers_dailydf.iterrows():
    # Collet needed variables
    BetLabel = row["BetLabel"]
    bet_parts = BetLabel.split("+")
    bet_parts = [x.strip() for x in bet_parts]
    bet_parts.sort()
    player = row["Player"]

    if bet_parts is not None:

        if len(bet_parts) == 3:

            # Check that the labels are those in the stat_label_map 
            # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
            if list(bet_parts) == stat_label_keys:
        
                # Create a variable to store the NBA_SportsLine_Projection
                SportsLine_Projection = 0
                
                # Loop over the bet parts and add the projections
                for part in bet_parts:
                    # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                    projection = SportsLine_Projections.loc[SportsLine_Projections.PLAYER == player, 
                                                                stat_label_map[part]].iloc[0]
                    # Add the projection to the total
                    SportsLine_Projection += projection
                    
                # Set the projection in the bet_offers_dailydf table
                bet_offers_dailydf.loc[i, "SportsLine_Projection"] = SportsLine_Projection

        elif len(bet_parts) == 2:
            
            # Check that both the labels are in the stat_label_lookup
            # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
            if (bet_parts[0] in stat_label_keys) & (bet_parts[1] in stat_label_keys):
                
                # Create a variable to store the NBA_SportsLine_Projection
                SportsLine_Projection = 0
                
                # Loop over the bet parts and add the projections
                for part in bet_parts:
                    # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                    projection = SportsLine_Projections.loc[SportsLine_Projections.PLAYER == player, 
                                                                stat_label_map[part]].iloc[0]
                    # Add the projection to the total
                    SportsLine_Projection += projection
                    
                # Set the projection in the bet_offers_dailydf table
                bet_offers_dailydf.loc[i, "SportsLine_Projection"] = SportsLine_Projection

        elif len(bet_parts) == 1:

            if BetLabel in stat_label_map.keys():
                # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                SportsLine_Projection = SportsLine_Projections.loc[SportsLine_Projections.PLAYER == player, 
                                                                           stat_label_map[BetLabel]].iloc[0]
                # Set the projection in the bet_offers_dailydf table
                bet_offers_dailydf.loc[i, "SportsLine_Projection"] = SportsLine_Projection

        else:
            
            continue
    
    else:
        
        continue

# Format columns
bet_offers_dailydf['SportsLine_Projection'] = pd.to_numeric(bet_offers_dailydf['SportsLine_Projection'], errors='coerce')
bet_offers_dailydf['DK_Line'] = pd.to_numeric(bet_offers_dailydf['DK_Line'], errors='coerce')
bet_offers_dailydf['Outcome1_Label'] = bet_offers_dailydf['Outcome1_Label'].astype(str)
bet_offers_dailydf['Outcome2_Label'] = bet_offers_dailydf['Outcome2_Label'].astype(str)

# Add a column for difference between the projection and line
bet_offers_dailydf["Line2ProjDiff"] = abs(bet_offers_dailydf['SportsLine_Projection'] - bet_offers_dailydf['DK_Line'])
# Add columns with the implied probabilities of the odds
bet_offers_dailydf['Outcome1_ImpliedProbability'] = bet_offers_dailydf['Outcome1_Odds'].apply(impliedOdds)
bet_offers_dailydf['Outcome2_ImpliedProbability'] = bet_offers_dailydf['Outcome2_Odds'].apply(impliedOdds)
# Add a column for the bet "juice"
bet_offers_dailydf['Bet_Juice'] = bet_offers_dailydf['Outcome1_ImpliedProbability'] 
+ bet_offers_dailydf['Outcome2_ImpliedProbability'] - 100
# Add an empty column for the NBA_Stat that will be filled in later
bet_offers_dailydf['NBA_Stat'] = 'NA'

# Drop rows which contain any NaN value in the selected columns
bet_offers_dailydf = bet_offers_dailydf.dropna(how='any', subset=['SportsLine_Projection', 'DK_Line'])
# Reset the index
bet_offers_dailydf.reset_index(inplace = True, drop = True)

# Preview output
bet_offers_dailydf.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,Outcome1_Odds,Outcome2_Label,Outcome2_Odds,Line2ProjDiff,Outcome1_ImpliedProbability,Outcome2_ImpliedProbability,Bet_Juice,NBA_Stat
0,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Hamidou Diallo,Points + Assists + Rebounds,16.0,21.5,Over,105,Under,-140,5.5,49,58.0,49,
1,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Hamidou Diallo,Assists + Rebounds,5.8,6.5,Over,125,Under,-165,0.7,44,62.0,44,
2,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Cameron Payne,Points,8.9,12.5,Over,105,Under,-135,3.6,49,57.0,49,
3,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Hamidou Diallo,Points + Assists,11.5,15.5,Over,-135,Under,100,4.0,57,50.0,57,
4,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Hamidou Diallo,Rebounds,4.5,5.5,Over,120,Under,-155,1.0,45,61.0,45,


In [None]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Create the connection to the SQLite database
engine = create_engine('sqlite:///Data/PropAnalysis.db', echo=True)
sqlite_connection = engine.connect()

# Update and replace the table in SQLite3
sqlite_table = "bet_offers_df"
bet_offers_dailydf.to_sql(sqlite_table, sqlite_connection, if_exists='append')

# Close connection when done
sqlite_connection.close()
conn.close()

### NBA Game Stats

#### Update NBA Games Table

In [23]:
# Hit RapidApi URL for NBA games for 2021 season
url = "https://api-nba-v1.p.rapidapi.com/games/seasonYear/2021"

headers = {
    'x-rapidapi-host': "api-nba-v1.p.rapidapi.com",
    'x-rapidapi-key': "40cfc5891cmshe48d38938873f74p12526ejsnb332fefe9905"
    }

response = requests.request("GET", url, headers=headers)

# Create a dataframe to append results to
NBA_Games = pd.DataFrame(columns=['gameId', 'seasonYear', 'League', 'homeTeamId', 'awayTeamId', 'startDate', 'startTime', 
                                  'status', 'homeScore', 'awayScore'])

# Set the response as a JSON and extract the games section
games = json.loads(response.text)
games = games['api']['games']

# Loop over the games and parse needed fields
for game in games:
    
    # Only care about NBA games (not G-league)
    league = game['league']
    
    if league == 'standard':
        
        # Grab needed fields
        seasonYear = game['seasonYear']
        gameId = game['gameId']
        startTimeUTC = game['startTimeUTC']
        arena = game['arena']
        city = game['city']
        status = game['statusGame']
        awayTeamId = game['vTeam']['teamId']
        homeTeamId = game['hTeam']['teamId']
        awayScore = game['vTeam']['score']['points']
        homeScore = game['hTeam']['score']['points']
        
        # Convert startTime to EST
        try:
            # Format string as datetime
            startTimeUTC = datetime.strptime(startTimeUTC, "%Y-%m-%dT%H:%M:%S.000Z")
            # Set UTC as timezone
            startTimeUTC = startTimeUTC.replace(tzinfo=timezone('UTC'))
            # Convert timezone to Eastern
            startTime = startTimeUTC.astimezone(timezone('US/Eastern'))
            # Split the date and time
            startDate = startTime.strftime("%Y-%m-%d")
            startTime = startTime.strftime("%H:%M")
        except ValueError:
            startDate = startTimeUTC
            startTime = 'TBD'
        
        # Create list to add to dataframe
        nba_games_list = [gameId, seasonYear, league, homeTeamId, awayTeamId, startDate, startTime, status, 
                          homeScore, awayScore]
        
        # Append the list to the dataframe         
        NBA_Games.loc[len(NBA_Games)] = nba_games_list
        
    else:
        continue
        
NBA_Games.head()

Unnamed: 0,gameId,seasonYear,League,homeTeamId,awayTeamId,startDate,startTime,status,homeScore,awayScore
0,10796,2021,standard,17,4,2021-10-03,15:30,Finished,97,123
1,10797,2021,standard,38,27,2021-10-04,19:00,Finished,123,107
2,10798,2021,standard,2,26,2021-10-04,19:30,Finished,98,97
3,10799,2021,standard,20,1,2021-10-04,19:30,Finished,125,99
4,10800,2021,standard,22,23,2021-10-04,20:00,Finished,117,114


In [24]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Create the connection to the SQLite database
engine = create_engine('sqlite:///Data/PropAnalysis.db', echo=True)
sqlite_connection = engine.connect()

# Save NBA_Games dataframe to SQLite
sqlite_table = "NBA_Games"
NBA_Games.to_sql(sqlite_table, sqlite_connection, if_exists='replace')

# Close connections when done
conn.close()
sqlite_connection.close()

2022-01-16 14:12:45,476 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("NBA_Games")
2022-01-16 14:12:45,477 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-01-16 14:12:45,479 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("NBA_Games")
2022-01-16 14:12:45,481 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-01-16 14:12:45,483 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2022-01-16 14:12:45,485 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-01-16 14:12:45,487 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("NBA_Games")
2022-01-16 14:12:45,488 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-01-16 14:12:45,491 INFO sqlalchemy.engine.Engine SELECT sql FROM  (SELECT * FROM sqlite_master UNION ALL   SELECT * FROM sqlite_temp_master) WHERE name = ? AND type = 'table'
2022-01-16 14:12:45,492 INFO sqlalchemy.engine.Engine [raw sql] ('NBA_Games',)
2022-01-16 14:12:45,494 INFO sqlalchemy.engine.Engine PRAGMA main.foreign_key_list("

#### Update NBA Game Stats Table 

In [25]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Query NBA_GameStats table into dataframe
NBA_GameStats = pd.read_sql_query("SELECT * FROM NBA_GameStats", conn)

# Close connection when done
conn.close()

In [26]:
# Get a list of gameIds for completed games
completedGameIds = set(NBA_Games[NBA_Games['status'] == 'Finished']['gameId'].tolist())

# Find a list of gameIds already in NBA_GameStats
NBA_GameStats_Games = set(NBA_GameStats['gameId'].tolist())

# Set the list of gameIds to search to only those completed games that we don't already have data for in NBA_GameStats
gameIds = [x for x in completedGameIds if x not in NBA_GameStats_Games]

# Limited to 100 requests per day before getting charged
counter = 0
limit = 100

gameIds

[]

In [23]:
# Create a new NBA_GameStats dataframe that we'll use to append SQLite3 table
NBA_GameStats = pd.DataFrame(columns=['gameId', 'playerId', 'teamId', 'Points', 'Position', 'Minutes', 'FGM', 'FGA',
                                     'FGP', 'FTM', 'FTA', 'TPM', 'TPA', 'TPP', 'offReb', 'defReb', 'totReb', 'assists',
                                      'pFouls', 'steals', 'turnovers', 'blocks', 'plusMinus'])


# Loop over the gameIds and hit RapidAPI for the stats
for game in gameIds:
    print(game)
    
    # Limited to 10 requests per minute, so add a lag
    time.sleep(6)

    # Increment the counter
    counter += 1

    # Hit RapidAPI for this game
    url = f"https://api-nba-v1.p.rapidapi.com/statistics/players/gameId/{game}"

    headers = {
        'x-rapidapi-host': "api-nba-v1.p.rapidapi.com",
        'x-rapidapi-key': "40cfc5891cmshe48d38938873f74p12526ejsnb332fefe9905"
        }

    response = requests.request("GET", url, headers=headers)


    # Set the response as a JSON and extract the games section
    gameStats = json.loads(response.text)
    try:
        gameStats = gameStats['api']['statistics']
    except KeyError:
        print(gameStats)
        continue

    # Loop over the stats and parse needed fields
    for player in gameStats:

        # Grab needed fields
        gameId = player['gameId']
        playerId = player['playerId']
        teamId = player['teamId']
        Points = player['points']
        Position = player['pos']
        Minutes = player['min']
        FGM = player['fgm']
        FGA = player['fga']
        FGP = player['fgp']
        FTM = player['ftm']
        FTA = player['fta']
        TPM = player['tpm']
        TPA = player['tpa']
        TPP = player['tpp']
        offReb = player['offReb']
        defReb = player['defReb']
        totReb = player['totReb']
        assists = player['assists']
        pFouls = player['pFouls']
        steals = player['steals']
        turnovers = player['turnovers']
        blocks = player['blocks']
        plusMinus = player['plusMinus']

        # Create list to add to dataframe
        nba_playerStats_list = [gameId, playerId, teamId, Points, Position, Minutes, FGM, FGA, FGP, FTM, FTA, TPM, TPA, 
                                TPP, offReb, defReb, totReb, assists, pFouls, steals, turnovers, blocks, plusMinus]

        # Append the list to the dataframe         
        NBA_GameStats.loc[len(NBA_GameStats)] = nba_playerStats_list
        
NBA_GameStats.head()

Unnamed: 0,gameId,playerId,teamId,Points,Position,Minutes,FGM,FGA,FGP,FTM,...,TPP,offReb,defReb,totReb,assists,pFouls,steals,turnovers,blocks,plusMinus


In [24]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Create the connection to the SQLite database
engine = create_engine('sqlite:///Data/PropAnalysis.db', echo=True)
sqlite_connection = engine.connect()

# Save NBA_GameStats dataframe to SQLite
sqlite_table = "NBA_GameStats"
NBA_GameStats.to_sql(sqlite_table, sqlite_connection, if_exists='append')

# Close connections when done
conn.close()
sqlite_connection.close()

2022-01-16 13:35:35,728 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("NBA_GameStats")
2022-01-16 13:35:35,729 INFO sqlalchemy.engine.Engine [raw sql] ()
2022-01-16 13:35:35,733 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' ORDER BY name
2022-01-16 13:35:35,734 INFO sqlalchemy.engine.Engine [raw sql] ()


### Link NBA Stats to Projections and Lines 

In [27]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Query NBA_GameStats table into dataframe
NBA_GameStats = pd.read_sql_query("SELECT * FROM NBA_GameStats", conn)

# Query bet_offers_df table into dataframe
bet_offers_df = pd.read_sql_query("SELECT * FROM bet_offers_df", conn)

# Close connection when done
conn.close()

NBA_GameStats.head()

Unnamed: 0,index,gameId,playerId,teamId,Points,Position,Minutes,FGM,FGA,FGP,...,TPP,offReb,defReb,totReb,assists,pFouls,steals,turnovers,blocks,plusMinus
0,0,10815,44,17,6,,15:13,2,4,50.0,...,0.0,0,2,2,0,4,0,3,1,-21
1,1,10815,126,17,14,C,24:55,5,14,35.7,...,33.3,1,7,8,2,0,2,0,2,-10
2,2,10815,286,17,6,,19:46,3,3,100.0,...,0.0,0,7,7,1,2,0,1,1,-9
3,3,10815,1007,17,11,SG,26:52,5,12,41.7,...,20.0,0,3,3,2,1,0,1,0,-15
4,4,10815,1867,17,11,SF,23:40,3,7,42.9,...,50.0,1,2,3,3,1,1,5,0,-1


In [28]:
bet_offers_df.tail()

Unnamed: 0,index,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,...,Outcome2_Label,Outcome2_Odds,Line2ProjDiff,Outcome1_ImpliedProbability,Outcome2_ImpliedProbability,Bet_Juice,NBA_Stat,BetSuggestion,BetResult,W/L
760,800,NBA,ORL Magic @ DAL Mavericks,"Sat, Jan 15",09:40 PM,Gary Harris,Points + Assists,11.9,15.5,Over,...,Under,-135,3.6,50,57.0,50.0,8.0,Under,Under,W
761,801,NBA,ORL Magic @ DAL Mavericks,"Sat, Jan 15",09:40 PM,Gary Harris,Assists + Rebounds,4.0,4.5,Over,...,Under,-175,0.5,44,64.0,44.0,2.0,Under,Under,W
762,802,NBA,ORL Magic @ DAL Mavericks,"Sat, Jan 15",09:40 PM,Franz Wagner,Points + Rebounds,20.3,20.5,Over,...,Under,-105,0.2,56,51.0,56.0,8.0,Under,Under,W
763,803,NBA,ORL Magic @ DAL Mavericks,"Sat, Jan 15",09:40 PM,Franz Wagner,Assists + Rebounds,7.6,7.5,Over,...,Under,105,0.1,59,49.0,59.0,5.0,Over,Under,L
764,804,NBA,ORL Magic @ DAL Mavericks,"Sat, Jan 15",09:40 PM,Franz Wagner,Points + Assists,18.1,18.5,Over,...,Under,-115,0.4,55,53.0,55.0,9.0,Under,Under,W


In [31]:
# Isolate bet_offers_df to just rows that have a missing value in NBA_Stat
bet_offers_df = bet_offers_df[bet_offers_df['NBA_Stat'] == 'NA']
bet_offers_df.head()

Unnamed: 0,index,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,...,Outcome2_Label,Outcome2_Odds,Line2ProjDiff,Outcome1_ImpliedProbability,Outcome2_ImpliedProbability,Bet_Juice,NBA_Stat,BetSuggestion,BetResult,W/L


In [40]:
# Link the NBA stat fields with the bet label tracked
stat_label_map = {
    'Assists': 'assists',
    'Points': 'Points',
    'Rebounds': 'totReb'
}

# Loop over the rows of the bet_offers_dailydf and pull in NBA game stats for that player
for i, row in bet_offers_df.iterrows():

    # Extract needed fields
    Game = row['Game']
    startDate = row['StartDate']
    startTime = row['StartTime']
    Player = row['Player']
    try:
        playerId = NBA_Players.loc[NBA_Players["fullName"] == Player, "playerId"].values[0]
    except:
        playerId = 0
    betLabel = row['BetLabel']    
    bet_parts = betLabel.split("+")
    bet_parts = [x.strip() for x in bet_parts]
    bet_parts.sort()
    # Find the team Ids
    hTeamId, aTeamId = findTeamIds(Game)
    # Find the gameId 
    gameId = findGameId(Game, startDate, startTime)
    print(Player, playerId, hTeamId, aTeamId, gameId, bet_parts)

    # Find the NBA Stats if the game and player are found
    if (playerId != 0) & (gameId != 0):

        # Extract the NBA Stats from the NBA_GameStats table based off of the betLabel
        if len(bet_parts) == 3:

            # Check that the labels are those in the stat_label_map 
            # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
            if list(bet_parts) == stat_label_keys:

                # Create a variable to store the NBA Stat
                NBA_Stat = 0

                # Loop over the bet parts and add the projections
                for part in bet_parts:

                    try:
                        # Use the player name and the statistic to look up the value in the NBA_GameStats table
                        stat = NBA_GameStats.loc[(NBA_GameStats.gameId == gameId) & (NBA_GameStats.playerId == playerId),
                                                stat_label_map[part]].iloc[0]
                        # Add the projection to the total
                        NBA_Stat += int(stat)
                    except:
                        NBA_Stat = 'NA'

                # Set the stat in the bet_offers_dailydf table
                bet_offers_df.loc[i, "NBA_Stat"] = NBA_Stat

        elif len(bet_parts) == 2:

            # Check that both the labels are in the stat_label_lookup
            # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
            if (bet_parts[0] in stat_label_keys) & (bet_parts[1] in stat_label_keys) :

                # Create a variable to store the NBA Stat
                NBA_Stat = 0

                # Loop over the bet parts and add the projections
                for part in bet_parts:
                    try:
                        # Use the player name and the statistic to look up the value in the NBA_GameStats table
                        stat = NBA_GameStats.loc[(NBA_GameStats.gameId == gameId) & (NBA_GameStats.playerId == playerId),
                                                stat_label_map[part]].iloc[0]
                        # Add the projection to the total
                        NBA_Stat += int(stat)
                    except:
                        NBA_Stat = 'NA'

                print(NBA_Stat)
                # Set the stat in the bet_offers_dailydf table
                bet_offers_df.loc[i, "NBA_Stat"] = NBA_Stat

        elif len(bet_parts) == 1:

            if betLabel in stat_label_map.keys():
                try:
                    # Use the player name and the statistic to look up the value in the NBA_GameStats table
                    NBA_Stat = int(NBA_GameStats.loc[(NBA_GameStats.gameId == gameId) & (NBA_GameStats.playerId == playerId),
                                            stat_label_map[betLabel]].iloc[0])
                except:
                    NBA_Stat = 'NA'
                # Set the stat in the bet_offers_dailydf table
                bet_offers_df.loc[i, "NBA_Stat"] = NBA_Stat

    else:

        continue

bet_offers_df.head()

Hamidou Diallo 960 10 28 10182 ['Points', 'Rebounds']
NA


Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,Outcome1_Odds,Outcome2_Label,Outcome2_Odds,Line2ProjDiff,Outcome1_ImpliedProbability,Outcome2_ImpliedProbability,Bet_Juice,NBA_Stat
0,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Hamidou Diallo,Points + Rebounds,14.7,18.5,Over,-130,Under,100,3.8,56,50,56,
1,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Hamidou Diallo,Points + Assists + Rebounds,14.7,20.5,Over,-105,Under,-125,5.8,51,56,51,
2,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Hamidou Diallo,Assists + Rebounds,5.8,6.5,Over,-125,Under,-105,0.7,56,51,56,
3,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Cameron Payne,Points,8.9,9.5,Over,-145,Under,110,0.6,59,48,59,
4,NBA,PHO Suns @ DET Pistons,"Sun, Jan 16",01:10 PM,Hamidou Diallo,Points + Assists,11.5,14.5,Over,-135,Under,105,3.0,57,49,57,


### Grade the Bets versus the Projections

In [324]:
# Format columns
bet_offers_dailydf['NBA_Stat'] = pd.to_numeric(bet_offers_dailydf.loc[:,'NBA_Stat'], errors='coerce')
# Drop rows that we couldn't get an NBA_Stat for 
bet_offers_dailydf = bet_offers_dailydf.dropna(how='any', subset=['NBA_Stat'])
# Create a column for the bet suggestion based off the SportsLine_Projection and DK_Line
bet_offers_dailydf.loc[bet_offers_dailydf['SportsLine_Projection'] > 
                       bet_offers_dailydf['DK_Line'], 'BetSuggestion'] = 'Over'
bet_offers_dailydf.loc[bet_offers_dailydf['SportsLine_Projection'] == 
                       bet_offers_dailydf['DK_Line'], 'BetSuggestion'] = 'Push'
bet_offers_dailydf.loc[bet_offers_dailydf['SportsLine_Projection'] < 
                       bet_offers_dailydf['DK_Line'], 'BetSuggestion'] = 'Under'
# Create a column for the bet result based off the DK_Line and NBA_Stat
bet_offers_dailydf.loc[bet_offers_dailydf['NBA_Stat'] > bet_offers_dailydf['DK_Line'], 'BetResult'] = 'Over'
bet_offers_dailydf.loc[bet_offers_dailydf['NBA_Stat'] == bet_offers_dailydf['DK_Line'], 'BetResult'] = 'Push'
bet_offers_dailydf.loc[bet_offers_dailydf['NBA_Stat'] < bet_offers_dailydf['DK_Line'], 'BetResult'] = 'Under'
# Create a column for whether the bet was "won" or "lost"
bet_offers_dailydf.loc[bet_offers_dailydf['BetSuggestion'] == bet_offers_dailydf['BetResult'], 'W/L'] = 'W'
bet_offers_dailydf.loc[bet_offers_dailydf['BetSuggestion'] != bet_offers_dailydf['BetResult'], 'W/L'] = 'L'
# Create a column with a "bet grade" for the bet based off the Line2ProjDiff
bet_offers_dailydf['BetGrade'] = bet_offers_dailydf['Line2ProjDiff'].apply(betGrade)

In [375]:
# Create a table to show the win results by bet grade
df = bet_offers_df.groupby(['BetGrade', 'W/L'])['W/L'].size()
df_pcts = df.groupby(level=0).apply(lambda x: round(100 * x / float(x.sum()),1))
df_pcts

BetGrade  W/L
A         L       8.3
          W      91.7
B         L      32.1
          W      67.9
C         L      50.3
          W      49.7
D         L      46.9
          W      53.1
F         L      55.7
          W      44.3
Name: W/L, dtype: float64

### Update SQL Tables

In [None]:
# Pull in bet_offers_df

# Append new results

# Remove any duplicates and keep the most recent record

# Update and replace the table in SQLite3