# Bet Prop Analysis

## Part 2 - Updates 

This script is meant to handle the daily data updates to the SQLite database (PropAnalysis.db). Just like in Part 1, this script will scrape DraftKings for NBA and NFL bet odds, SportsLine for the NBA and NFL projections, and RapidAPI's API-NBA feed for the actual game stats. However, in this script we will not be replacing the table, but appending to it. 

During this update process we will also be able to "score" our bets to see if they won or lost. After collecting a good sample size of results, we will be able to compare our win rate to our estimated edge to see if their is a good correlation that would imply a profitable betting model. 

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import json
from datetime import datetime, timedelta
from pytz import timezone
import sqlite3
from sqlalchemy import create_engine
import time

### Helper Functions 

In [9]:
def find_between( s, first, last ):
    try:
        start = s.index( first ) + len( first )
        end = s.index( last, start )
        return s[start:end]
    except ValueError:
        return ""

In [10]:
def american2DecimalOdds(americanBetOdds):
    """
    @betOdds (str) the American odds of the bet. Must be prefaced with a '+' or '-'
    
    Returns the bet odds in decimal format. 
    """
    try:
        if (americanBetOdds[0] == '+'):
            americanBetOdds = int(americanBetOdds[1:])
            decimalOdds = (americanBetOdds/100) + 1
            # Return the decimalOdds in a clean format
            return(round(decimalOdds,2))
        elif americanBetOdds[0] == '-':
            americanBetOdds = int(americanBetOdds[1:])
            decimalOdds = (100/americanBetOdds) + 1
            # Return the decimalOdds in a clean format
            return(round(decimalOdds,2))
        else:
            print("Bet odds must begin with a '+' or '-'")
    except:
        return(None)

In [11]:
def impliedOdds(betOdds):
    """
    @betOdds (str) the American odds of the bet. Must be prefaced with a '+' or '-'
    
    Takes in the betOdds and returns the implied probability of the bet
    """
    try:
        # First need to convert the American odds to decimal odds
        decimalOdds = american2DecimalOdds(betOdds)

        # Use the decimal odds to return the implied probability
        probability = 1/decimalOdds * 100

        # Return the probability rounded to the nearest whole number
        return(round(probability))
    except:
        return(None)

## Pull in SQLite Tables 

In [13]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Query bet_offers_df table into dataframe
bet_offers_df = pd.read_sql_query("SELECT * FROM bet_offers_df", conn)

# Query NBA_Teams table into dataframe
NBA_Teams = pd.read_sql_query("SELECT * FROM NBA_Teams", conn)

# Query NBA_Games table into dataframe
NBA_Games = pd.read_sql_query("SELECT * FROM NBA_Games", conn)

# Query NBA_Players table into dataframe
NBA_Players = pd.read_sql_query("SELECT * FROM NBA_Players", conn)

# Query NBA_GameStats table into dataframe
NBA_GameStats = pd.read_sql_query("SELECT * FROM NBA_GameStats", conn)

# Close connection when done
conn.close()

## Pull Today's Data 

### DraftKings - NBA and NFL  

In [6]:
# Create a dataframe to append results to
bet_offers_dailydf = pd.DataFrame(columns=['League', 'Game', 'StartDate', 'StartTime', 'Player', 'BetLabel', 
                                      'SportsLine_Projection', 'DK_Line', 'Outcome1_Label', 'Outcome1_Odds', 
                                      'Outcome2_Label', 'Outcome2_Odds'])

# Create a list of the DraftKings URLs to parse
urls = ["https://sportsbook.draftkings.com/leagues/basketball/88670846", 
        "https://sportsbook.draftkings.com/leagues/football/88670561"]

# Loop over the URLs and parse results
for url in urls:
    
    # Get the BeautifulSoup data from the DraftKings website
    dk_response = requests.get(url)
    dk_soup = BeautifulSoup(dk_response.text, "html.parser")

    # Narrow the Beautiful Soup extract to just the field of interest "window.__INITIAL_STATE__"

    # Filter out opening and closing <script> tags
    dk_scrape = str(list(list(list(dk_soup.children)[2])[3])[11]).replace("<script>","").replace("</script>", "")
    # Remove leading and trailing whitespace
    dk_scrape = dk_scrape.strip()
    # Split sections
    dk_scrape = dk_scrape.split(";\n")
    # Isolate to json of interest
    dk_scrape = dk_scrape[6].strip()
    # Format as json dictionary
    dk_scrape = dk_scrape.replace("window.__INITIAL_STATE__ = ","")
    dk_scrape = json.loads(dk_scrape)
    
    # Grab the sport ID from the scrape
    sportId = list(dk_scrape['eventGroups'].keys())[0]
    
    # From the full scrape of the page, pull a list of the games to loop over and extract data from
    games = dk_scrape['eventGroups'][sportId]['events'].keys()
    
    # Loop over games
    for index, game in enumerate(games):

        # Set the game JSON as variable
        game_details = dk_scrape['eventGroups'][sportId]['events'][game]
        # Get the eventId so I can scrape the actual props
        eventId = game_details['eventId']

        # Web scrape Draft Kings for player props
        props_url = f"https://sportsbook.draftkings.com/event/{eventId}"
        response = requests.get(props_url)
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Clean BeautifulSoup response
        # Filter out opening and closing <script> tags
        scrape = str(list(list(list(soup.children)[2])[3])[11]).replace("<script>","").replace("</script>", "")
        # Remove leading and trailing whitespace
        scrape = scrape.strip()
        # Split sections
        scrape = scrape.split(";\n")
        # Isolate to json of interest
        scrape = scrape[6].strip()
        # Format as json dictionary
        scrape = scrape.replace("window.__INITIAL_STATE__ = ","")
        scrape = json.loads(scrape)
        
        # Parse the scrape results
        eventGroupId = list(scrape['eventGroups'].keys())[0]
        providerEventId = list(scrape['eventGroups'][eventGroupId]['events'].keys())[0]
        providerOfferId = list(scrape['offers'][eventGroupId].keys())[0]
        eventId = scrape['eventGroups'][eventGroupId]['events'][providerEventId]['eventId']
        game_details = scrape['eventGroups'][eventGroupId]['events'][providerEventId]
        game = game_details['name']
        eventGroup = game_details['eventGroupName']
        teamName1 = game_details['teamName1']
        teamName1 = game_details['teamName2']
        startDate = game_details['startDate']
        date, time = startDate.split('T')
        dt = date + ' ' + time[:8]
        dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
        # Format as right timezone (-5 hours)
        dt = dt - timedelta(hours=5)
        date = dt.strftime('%a, %b %d')
        time = dt.strftime('%I:%M %p')

        # Isolate the bet offers from the beautiful soup scrape
        offers = scrape['offers'][eventGroupId]

        # Loop over offers and add to dataframe
        for index, offer in enumerate(offers):

            # Create a list for parsing the offers 
            offers_parsed = [eventGroup, game, date, time, '', '', '', '', '', '', '', '']

            # Parse the betting offer
            offer_dict = scrape['offers'][eventGroupId][offer]
            try:
                providerOfferId = offer_dict['providerOfferId']
                providerId = offer_dict['providerId']
                providerEventId = offer_dict['providerEventId']
                bet_label = offer_dict['label']
                isOpen = offer_dict['isOpen']
                outcomes = offer_dict['outcomes']
            except:
                continue

            # Assign to list
            offers_parsed[5] = bet_label    

            # Extract outcomes
            if len(outcomes) == 1:

                # Parse the outcome
                outcome_label = outcomes[0]['label']
                outcome_odds = outcomes[0]['oddsAmerican']
                offers_parsed[8] = outcome_label
                offers_parsed[9] = outcome_odds

                # Append the list to the dataframe 
                bet_offers_dailydf.loc[len(bet_offers_dailydf)] = offers_parsed

            elif len(outcomes) == 2:

                # Parse the outcomes
                for i, x in enumerate(outcomes):
                    if i == 0:
                        outcome_label = outcomes[i]['label']
                        try:
                            outcome_line = outcomes[i]['line']
                        except:
                            outcome_line = ''
                        outcome_odds = outcomes[i]['oddsAmerican']
                        offers_parsed[7] = outcome_line
                        offers_parsed[8] = outcome_label
                        offers_parsed[9] = outcome_odds
                    else:
                        outcome_label = outcomes[i]['label']
                        try:
                            outcome_line = outcomes[i]['line']
                        except:
                            continue
                        outcome_odds = outcomes[i]['oddsAmerican']
                        offers_parsed[10] = outcome_label
                        offers_parsed[11] = outcome_odds

                # Append the list to the dataframe 
                bet_offers_dailydf.loc[len(bet_offers_dailydf)] = offers_parsed

            else:

                continue

# Preview output
bet_offers_dailydf.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,Outcome1_Odds,Outcome2_Label,Outcome2_Odds
0,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,,Total Points Odd/Even - 2nd Half,,,Even,-110,,
1,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,,Total Points Odd/Even - 3rd Quarter,,,Even,-115,,
2,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,,Total Points Odd/Even,,,Odd,-110,,
3,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,,Winning Margin (3 Way),,,To Win By 6+,2800,,
4,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,,Gary Payton II Points + Rebounds,,11.5,Over,-140,Under,105.0


### SportsLine Projections - NBA 

In [7]:
# Web scrape Fantasy Pros for relevant information
url = 'https://www.sportsline.com/nba/expert-projections/simulation/'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

# Isolate soup to the table of interest
output = str(list(list(list(soup.children)[1])[1])[1])
# Format as json dictionary
output = output.replace("""<script id="__NEXT_DATA__" type="application/json">""","").replace("</script>","")
output = json.loads(output)
projections = output['props']['initialState']['fantasyState']['projectionsPageState']['data']['projections']

# Create a dataframe to append results to
NBA_SportsLine_Projections = pd.DataFrame(columns=['LEAGUE', 'PLAYER', 'POS', 'TEAM', 'GAME', 'FP', 'PTS', 'MIN', 'FG',
                                                   'FGA', 'AST', 'TRB', 'DRB', 'ORB', 'BK', 'ST', 'TO', 'FT', 'FTP', 'FGP'])
                                                   
# Loop over the projections and parse
for i, projection in enumerate(projections):
    # Create an empty list to update the data for
    player_projections = [0] * 20
    # Set LEAGUE columns
    player_projections[0] = 'NBA'
    # Loop over list of projections to scrape
    projectionFields = projection['projectionFields']
    # Create a counter to add additional fields to correct list position
    counter = 1
    for x in projectionFields:
        field = x['field']
        if field in NBA_SportsLine_Projections.columns:
            try:
                value = x['value']
                player_projections[counter] = value
            except KeyError:
                player_projections[counter] = 0
            counter += 1

    # Append the list to the dataframe         
    NBA_SportsLine_Projections.loc[len(NBA_SportsLine_Projections)] = player_projections

# Preview output
NBA_SportsLine_Projections.head()

Unnamed: 0,LEAGUE,PLAYER,POS,TEAM,GAME,FP,PTS,MIN,FG,FGA,AST,TRB,DRB,ORB,BK,ST,TO,FT,FTP,FGP
0,NBA,Nikola Jokic,C,DEN,POR@DEN,54.3,26.5,33,10.2,18.2,7.7,12.8,9.8,3.0,0.8,1.5,2.8,4.4,0.81,0.56
1,NBA,James Harden,SG,BKN,OKC@BKN,50.9,26.1,37,7.8,18.9,9.9,8.2,7.2,1.0,0.7,1.2,5.1,7.4,0.87,0.41
2,NBA,Giannis Antetokounmpo,PF,MIL,GS@MIL,50.1,27.8,33,9.9,18.3,6.0,11.4,9.3,2.1,1.4,1.0,3.5,6.8,0.69,0.54
3,NBA,Ja Morant,PG,MEM,MIN@MEM,42.3,24.0,34,9.1,18.0,7.7,5.1,4.2,0.8,0.5,1.2,3.9,4.3,0.76,0.51
4,NBA,Stephen Curry,PG,GS,GS@MIL,40.9,25.5,35,8.3,19.4,5.9,5.0,4.6,0.4,0.4,1.1,2.8,4.0,0.92,0.43


### SportsLine Projections - NFL

In [8]:
# Web scrape Fantasy Pros for relevant information
url = 'https://www.sportsline.com/nfl/expert-projections/simulation/'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

# Isolate soup to the table of interest
output = str(list(list(list(soup.children)[1])[1])[1])
# Format as json dictionary
output = output.replace("""<script id="__NEXT_DATA__" type="application/json">""","").replace("</script>","")
output = json.loads(output)
projections = output['props']['initialState']['fantasyState']['projectionsPageState']['data']['projections']

# Create a dataframe to append results to
NFL_SportsLine_Projections = pd.DataFrame(columns=['LEAGUE', 'PLAYER', 'POS', 'TEAM', 'GAME', 'FP', 'PASSYD', 
                                                   'RUSHYD', 'RECYD'])

# Loop over the projections and parse
for i, projection in enumerate(projections):
    # Create an empty list to update the data for
    player_projections = [0] * 9
    # Set LEAGUE columns
    player_projections[0] = 'NFL'
    # Loop over list of projections to scrape
    projectionFields = projection['projectionFields']
    # Create a counter to add additional fields to correct list position
    counter = 1
    for x in projectionFields:
        field = x['field']
        if field in NFL_SportsLine_Projections.columns:
            try:
                value = x['value']
                player_projections[counter] = value
            except KeyError:
                player_projections[counter] = 0
            counter += 1

    # Append the list to the dataframe         
    NFL_SportsLine_Projections.loc[len(NFL_SportsLine_Projections)] = player_projections

# Preview output            
NFL_SportsLine_Projections.head()

Unnamed: 0,LEAGUE,PLAYER,POS,TEAM,GAME,FP,PASSYD,RUSHYD,RECYD
0,NFL,Tom Brady,QB,TB,PHI@TB,25.68,297,3,0
1,NFL,Patrick Mahomes,QB,KC,PIT@KC,25.41,275,23,0
2,NFL,Joe Burrow,QB,CIN,LV@CIN,24.89,260,9,0
3,NFL,Josh Allen,QB,BUF,NE@BUF,22.69,230,43,0
4,NFL,Dak Prescott,QB,DAL,SF@DAL,21.95,268,10,0


### Connect Bet Lines to Projections 

In [14]:
# Get a list of the players in the projections tables
nba_players = NBA_SportsLine_Projections['PLAYER'].to_list()
nfl_players = NFL_SportsLine_Projections['PLAYER'].to_list()
SportsLine_players = nba_players + nfl_players

# For every row in bef_offers_df, check if the label contains the name of a player, if so add it to the Player column
for i, row in bet_offers_dailydf.iterrows():
    betLabel = row["BetLabel"]
    player = [x for x in SportsLine_players if x in betLabel]
    if len(player) > 0:
        # Extract the player name from the list
        player = player[0]
        # Update the Player field in the df
        bet_offers_dailydf.loc[i, "Player"] = player
        # Update the BetLabel to remove the player name to isolate the stat
        bet_offers_dailydf.loc[i, "BetLabel"] = betLabel.replace(player, "").strip()
        
# Link the SportsLine projection fields with the bet label tracked
stat_label_map = {
    'Assists': 'AST',
    'Points': 'PTS',
    'Rebounds': 'TRB'
}

stat_label_keys = list(stat_label_map.keys())

# Create a column for the SportsLine Projection
for i, row in bet_offers_dailydf.iterrows():
    # Collet needed variables
    BetLabel = row["BetLabel"]
    bet_parts = BetLabel.split("+")
    bet_parts = [x.strip() for x in bet_parts]
    bet_parts.sort()
    player = row["Player"]

    if bet_parts is not None:

        if len(bet_parts) == 3:

            # Check that the labels are those in the stat_label_map 
            # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
            if list(bet_parts) == stat_label_keys:
        
                # Create a variable to store the NBA_SportsLine_Projection
                NBA_SportsLine_Projection = 0
                # Loop over the bet parts and add the projections
                for part in bet_parts:
                    # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                    projection = NBA_SportsLine_Projections.loc[NBA_SportsLine_Projections.PLAYER == player, 
                                                                stat_label_map[part]].iloc[0]
                    # Add the projection to the total
                    NBA_SportsLine_Projection += projection
                # Set the projection in the bet_offers_dailydf table
                bet_offers_dailydf.loc[i, "SportsLine_Projection"] = NBA_SportsLine_Projection

        elif len(bet_parts) == 2:
            
            # Check that both the labels are in the stat_label_lookup
            # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
            if (bet_parts[0] in stat_label_keys) & (bet_parts[1] in stat_label_keys) :
                # Create a variable to store the NBA_SportsLine_Projection
                NBA_SportsLine_Projection = 0
                # Loop over the bet parts and add the projections
                for part in bet_parts:
                    # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                    projection = NBA_SportsLine_Projections.loc[NBA_SportsLine_Projections.PLAYER == player, 
                                                                stat_label_map[part]].iloc[0]
                    # Add the projection to the total
                    NBA_SportsLine_Projection += projection
                # Set the projection in the bet_offers_dailydf table
                bet_offers_dailydf.loc[i, "SportsLine_Projection"] = NBA_SportsLine_Projection

        elif len(bet_parts) == 1:

            if BetLabel in stat_label_map.keys():
                # Use the player name and the statistic to look up the value in the NBA_SportsLine_Projections table
                NBA_SportsLine_Projection = NBA_SportsLine_Projections.loc[NBA_SportsLine_Projections.PLAYER == player, 
                                                                           stat_label_map[BetLabel]].iloc[0]
                # Set the projection in the bet_offers_dailydf table
                bet_offers_dailydf.loc[i, "SportsLine_Projection"] = NBA_SportsLine_Projection

        else:
            
            continue
    
    else:
        
        continue

# Format columns
bet_offers_dailydf['SportsLine_Projection'] = pd.to_numeric(bet_offers_dailydf['SportsLine_Projection'], errors='coerce')
bet_offers_dailydf['DK_Line'] = pd.to_numeric(bet_offers_dailydf['DK_Line'], errors='coerce')
bet_offers_dailydf['Outcome1_Label'] = bet_offers_dailydf['Outcome1_Label'].astype(str)
bet_offers_dailydf['Outcome2_Label'] = bet_offers_dailydf['Outcome2_Label'].astype(str)

# Add a column for difference between the projection and line
bet_offers_dailydf["Line2ProjDiff"] = bet_offers_dailydf['SportsLine_Projection'] - bet_offers_dailydf['DK_Line']
# Add columns with the implied probabilities of the odds
bet_offers_dailydf['Outcome1_ImpliedProbability'] = bet_offers_dailydf['Outcome1_Odds'].apply(impliedOdds)
bet_offers_dailydf['Outcome2_ImpliedProbability'] = bet_offers_dailydf['Outcome2_Odds'].apply(impliedOdds)
# Add a column for the bet "juice"
bet_offers_dailydf['Bet_Juice'] = bet_offers_dailydf['Outcome1_ImpliedProbability'] 
+ bet_offers_dailydf['Outcome2_ImpliedProbability'] - 100

# Drop rows which contain any NaN value in the selected columns
bet_offers_dailydf = bet_offers_dailydf.dropna(how='any', subset=['SportsLine_Projection', 'DK_Line'])

# Preview output
bet_offers_dailydf.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Outcome1_Label,Outcome1_Odds,Outcome2_Label,Outcome2_Odds,Line2ProjDiff,Outcome1_ImpliedProbability,Outcome2_ImpliedProbability,Bet_Juice
10,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,Grayson Allen,Assists,1.7,0.5,Over,115,Under,-155,1.2,47,61.0,47
11,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,Grayson Allen,Points + Rebounds,16.1,19.5,Over,-125,Under,-110,-3.4,56,52.0,56
12,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,Grayson Allen,Points,12.8,16.5,Over,-115,Under,-115,-3.7,53,53.0,53
13,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,Andrew Wiggins,Points + Rebounds,20.7,16.5,Over,105,Under,-135,4.2,49,57.0,49
14,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",07:30 PM,Andrew Wiggins,Rebounds,4.0,2.5,Over,105,Under,-135,1.5,49,57.0,49


In [15]:
Game = "GS Warriors @ MIL Bucks"
startDate = "Thu, Jan 13"
startTime = "07:30 PM"
HTeam, ATeam = Game.split("@")
print(HTeam, ATeam)

GS Warriors   MIL Bucks


In [None]:
def findGameId(Game, StartDate, StartTime):
    """
    
    """