In [23]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import json
from datetime import date, datetime, timedelta
from pytz import timezone
import scipy.stats
import sqlite3
from sqlalchemy import create_engine
import time

## Helper Functions

In [44]:
def american2DecimalOdds(americanBetOdds):
    """
    @betOdds (str) the American odds of the bet. Must be prefaced with a '+' or '-'
    
    Returns the bet odds in decimal format. 
    """
    try:
        if (americanBetOdds[0] == '+'):
            americanBetOdds = int(americanBetOdds[1:])
            decimalOdds = (americanBetOdds/100) + 1
            # Return the decimalOdds in a clean format
            return(round(decimalOdds,2))
        elif americanBetOdds[0] == '-':
            americanBetOdds = int(americanBetOdds[1:])
            decimalOdds = (100/americanBetOdds) + 1
            # Return the decimalOdds in a clean format
            return(round(decimalOdds,2))
        else:
            print("Bet odds must begin with a '+' or '-'")
    except:
        return(None)

In [45]:
def impliedOdds(betOdds):
    """
    @betOdds (str) the American odds of the bet. Must be prefaced with a '+' or '-'
    
    Takes in the betOdds and returns the implied probability of the bet
    """
    try:
        # First need to convert the American odds to decimal odds
        decimalOdds = american2DecimalOdds(betOdds)

        # Use the decimal odds to return the implied probability
        probability = 1/decimalOdds * 100

        # Return the probability rounded to the nearest whole number
        return(round(probability))
    except:
        return(None)

In [92]:
def findPlayerStatStdDevs(Player, BetLabel):
    """
    @Player str, the name of the player. Ex: Kevon Looney
    @BetLabel str, the DraftKings listed bet name. Ex: Points + Assists + Rebound
    """
    # Find Player Id 
    try:
        playerId = NBA_Players.loc[NBA_Players["fullName"] == Player, "playerId"].values[0]
    except IndexError:
        return('NA')

    # Filter dataset to Giannis Antetokounmpo 
    df = NBA_GameStats[NBA_GameStats['playerId'] == 20]

    # Split betLabel into component parts, if possible  
    bet_parts = betLabel.split("+")
    bet_parts = [x.strip() for x in bet_parts]
    bet_parts.sort()

    # Link the NBA stat fields with the bet label tracked
    stat_label_map = {
        'Assists': 'assists',
        'Points': 'Points',
        'Rebounds': 'totReb'
    }
    stat_label_keys = list(stat_label_map.keys())

    # Extract the std_dev of NBA Stats from the NBA_GameStats table based off of the betLabel
    if len(bet_parts) == 3:

        # Check that the labels are those in the stat_label_map 
        # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
        if list(bet_parts) == stat_label_keys:

            # Create a variable to store the NBA Stat
            std_dev = 0

            # Loop over the bet parts, find the standard deviation, and add them
            for part in bet_parts:

                # Find std deviations for stats of interest (Points, Assists, Rebounds)
                s = round(df.loc[:,stat_label_map[part]].std(),1)
                std_dev += s


    elif len(bet_parts) == 2:

        # Check that both the labels are in the stat_label_lookup
        # If not this is likely due to a miss in the Player matching that results in overlap in the BetLabel
        if (bet_parts[0] in stat_label_keys) & (bet_parts[1] in stat_label_keys) :

            # Create a variable to store the NBA Stat
            std_dev = 0

            # Loop over the bet parts, find the standard deviation, and add them
            for part in bet_parts:

                # Find std deviations for stats of interest (Points, Assists, Rebounds)
                s = round(df.loc[:,stat_label_map[part]].std(),1)
                std_dev += s

    else:

        if betLabel in stat_label_map.keys():

                # Find std deviations for stats of interest (Points, Assists, Rebounds)
                std_dev = round(df.loc[:,stat_label_map[part]].std(),1)
    
    return(std_dev)

In [91]:
def findEstimatedEdge(SportsLine_Projection, DK_Line, Std_Dev, Over_Odds, Under_Odds):
    """
    @SportsLine_Projection float
    @DK_Line float
    @StdDev float
    @Over_Odds text
    @Under_Odds text
    """
    # Find the edge if a Std_Dev was found
    if Std_Dev != 'NA':
        
        # Find the odds of going over and under the number
        cdf = scipy.stats.norm(SportsLine_Projection, Std_Dev).cdf(DK_Line)
        over_prob = round((1 - cdf) * 100,1)
        under_prob = round(cdf * 100,1)

        # Convert the odds to implied probability
        over_implied_odds = impliedOdds(over_odds)
        under_implied_odds = impliedOdds(under_odds)

        # Find the expected edge by comparing the probabilities with the implied odds
        over_expected_edge = round(over_prob - over_implied_odds,1)
        under_expected_edge = round(under_prob - under_implied_odds,1)

        # Return the larger edge value
        if over_expected_edge > under_expected_edge:
            return(over_expected_edge)
        else:
            return(under_expected_edge)
        
    else:
        
        return('NA')

In [101]:
def betGrade(EstimatedEdge):
    """
    @EstimatedEdge float: the estimated edge the bet has versus the DraftKings bet line given the SportsLine projection,
    the bet odds, and the player's standard deviations for the bet labels
    """
    if EstimatedEdge > 20:
        grade = 'A'
    elif EstimatedEdge > 15:
        grade = 'B'
    elif EstimatedEdge > 10:
        grade = 'C'
    elif EstimatedEdge > 5:
        grade = 'D'
    else:
        grade = 'F'
    return(grade)

## Read in and Format Tables 

In [96]:
BetOdds = pd.read_csv('Data/Backup/BetOdds2.csv')
BetOdds.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Over_Odds,Under_Odds,BetSuggestion,Estimated_Edge,NBA_Stat,BetResult,W/L
0,NBA,TOR Raptors @ DET Pistons,"Fri, Jan 14",7:10 PM,Trey Lyles,Points + Rebounds,9.7,15.5,-115,-115,Under,18.3,28.0,Over,L
1,NBA,TOR Raptors @ DET Pistons,"Fri, Jan 14",7:10 PM,Pascal Siakam,Points + Assists,22.9,28.5,-110,-125,Under,17.6,27.0,Under,W
2,NBA,ORL Magic @ CHA Hornets,"Fri, Jan 14",7:10 PM,Cole Anthony,Points + Assists,20.6,25.5,-110,-125,Under,15.3,16.0,Under,W
3,NBA,TOR Raptors @ DET Pistons,"Fri, Jan 14",7:10 PM,Pascal Siakam,Points + Rebounds,26.7,31.5,-110,-120,Under,15.0,34.0,Over,L
4,NBA,BOS Celtics @ PHI 76ers,"Fri, Jan 14",7:10 PM,Andre Drummond,Points + Rebounds,18.1,12.5,-125,-105,Over,14.6,17.0,Over,W


In [33]:
NBA_Players = pd.read_csv('Data/Backup/NBA_Players.csv')
NBA_Players.head(n = 10)

Unnamed: 0,index,playerId,teamId,fullName,firstName,lastName,Position,yearsPro,startNBA,College,Country,DateOfBirth,Height,Weight
0,0,2,28.0,Quincy Acy,Quincy,Acy,F,6,2012,Baylor,USA,1990-10-06,2.01,108.9
1,1,4,19.0,Steven Adams,Steven,Adams,C,8,2013,Pittsburgh,New Zealand,1993-07-20,2.11,120.2
2,2,5,26.0,Arron Afflalo,Arron,Afflalo,G,0,0,,,,,
3,3,8,4.0,LaMarcus Aldridge,LaMarcus,Aldridge,C-F,15,2006,Texas-Austin,USA,1985-07-19,2.11,113.4
4,4,17,15.0,Justin Anderson,Justin,Anderson,F-G,5,2015,Virginia,USA,1993-11-19,1.96,104.8
5,5,18,19.0,Kyle Anderson,Kyle,Anderson,F-G,7,2014,UCLA,USA,1993-09-20,2.06,104.3
6,6,20,21.0,Giannis Antetokounmpo,Giannis,Antetokounmpo,F,8,2013,Filathlitikos,Greece,1994-12-06,2.11,109.8
7,7,21,17.0,Carmelo Anthony,Carmelo,Anthony,F,18,2003,Syracuse,USA,1984-05-29,2.01,108.0
8,8,23,17.0,Trevor Ariza,Trevor,Ariza,F,17,2004,UCLA,USA,1985-06-30,2.03,97.5
9,9,26,,Netanel Artzi,Netanel,Artzi,F,0,0,,,,,


In [34]:
NBA_GameStats = pd.read_csv('Data/Backup/NBA_GameStats.csv')
NBA_GameStats.head()

Unnamed: 0,index,gameId,playerId,teamId,Points,Position,Minutes,FGM,FGA,FGP,...,TPP,offReb,defReb,totReb,assists,pFouls,steals,turnovers,blocks,plusMinus
0,0,10815,44,17,6.0,,15:13,2.0,4.0,50.0,...,0.0,0.0,2.0,2.0,0.0,4.0,0.0,3.0,1.0,-21.0
1,1,10815,126,17,14.0,C,24:55,5.0,14.0,35.7,...,33.3,1.0,7.0,8.0,2.0,0.0,2.0,0.0,2.0,-10.0
2,2,10815,286,17,6.0,,19:46,3.0,3.0,100.0,...,0.0,0.0,7.0,7.0,1.0,2.0,0.0,1.0,1.0,-9.0
3,3,10815,1007,17,11.0,SG,26:52,5.0,12.0,41.7,...,20.0,0.0,3.0,3.0,2.0,1.0,0.0,1.0,0.0,-15.0
4,4,10815,1867,17,11.0,SF,23:40,3.0,7.0,42.9,...,50.0,1.0,2.0,3.0,3.0,1.0,1.0,5.0,0.0,-1.0


## Find Std Dev for Player Stats

In [12]:
# Filter dataset to Giannis Antetokounmpo 
df = NBA_GameStats[NBA_GameStats['playerId'] == 20]
# Filter out na rows
df = df.dropna()
# Find std deviations for stats of interest (Points, Assists, Rebounds)
print("Points Std Deviation: ", round(df.loc[:,"Points"].std(),1))
print("Rebounds Std Deviation: ", round(df.loc[:,"totReb"].std(),1))
print("Assists Std Deviation: ", round(df.loc[:,"assists"].std(),1))

Points Std Deviation:  8.4
Rebounds Std Deviation:  4.0
Assists Std Deviation:  2.5


In [76]:
sportsline_projection = 27.8
std_dev = 8.4
dk_line = 34.5
over_odds = '-120'
under_odds = '-110'

# Find the odds of going over and under the number
cdf = scipy.stats.norm(sportsline_projection, std_dev).cdf(dk_line)
over_prob = round((1 - cdf) * 100,1)
under_prob = round(cdf * 100,1)
print("Over odds: ", over_prob) 
print("Under odds: ", under_prob)

Over odds:  21.3
Under odds:  78.7


In [64]:
# Convert the odds to implied probability
over_implied_odds = impliedOdds(over_odds)
under_implied_odds = impliedOdds(under_odds)
print(over_implied_odds)
print(under_implied_odds)

# Find the expected edge by comparing the probabilities with the implied odds
over_expected_edge = round(over_prob - over_implied_odds,1)
under_expected_edge = round(under_prob - under_implied_odds,1)
print("Over  Expected Edge: ", over_expected_edge)
print("Under Expected Edge: ", under_expected_edge)

55
52
Over  Expected Edge:  -33.7
Under Expected Edge:  26.7


In [93]:
for index, row in BetOdds.iterrows():
        
    # Extract needed fields
    Player = row['Player']
    BetLabel = row['BetLabel']
    SportsLine_Projection = row['SportsLine_Projection']
    DK_Line = row['DK_Line']
    Over_Odds = row['Over_Odds']
    Under_Odds = row['Under_Odds']

    # Find the Std Deviation for the BetLabel
    Std_Dev = findPlayerStatStdDevs(Player, BetLabel)

    # Find the Estimated Edge
    Estimated_Edge = findEstimatedEdge(SportsLine_Projection, DK_Line, Std_Dev, Over_Odds, Under_Odds)

    # Set the stat in the bet_offers_dailydf table
    BetOdds.loc[index, "Estimated_Edge"] = Estimated_Edge
    
BetOdds.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Over_Odds,Under_Odds,Line2Proj_Diff,Line2Proj_PctDiff,BetGrade,NBA_Stat,BetSuggestion,BetResult,W/L,Estimated_Edge
0,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",7:30 PM,Kevon Looney,Points + Assists,9.2,3.5,-140,105,5.7,162.86,B,8.0,Over,Over,W,14.9
1,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",7:30 PM,Grayson Allen,Points + Rebounds,16.1,14.5,-150,115,1.6,11.03,D,20.0,Over,Over,W,0.8
2,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",7:30 PM,Grayson Allen,Points,12.8,12.5,-110,-120,0.3,2.4,F,15.0,Over,Over,W,-3.1
3,NBA,GS Warriors @ MIL Bucks,"Thu, Jan 13",7:30 PM,Andrew Wiggins,Points + Rebounds,20.7,19.5,-115,-115,1.2,6.15,D,18.0,Over,Under,L,-0.6
4,NBA,PHI 76ers @ WAS Wizards,"Mon, Jan 17",2:10 PM,Daniel Gafford,Assists + Rebounds,7.2,2.5,-155,115,4.7,188.0,B,0.0,Over,Under,L,11.7


In [102]:
# Create a column with a "bet grade" for the bet based off the Line2ProjDiff
BetOdds['BetGrade'] = BetOdds['Estimated_Edge'].apply(betGrade)
BetOdds.head()

Unnamed: 0,League,Game,StartDate,StartTime,Player,BetLabel,SportsLine_Projection,DK_Line,Over_Odds,Under_Odds,BetSuggestion,Estimated_Edge,NBA_Stat,BetResult,W/L,BetGrade
0,NBA,TOR Raptors @ DET Pistons,"Fri, Jan 14",7:10 PM,Trey Lyles,Points + Rebounds,9.7,15.5,-115,-115,Under,18.3,28.0,Over,L,B
1,NBA,TOR Raptors @ DET Pistons,"Fri, Jan 14",7:10 PM,Pascal Siakam,Points + Assists,22.9,28.5,-110,-125,Under,17.6,27.0,Under,W,B
2,NBA,ORL Magic @ CHA Hornets,"Fri, Jan 14",7:10 PM,Cole Anthony,Points + Assists,20.6,25.5,-110,-125,Under,15.3,16.0,Under,W,B
3,NBA,TOR Raptors @ DET Pistons,"Fri, Jan 14",7:10 PM,Pascal Siakam,Points + Rebounds,26.7,31.5,-110,-120,Under,15.0,34.0,Over,L,C
4,NBA,BOS Celtics @ PHI 76ers,"Fri, Jan 14",7:10 PM,Andre Drummond,Points + Rebounds,18.1,12.5,-125,-105,Over,14.6,17.0,Over,W,C


In [103]:
# Save the file as a csv for analysis
BetOdds.to_csv('Data/Backup/BetOdds2.csv', index=False)

In [106]:
# Connect to SQLite
conn = sqlite3.connect('Data/PropAnalysis.db')

# Create the connection to the SQLite database
engine = create_engine('sqlite:///Data/PropAnalysis.db')
sqlite_connection = engine.connect()

# Save NBA_GameStats dataframe to SQLite
sqlite_table = "BetOdds"
BetOdds.to_sql(sqlite_table, sqlite_connection, if_exists='replace')

# Close connections when done
conn.close()
sqlite_connection.close()

## Score the Bets 

In [105]:
# Replace NaN values with NA
BetOdds.NBA_Stat.fillna('NA', inplace=True)
# Drop rows that we couldn't get an NBA_Stat for 
df = BetOdds.loc[BetOdds['NBA_Stat'] != 'NA']
# Create a table to show the win results by bet grade
df_counts = df.groupby(['BetGrade', 'W/L'])['W/L'].size()
df_pcts = df_counts.groupby(level=0).apply(lambda x: round(100 * x / float(x.sum()),1))
# Add the counts back to the table
df = pd.concat([df_counts, df_pcts], axis = 1)
# Format the columns
df.columns = ['Count', 'Win_Pct']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Win_Pct
BetGrade,W/L,Unnamed: 2_level_1,Unnamed: 3_level_1
A,L,8,22.2
A,W,28,77.8
B,L,14,32.6
B,W,29,67.4
C,L,46,45.5
C,W,55,54.5
D,L,100,47.6
D,W,110,52.4
F,L,377,50.3
F,W,372,49.7
