## Now that we have a couple days worth of data, lets do a simple backtest to see the outcome of betting on positive expected outcome games

In [2]:
import pandas as pd
import sqlite3
from sqlite3 import Error as e

#### We get our data

In [3]:
## We get our data from the data warehouse
data_warehouse = r"C:\Users\arbis\Projects\nate_silver_gets_me_money\database\dk_538_games_dw.db" 
con = None
try:
    con = sqlite3. connect(data_warehouse)
except Error as e:
    print(e)

past_games = pd.read_sql_query("SELECT * FROM game_dw WHERE date(game_date) < date('now');", con)

con.close()

past_games.head()

Unnamed: 0,game_date,away_team,home_team,away_pct_538,home_pct_538,away_moneyline_dk,home_moneyline_dk
0,2021-09-08,Mariners,Astros,0.38,0.62,150,-170
1,2021-09-08,Giants,Rockies,0.58,0.42,-145,125
2,2021-09-08,Rangers,Diamondbacks,0.44,0.56,140,-160
3,2021-09-08,Twins,Indians,0.42,0.58,115,-135
4,2021-09-08,Tigers,Pirates,0.5,0.5,100,-120


#### We load in actual game outcome data 

In [4]:
past_games_scores = pd.read_csv("C:/Users/arbis/Projects/nate_silver_gets_me_money/additional_data/prior_games.csv")
past_games_scores.head()

Unnamed: 0,Date,Away team,home team,away score,home score
0,2021-09-07,Twins,Indians,3,0
1,2021-09-07,Tigers,Pirates,2,3
2,2021-09-07,Mets,Marlins,9,4
3,2021-09-07,Blue Jays,Yankees,5,1
4,2021-09-07,Royals,Orioles,3,7


In [13]:
## We merge our past game dataframe with the scores dataframe
df = pd.merge(past_games, past_games_scores,  
                  how='right', 
                  left_on=['game_date','away_team', 'home_team'], 
                  right_on = ['Date','Away team', 'home team'])

df = df.dropna()

#### Lets start with seeing how many games draft kings and 538 predicted (not looking at magnitude)

In [6]:
## Lets build functions that gives percentages based off of a moneyline
def get_pct_from_moneyline(moneyline):
    
    moneyline = int(moneyline)
    
    ## If the moneyline is negative
    if moneyline < 0:
        
        pct = (-1 * moneyline) / ((-1 * moneyline) + 100)

    ## If the moneyline is positive
    elif moneyline > 0:
        
        pct = 100 / (moneyline + 100)

    pct = 100 * round(pct, 4)
    return pct

def asses_predictions_538(row):
    
    predicted_winner = ''
    actual_winner = ''
        
    ## we find which team 538 predicted to win
    if float(row['away_pct_538']) > float(row.home_pct_538):
        predicted_winner = 'away'
        
    elif float(row.away_pct_538) < float(row.home_pct_538):
        predicted_winner = 'home'
     
    ## and get the actual winner of that game
    if int(row["away score"]) > int(row["home score"]):
        actual_winner = 'away'
        
    elif int(row["away score"]) < int(row["home score"]):
        actual_winner = 'home'
     
    ## and check to see if 538 is correct
    if predicted_winner == actual_winner:
        return(1)
    
    elif predicted_winner != actual_winner:
        return(0)
    
def asses_predictions_dk(row):
    
    predicted_winner = ''
    actual_winner = ''
    
    ## we convert our moneylines into percentages
    away_pct = get_pct_from_moneyline(row.away_moneyline_dk)
    home_pct = get_pct_from_moneyline(row.home_moneyline_dk)
    
    ## we find which team DraftKings predicted to win
    if away_pct > home_pct:
        predicted_winner = 'away'
        
    if away_pct < home_pct:
        predicted_winner = 'home'

    ## and get the actual winner of that game        
    if int(row["away score"]) > int(row["home score"]):
        actual_winner = 'away'

    ## and check to see if DraftKings is correct
    elif int(row["away score"]) < int(row["home score"]):
        actual_winner = 'home'
        
    if predicted_winner == actual_winner:
        return(1)
    
    elif predicted_winner != actual_winner:
        return(0)
    
    
df['538_correct'] = df.apply(asses_predictions_538, axis = 1)
df['dk_correct'] = df.apply(asses_predictions_dk, axis = 1)

df.head()

Unnamed: 0,game_date,away_team,home_team,away_pct_538,home_pct_538,away_moneyline_dk,home_moneyline_dk,Date,Away team,home team,away score,home score,538_correct,dk_correct
0,2021-09-07,Twins,Indians,0.42,0.58,115.0,-135.0,2021-09-07,Twins,Indians,3,0,0,0
1,2021-09-07,Tigers,Pirates,0.52,0.48,-105.0,-115.0,2021-09-07,Tigers,Pirates,2,3,0,1
2,2021-09-07,Mets,Marlins,0.56,0.44,-180.0,155.0,2021-09-07,Mets,Marlins,9,4,1,1
3,2021-09-07,Blue Jays,Yankees,0.36,0.64,190.0,-235.0,2021-09-07,Blue Jays,Yankees,5,1,0,0
4,2021-09-07,Royals,Orioles,0.52,0.48,-120.0,100.0,2021-09-07,Royals,Orioles,3,7,0,0


In [7]:
print( "Draft kings predicted", df["dk_correct"].sum(), "of the 59 games correctly")

print( "538 predicted", df["538_correct"].sum(), "of the 59 games correctly")

print( "A random guess would predict", 59/2, "of the 59 games correctly")


Draft kings predicted 27 of the 59 games correctly
538 predicted 29 of the 59 games correctly
A random guess would predict 29.5 of the 59 games correctly


Coin flipping is the new Meta

### Ultimatly, our goal isn't to predict more games that Draftkings, its to find games when DrafKings underesitmates a team's chance of winning (compared to 538) and gives the team an overly generous moneyline 

#### Lets see how much money we would have made over the past 59 games if we went with that betting strategy

In [9]:
def bet_outcome(bet, moneyline):
    
    moneyline = int(moneyline)
    
    ## if the moneyline is negative (betting favortie)
    if moneyline < 0:
        
        outcome = (bet * 100) / abs(moneyline)
        
    ## if the moneyline is positive (underdog)
    if moneyline > 0:
        
        outcome = (bet * moneyline) / 100

    outcome = bet + round(outcome , 2)
        
    return outcome

In [10]:
## then apply our odds function to our draftkings moneyline to get our draftkings raw probability percentage
df["raw_home_odds_dk"] = df["home_moneyline_dk"].apply(get_pct_from_moneyline)
df["raw_away_odds_dk"] = df["away_moneyline_dk"].apply(get_pct_from_moneyline)

## sports book odds add up to 100 + the padding percentage, or their estimated profit margin
df["dk_total_padding_val"] = df["raw_home_odds_dk"] + df["raw_away_odds_dk"] - 100

## This padding percentage is (THEORETICALLY) added equally on both sides, or at least in very close amounts
## We rescale the raw percentages back to a 100% total scale to get our final percentage
df["home_odds_dk"] = df["raw_home_odds_dk"] / (df["raw_home_odds_dk"] + df["raw_away_odds_dk"])
df["home_odds_dk"] = round(df["home_odds_dk"], 4)

df["away_odds_dk"] = df["raw_away_odds_dk"] / (df["raw_home_odds_dk"] + df["raw_away_odds_dk"])
df["away_odds_dk"] = round(df["away_odds_dk"], 4)

## We get our winnings columns by applying our bet_ouctome function to the original moneyline
df["winnings_on_1_dollar_home"] = df["home_moneyline_dk"].apply(lambda ml: bet_outcome(1, ml))
df["winnings_on_1_dollar_away"] = df["away_moneyline_dk"].apply(lambda ml: bet_outcome(1, ml))

## And get our expected outcome by multipling the winnings by the probability
df["expected_outcome_home_538"] = (df["winnings_on_1_dollar_home"] * df["home_pct_538"]) - 1
df["expected_outcome_away_538"] = (df["winnings_on_1_dollar_away"] * df["away_pct_538"]) - 1

df["adj_expected_outcome_away_538"] = df["winnings_on_1_dollar_away"] * (df["away_pct_538"] - df['dk_total_padding_val'] / 100 / 2) - 1
df["adj_expected_outcome_home_538"] = df["winnings_on_1_dollar_home"] * (df["home_pct_538"] - df['dk_total_padding_val'] / 100 / 2) - 1

df.head()

Unnamed: 0,game_date,away_team,home_team,away_pct_538,home_pct_538,away_moneyline_dk,home_moneyline_dk,Date,Away team,home team,...,raw_away_odds_dk,dk_total_padding_val,home_odds_dk,away_odds_dk,winnings_on_1_dollar_home,winnings_on_1_dollar_away,expected_outcome_home_538,expected_outcome_away_538,adj_expected_outcome_away_538,adj_expected_outcome_home_538
0,2021-09-07,Twins,Indians,0.42,0.58,115.0,-135.0,2021-09-07,Twins,Indians,...,46.51,3.96,0.5526,0.4474,1.74,2.15,0.0092,-0.097,-0.13957,-0.025252
1,2021-09-07,Tigers,Pirates,0.52,0.48,-105.0,-115.0,2021-09-07,Tigers,Pirates,...,51.22,4.71,0.5108,0.4892,1.87,1.95,-0.1024,0.014,-0.031922,-0.146439
2,2021-09-07,Mets,Marlins,0.56,0.44,-180.0,155.0,2021-09-07,Mets,Marlins,...,64.29,3.51,0.3789,0.6211,2.55,1.56,0.122,-0.1264,-0.153778,0.077247
3,2021-09-07,Blue Jays,Yankees,0.36,0.64,190.0,-235.0,2021-09-07,Blue Jays,Yankees,...,34.48,4.63,0.6705,0.3295,1.43,2.9,-0.0848,0.044,-0.023135,-0.117905
4,2021-09-07,Royals,Orioles,0.52,0.48,-120.0,100.0,2021-09-07,Royals,Orioles,...,54.55,4.55,0.4782,0.5218,2.0,1.83,-0.04,-0.0484,-0.090032,-0.0855


In [11]:
games_to_bet_on = df.loc[(df['adj_expected_outcome_away_538'] > 0) | (df['adj_expected_outcome_home_538'] > 0)][["game_date", "away_team", "home_team", "away_pct_538", "home_pct_538", "away_moneyline_dk", "home_moneyline_dk", "adj_expected_outcome_away_538", "adj_expected_outcome_home_538"]]

games_to_bet_on_w_scores = pd.merge(games_to_bet_on, past_games_scores,  
                  how='left', 
                  left_on=['game_date','away_team', 'home_team'], 
                  right_on = ['Date','Away team', 'home team'])


games_to_bet_on_w_scores.head()

Unnamed: 0,game_date,away_team,home_team,away_pct_538,home_pct_538,away_moneyline_dk,home_moneyline_dk,adj_expected_outcome_away_538,adj_expected_outcome_home_538,Date,Away team,home team,away score,home score
0,2021-09-07,Mets,Marlins,0.56,0.44,-180.0,155.0,-0.153778,0.077247,2021-09-07,Mets,Marlins,9,4
1,2021-09-07,Reds,Cubs,0.52,0.48,-165.0,145.0,-0.187594,0.13827,2021-09-07,Reds,Cubs,4,3
2,2021-09-07,Dodgers,Cardinals,0.64,0.36,-160.0,140.0,0.010799,-0.17452,2021-09-07,Dodgers,Cardinals,7,2
3,2021-09-07,Angels,Padres,0.36,0.64,245.0,-310.0,0.16265,-0.18556,2021-09-07,Angels,Padres,4,0
4,2021-09-08,Rangers,Diamondbacks,0.44,0.56,140.0,-160.0,0.01748,-0.118801,2021-09-08,Rangers,Diamondbacks,8,5


In [12]:
def actual_outcome(row):
    
    ## if the away team's expected bet outcome is greater than 0, we bet on it
    if row['adj_expected_outcome_away_538'] > 0:
        
        ## We check to see that the away team won
        if row['away score'] > row['home score']:
            
            ## if they won, return the bet payout plus the dollar bet
            outcome = bet_outcome(1, row['away_moneyline_dk']) + 1
            return outcome
        
        ## if they dont win, you lost 1 dollar bet
        elif row['away score'] < row['home score']:
            return(-1)
        
    ## if the home team's expected bet outcome is greater than 0, we bet on it
    if row['adj_expected_outcome_home_538'] > 0:
    
        ## We check to see that the home team won
        if row['away score'] < row['home score']:
            
            ## if they won, return the bet payout plus the dollar bet
            outcome = bet_outcome(1, row['home_moneyline_dk']) + 1
            return outcome
        
        ## if they dont win, you lost 1 dollar bet
        elif row['away score'] > row['home score']:
            return(-1)

games_to_bet_on_w_scores['payout'] = games_to_bet_on_w_scores.apply(actual_outcome, axis = 1)

print("Total Payout:", games_to_bet_on_w_scores.payout.sum(), '\n')
print("Total Profit:", (games_to_bet_on_w_scores.payout.sum() - len(games_to_bet_on_w_scores)), '\n')
print("Total Profit per Game:", (games_to_bet_on_w_scores.payout.sum() - len(games_to_bet_on_w_scores)) / len(games_to_bet_on_w_scores))


Total Payout: 25.690000000000005 

Total Profit: 8.690000000000005 

Total Profit per Game: 0.5111764705882356


#### Out of the past 59 games (super low sample size), there were 17 games with a discrepency between 538's predictions and Draft King's moneylines where there was a positive expected outcome.

#### If you had bet a dollar on each of those 17 games, you would have won 25.69 dollars for a profit of 8.69 Dollars

#### This ammounts to an average expected payout of 50 cents per dollar, which is cause for concern as it is much higher than the usual expected value. This backs up the idea that this is a super low sample size