# C01. Gambling
This evaluates gambling projections
- Type: Evaluation
- Run Frequency: Irregular
- Sources:
    - Sportsbook Review
- Dates:
    - Created: 3/30/2024
    - Updated: 5/5/2024

### Imports

In [1]:
%run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"

In [2]:
# Render floats in fixed-point form with six decimals rather than scientific notation
pd.options.display.float_format = '{:.6f}'.format

### Dates

In [22]:
# First date of the evaluation window (string; appears to be YYYYMMDD).
# Used as the lower bound when filtering game_df['date'].
start_date = "20240320"
# end_date = "20240818"

In [23]:
# start_date = yesterdaysdate
# Last date of the evaluation window; yesterdaysdate is defined outside this
# cell (presumably by the %run utility notebooks — confirm).
end_date = yesterdaysdate

In [24]:
# start_date = todaysdate
# end_date = todaysdate

### Games

In [25]:
# Build the game table, then keep only rows dated within [start_date, end_date]
game_df = read_and_save_games(team_map, generate=True)
game_df = game_df.loc[
    (start_date <= game_df['date']) & (game_df['date'] <= end_date)
].reset_index(drop=True)

### Models

##### Create dataframe containing model information

In [26]:
# Snapshot of the run date and the model/scaler names used for this evaluation.
# It is stored as a one-row dataframe so it can be joined onto the log files.
model_dictionary = dict(
    date=todaysdate,
    batter_stats_scaler_name=batter_stats_scaler_name,
    pitcher_stats_scaler_name=pitcher_stats_scaler_name,
    batter_stats_fg_scaler_name=batter_stats_fg_scaler_name,
    pitcher_stats_fg_scaler_name=pitcher_stats_fg_scaler_name,
    batter_imputations_model_name=batter_imputations_model_name,
    pitcher_imputations_model_name=pitcher_imputations_model_name,
    model_binary_name=model_binary_name,
    model_outs_name=model_outs_name,
    model_safe_name=model_safe_name,
    model_errors_name=model_errors_name,
    model_dp_name=model_dp_name,
    model_out_bases_name=model_out_bases_name,
    model_events_name=model_events_name,
    model_date=model_date,
    model_pulls_name=model_pulls_name,
    model_sba_2b_name=model_sba_2b_name,
    model_sba_3b_name=model_sba_3b_name,
    model_sb_2b_name=model_sb_2b_name,
    model_sb_3b_name=model_sb_3b_name,
)

model_df = pd.DataFrame([model_dictionary])

### Functions

##### Calculate gambling stats for sims

In [27]:
def gambling_stats_sim(df):
    """Flag over/under, spread and moneyline outcomes per row, then average per game.

    Each row of ``df`` carries simulated scores (``away_score``/``home_score``),
    actual scores (``away_score_act``/``home_score_act``) and the lines ``OU``
    and ``Spread``. The 0/1 indicator columns are added to ``df`` in place.

    Returns a new dataframe with one row per
    (game_id, venue_id, game_type, status, game_num, away_team, home_team)
    holding the mean of every numeric column, so each ``*_proj`` flag becomes
    the share of rows in which that outcome occurred.
    """
    def as_flag(mask):
        # Boolean mask -> 0/1 integers so the per-game mean is a frequency
        return mask.astype('int')

    # (column suffix, away score column, home score column)
    score_sets = (
        ('proj', 'away_score', 'home_score'),
        ('act', 'away_score_act', 'home_score_act'),
    )

    ### OU: total runs compared against the over/under line
    for tag, away_col, home_col in score_sets:
        total_col = f'total_{tag}'
        df[total_col] = df[[away_col, home_col]].sum(axis=1)
        df[f'over_{tag}'] = as_flag(df[total_col] > df['OU'])
        df[f'under_{tag}'] = as_flag(df[total_col] < df['OU'])
        df[f'push_{tag}'] = as_flag(df[total_col] == df['OU'])

    ### Spread: the away side covers when it beats home score + Spread
    for tag, away_col, home_col in score_sets:
        home_line = df[home_col] + df['Spread']
        df[f'away_sp_{tag}'] = as_flag(df[away_col] > home_line)
        df[f'home_sp_{tag}'] = as_flag(df[away_col] < home_line)
        df[f'push_sp_{tag}'] = as_flag(df[away_col] == home_line)

    ### ML: straight-up winner
    for tag, away_col, home_col in score_sets:
        df[f'away_ml_{tag}'] = as_flag(df[away_col] > df[home_col])
        df[f'home_ml_{tag}'] = as_flag(df[away_col] < df[home_col])

    # Store actual scores as integers
    for actual_col in ('away_score_act', 'home_score_act'):
        df[actual_col] = df[actual_col].astype('int')

    # Collapse the rows of each game into their mean
    group_keys = ['game_id', 'venue_id', 'game_type', 'status', 'game_num', 'away_team', 'home_team']
    df_grouped = df.groupby(group_keys).mean(numeric_only=True).reset_index()

    return df_grouped

##### Convert American odds to payout multipliers

In [28]:
def multipliers(odds):
    """Convert one American odds value into a payout multiplier (stake included).

    Positive odds pay ``odds / 100`` per unit staked and negative odds pay
    ``100 / abs(odds)``; the stake (1) is added on top. Odds of 0 return NaN.
    """
    if odds == 0:
        return np.nan
    if odds > 0:
        return odds / 100 + 1
    # Negative line: 100 / odds is negative, so subtracting it adds the profit
    return 1 - 100 / odds

##### Calculate gambling stats for matchups

In [29]:
def gambling_stats_games(df):
    """Turn simulated and actual outcomes into expected payouts, picks and winnings.

    ``df`` is passed to ``gambling_stats_sim``. Its result must contain the
    American-odds columns SpreadMoney1/2, OuMoney1/2 and MLMoney1/2. Each is
    copied to ``<col>_usa`` and then replaced by its payout multiplier.

    Columns added to the per-game dataframe:
      * ``*Pred``: expected payout per unit staked, i.e. multiplier times the
        simulated frequency, plus the push frequency for spread and over/under.
      * ``*Act``: actual payout of each side (1 on a push, 0 on a loss).
      * ``SpreadPick`` / ``OverUnderPick`` / ``MLPick``: the side whose
        expected payout is at least 1, otherwise "Neither".
      * ``*Payout``: actual payout of the picked side (NaN for "Neither").
      * ``*PayoutPred``: expected payout of the picked side (NaN for "Neither").

    Pick, payout and predicted payout all come from one rule, so the
    predicted payout always belongs to the side that was picked. A missing
    prediction (for example missing odds) can never produce a pick.

    Returns the per-game dataframe.
    """
    # Calculate gambling stats for sims
    df_grouped = gambling_stats_sim(df)

    # Convert American odds to multipliers, keeping the original lines in "<col>_usa"
    for col in ['SpreadMoney1', 'SpreadMoney2', 'OuMoney1', 'OuMoney2', 'MLMoney1', 'MLMoney2']:
        df_grouped[f"{col}_usa"] = df_grouped[col]
        df_grouped[col] = df_grouped[col].apply(multipliers)

    ### Calculate predicted payouts
    # Spreads (a push returns the stake, hence the added push frequency)
    df_grouped['AwaySpreadPred'] = df_grouped['SpreadMoney1'] * df_grouped['away_sp_proj'] + df_grouped['push_sp_proj']
    df_grouped['HomeSpreadPred'] = df_grouped['SpreadMoney2'] * df_grouped['home_sp_proj'] + df_grouped['push_sp_proj']

    # Over/Unders
    df_grouped['OverPred'] = df_grouped['OuMoney1'] * df_grouped['over_proj'] + df_grouped['push_proj']
    df_grouped['UnderPred'] = df_grouped['OuMoney2'] * df_grouped['under_proj'] + df_grouped['push_proj']

    # ML
    df_grouped['AwayMLPred'] = df_grouped['MLMoney1'] * df_grouped['away_ml_proj']
    df_grouped['HomeMLPred'] = df_grouped['MLMoney2'] * df_grouped['home_ml_proj']

    ### Calculate actual payouts
    # Spreads
    df_grouped['AwaySpreadAct'] = df_grouped['SpreadMoney1'] * df_grouped['away_sp_act']
    df_grouped['HomeSpreadAct'] = df_grouped['SpreadMoney2'] * df_grouped['home_sp_act']
    # Adjust for pushes
    df_grouped['AwaySpreadAct'] = np.where(df_grouped['push_sp_act'] == 1, 1, df_grouped['AwaySpreadAct'])
    df_grouped['HomeSpreadAct'] = np.where(df_grouped['push_sp_act'] == 1, 1, df_grouped['HomeSpreadAct'])

    # Over/Unders
    df_grouped['OverAct'] = df_grouped['OuMoney1'] * df_grouped['over_act']
    df_grouped['UnderAct'] = df_grouped['OuMoney2'] * df_grouped['under_act']
    # Adjust for pushes
    df_grouped['OverAct'] = np.where(df_grouped['OU'] == df_grouped['total_act'], 1, df_grouped['OverAct'])
    df_grouped['UnderAct'] = np.where(df_grouped['OU'] == df_grouped['total_act'], 1, df_grouped['UnderAct'])

    # ML
    df_grouped['AwayMLAct'] = df_grouped['MLMoney1'] * df_grouped['away_ml_act']
    df_grouped['HomeMLAct'] = df_grouped['MLMoney2'] * df_grouped['home_ml_act']

    # (bet, (label, column stem) of the preferred side, (label, column stem) of the other side).
    # The preferred side wins when both sides look profitable.
    bets = [
        ('Spread', ('Away', 'AwaySpread'), ('Home', 'HomeSpread')),
        ('OverUnder', ('Under', 'Under'), ('Over', 'Over')),
        ('ML', ('Away', 'AwayML'), ('Home', 'HomeML')),
    ]

    ### Pick bets
    # NaN >= 1 is False, so a missing prediction falls through to "Neither"
    for bet, (first_label, first_stem), (second_label, second_stem) in bets:
        df_grouped[f'{bet}Pick'] = np.select(
            [df_grouped[f'{first_stem}Pred'] >= 1, df_grouped[f'{second_stem}Pred'] >= 1],
            [first_label, second_label],
            default="Neither",
        )

    ### Calculate winnings: actual payout of the picked side
    for bet, (first_label, first_stem), (second_label, second_stem) in bets:
        pick = df_grouped[f'{bet}Pick']
        df_grouped[f'{bet}Payout'] = np.select(
            [pick == first_label, pick == second_label],
            [df_grouped[f'{first_stem}Act'], df_grouped[f'{second_stem}Act']],
            default=np.nan,
        )

    ### Expected payout of the picked side
    for bet, (first_label, first_stem), (second_label, second_stem) in bets:
        pick = df_grouped[f'{bet}Pick']
        df_grouped[f'{bet}PayoutPred'] = np.select(
            [pick == first_label, pick == second_label],
            [df_grouped[f'{first_stem}Pred'], df_grouped[f'{second_stem}Pred']],
            default=np.nan,
        )

    return df_grouped

##### Calculate errors and squared errors

In [30]:
def vegas_runs(gambling_df):
    """Add run-projection errors for the model and for Vegas.

    Renames ``away_score``/``home_score`` to ``VisitorModelRuns``/
    ``HomeModelRuns`` in place. For each side it then adds the signed error
    (projection minus actual score) and the squared error of both the model
    and the Vegas run columns, plus the actual and Vegas run totals.

    Returns the same dataframe object that was passed in.
    """
    gambling_df.rename(
        columns={'away_score': 'VisitorModelRuns', 'home_score': 'HomeModelRuns'},
        inplace=True,
    )

    sources = ('Model', 'Vegas')
    for side, actual_col in (('Visitor', 'away_score_act'), ('Home', 'home_score_act')):
        actual_runs = gambling_df[actual_col]
        # Signed errors first, then their squares
        for source in sources:
            gambling_df[f'{side}{source}Error'] = gambling_df[f'{side}{source}Runs'] - actual_runs
        for source in sources:
            gambling_df[f'{side}{source}SqError'] = gambling_df[f'{side}{source}Error'] ** 2

    # Total runs
    gambling_df['total_act'] = gambling_df['away_score_act'] + gambling_df['home_score_act']
    gambling_df['TotalVegasRuns'] = gambling_df['VisitorVegasRuns'] + gambling_df['HomeVegasRuns']

    return gambling_df

##### Determine spread side

In [31]:
# Function to determine SpreadSide
def determine_spread_side(row):
    """Classify a spread pick as 'Underdog', 'Favorite' or 'Neither'.

    An Away pick with a negative ``Spread``, or a Home pick with a positive
    ``Spread``, is 'Underdog'; the opposite signs give 'Favorite'. Any other
    pick, or a spread that is neither positive nor negative, is 'Neither'.
    """
    pick = row['SpreadPick']

    if pick == 'Away':
        spread = row['Spread']
        if spread < 0:
            return 'Underdog'
        if spread > 0:
            return 'Favorite'
    elif pick == 'Home':
        spread = row['Spread']
        if spread > 0:
            return 'Underdog'
        if spread < 0:
            return 'Favorite'

    return 'Neither'

##### Impute Vegas runs

Methodology:
- Calculate win probabilities using ML payouts
- Assign runs from O/U using win probability

Limitations:
- Using win probability to assign runs is imperfect
- Ignores different payouts for overs and unders

In [32]:
def impute_runs(df):
    """Estimate each team's Vegas runs from the moneylines and the over/under.

    Adds these columns to ``df`` in place and returns it:
      * ``Win1_A`` / ``Win2_A``: win probability implied by each team's own
        American moneyline (``MLMoney1`` / ``MLMoney2``).
      * ``Win1_B`` / ``Win2_B``: one minus the opponent's implied probability.
      * ``Win1`` / ``Win2``: row-wise mean of the A and B estimates.
      * ``VisitorVegasRuns`` / ``HomeVegasRuns``: ``OU`` split in proportion
        to ``Win1`` / ``Win2``.
    """
    def implied_probability(line):
        # Positive line: 100 / (line + 100); otherwise |line| / (|line| + 100)
        return np.where(line > 0, 100 / (line + 100), -line / (-line + 100))

    ### Implied Odds
    # Straight from each team's own line
    for team in ('1', '2'):
        df[f'Win{team}_A'] = implied_probability(df[f'MLMoney{team}'])

    # Complement of the opponent's implied probability
    df['Win1_B'] = 1 - df['Win2_A']
    df['Win2_B'] = 1 - df['Win1_A']

    # Average the two estimates
    for team in ('1', '2'):
        df[f'Win{team}'] = df[[f'Win{team}_A', f'Win{team}_B']].mean(axis=1)

    ### Implied Runs
    for runs_col, win_col in (('VisitorVegasRuns', 'Win1'), ('HomeVegasRuns', 'Win2')):
        df[runs_col] = df['OU'] * df[win_col]

    return df

##### Create dataset

In [33]:
def gambling_dataset(game_df, team_map, date, impute=True):
    """Build the per-game gambling dataframe for one date.

    Reads the odds file for ``date``, optionally imputes Vegas runs, and
    numbers each visiting team's games by start time. For every game on
    ``date`` that has a simulation file, it merges simulations, actual results
    and odds, then computes gambling stats, spread side and run errors.

    Parameters:
        game_df: game table with at least date, game_id, away_score,
            home_score, venue_id, game_type, status, game_num, away_team and
            home_team.
        team_map: mapping table with SBRTEAM and BBREFTEAM columns.
        date: date to process, in the same form as ``game_df['date']``.
        impute: when true, add implied Vegas runs via ``impute_runs``.

    Returns one row per simulated game with a ``date`` column. If no
    simulation file exists for the date, an empty dataframe is returned
    rather than raising, so one missing day cannot abort a multi-date run.
    """
    ### Odds
    # Read in odds
    odds_df = pd.read_csv(os.path.join(baseball_path, "A08. Odds Sportsbook Review", f"Odds {date}.csv"))

    # Impute runs
    if impute:
        odds_df = impute_runs(odds_df)

    # Identify which game comes first
    odds_df.sort_values(by=['VisitorTeamShort', 'EventDateTime'], inplace=True)
    odds_df['game_num'] = odds_df.groupby(['VisitorTeamShort']).cumcount()+1
    # Convert to string
    odds_df['date'] = odds_df['date'].astype('str')

    # Merge in team abbreviation
    odds_df = odds_df.merge(team_map[['SBRTEAM', 'BBREFTEAM']], left_on=['HomeTeamShort'], right_on=['SBRTEAM'], how='left')

    ### Simulations
    # Subset daily games
    daily_game_df = game_df[game_df['date'] == date].reset_index(drop=True)
    # All of the day's simulations live in one folder
    sims_dir = os.path.join(baseball_path, "B02. Simulations", "1. Game Sims", f"Matchups {date}")
    game_columns = ['game_id', 'away_score', 'home_score', 'venue_id', 'game_type', 'status', 'game_num', 'away_team', 'home_team']
    # Create list of dataframes
    gambling_df_list = []

    # Loop over games
    for game_id in daily_game_df['game_id']:
        sim_path = os.path.join(sims_dir, f"game_{game_id}.csv")
        # Games without a simulation file are skipped
        if not os.path.exists(sim_path):
            continue

        # Read in game simulations
        simulation_df = pd.read_csv(sim_path)
        # Add game_id for merging
        simulation_df['game_id'] = game_id

        ### Merge in game_df (actual scores get the "_act" suffix)
        gambling_df = pd.merge(simulation_df, game_df[game_columns], on=['game_id'], how='left', suffixes=('', '_act'))

        ### Merge in odds_df
        gambling_df = pd.merge(gambling_df, odds_df, left_on=['home_team', 'game_num'], right_on=['BBREFTEAM', 'game_num'], how='left')

        # Calculate Gambling Stats
        gambling_df = gambling_stats_games(gambling_df)
        gambling_df['SpreadSide'] = gambling_df.apply(determine_spread_side, axis=1)
        # Create run error stats
        gambling_df = vegas_runs(gambling_df)
        # Append to list
        gambling_df_list.append(gambling_df)

    # pd.concat raises on an empty list, so return an empty frame instead
    if not gambling_df_list:
        return pd.DataFrame()

    # Concatenate all game gambling stats together
    daily_gambling_df = pd.concat(gambling_df_list, axis=0)
    daily_gambling_df.reset_index(inplace=True, drop=True)

    # Add date
    daily_gambling_df['date'] = date

    return daily_gambling_df

##### Graphs

In [34]:
def payout_plotter(gambling_df, bet, range_width=0.1, ax=None):
    """Scatter the average actual payout against binned predicted payout.

    Parameters:
        gambling_df: dataframe with ``{bet}PayoutPred`` and ``{bet}Payout``
            columns. A ``{bet}PayoutGroup`` column is added to it in place.
        bet: column prefix of the bet type, e.g. 'Spread', 'OverUnder', 'ML'.
        range_width: width of each predicted-payout bin.
        ax: matplotlib axis to draw on; a new 4x4 figure is created when None.
    """
    # Floor each predicted payout to the lower edge of its bin
    gambling_df[f'{bet}PayoutGroup'] = gambling_df[f'{bet}PayoutPred'] // range_width * range_width

    # Average actual payout within each predicted-payout bin
    grouped_data = gambling_df.groupby(f'{bet}PayoutGroup')[f'{bet}Payout'].mean().reset_index()

    # Use existing axis if provided, otherwise create a new subplot
    if ax is None:
        _, ax = plt.subplots(figsize=(4, 4))

    # Create a scatter plot
    ax.scatter(grouped_data[f'{bet}PayoutGroup'], grouped_data[f'{bet}Payout'], color='blue', alpha=0.7)

    # Set specific axis limits and ticks with intervals of 0.1
    ax.set_xlim(0.8, 2.0)
    ax.set_xticks(np.arange(0.7, 2.1, 0.1))
    ax.set_ylim(0.8, 2.0)
    ax.set_yticks(np.arange(0.7, 2.1, 0.1))

    # The y values are means of the actual payout, so label them as such
    ax.set_title(f'Average {bet}Payout by {bet}PayoutPred bin')
    ax.set_xlabel(f'{bet}PayoutGroup')
    ax.set_ylabel(f'Average {bet}Payout')
    ax.grid(True)

##### Significance Test

In [35]:
def significance_test(df, bet, threshold, alpha=0.05):
    """One-sided t-test of whether selected bets pay more than break-even.

    Selects rows whose ``{bet}PayoutPred`` is at least ``threshold``, drops
    missing ``{bet}Payout`` values, and tests H0: mean payout = 1 against
    H1: mean payout > 1.

    Parameters:
        df: dataframe with ``{bet}PayoutPred`` and ``{bet}Payout`` columns.
        bet: one of 'OverUnder', 'Spread', 'ML'.
        threshold: minimum predicted payout for a bet to be included.
        alpha: significance level.

    Returns:
        (average_payout, sample_size, p_value, significant), where
        ``significant`` is "Yes" or "No". With fewer than two usable bets, or
        an undefined t statistic, the p-value is reported as 1.0. Returns
        None, after printing a message, when ``bet`` is not recognised.
    """
    # Accepted bets
    bet_list = ['OverUnder', 'Spread', 'ML']
    if bet not in bet_list:
        print(f"{bet} not an approved bet type. Did you mean one of: {bet_list}?")
        return

    # Extract bets within threshold, ignoring games without a payout
    selected = df.loc[df[f'{bet}PayoutPred'] >= threshold, f'{bet}Payout']
    test_list = [x for x in selected if not math.isnan(x)]

    # Calculate average payout and sample size
    sample_size = len(test_list)
    average_payout = round(np.mean(test_list), 5) if test_list else np.nan

    # A t-test needs at least two observations
    if sample_size < 2:
        t_statistic = p_value_two_sided = np.nan
    else:
        t_statistic, p_value_two_sided = stats.ttest_1samp(test_list, 1)

    # Convert the two-sided p-value to the one-sided alternative mean > 1
    if math.isnan(t_statistic):
        p_value = 1.0  # no evidence either way
    elif t_statistic > 0:
        p_value = p_value_two_sided / 2
    else:
        # Mean at or below 1: the upper tail is the complement of half the two-sided value
        p_value = 1 - p_value_two_sided / 2

    # Determine significance
    significant = "Yes" if p_value < alpha else "No"

    return average_payout, sample_size, p_value, significant

### Dataset

Identify list of dates to include

In [36]:
# Distinct game dates, in order of first appearance
date_list = [*game_df['date'].unique()]

Create all daily gambling files and append them together

In [37]:
# df = gambling_dataset(game_df=game_df, team_map=team_map, date="20240720", impute=True)

In [38]:
%%time
# Build one gambling dataframe per date, using all available workers (n_jobs=-1)
daily_gambling_dfs_list = Parallel(n_jobs=-1)(delayed(gambling_dataset)(game_df=game_df, team_map=team_map, date=date, impute=True) for date in date_list)
# Stack the daily frames; the row index is not reset, so index labels repeat across dates
complete_gambling_df = pd.concat(daily_gambling_dfs_list, axis=0)
complete_gambling_df

CPU times: total: 15.6 ms
Wall time: 1.04 s


Unnamed: 0,game_id,venue_id,game_type,status,game_num,away_team,home_team,VisitorModelRuns,HomeModelRuns,away_score_act,home_score_act,Spread,OU,SpreadMoney1,SpreadMoney2,OuMoney1,OuMoney2,MLMoney1,MLMoney2,VisitorVegasRuns,HomeVegasRuns,Win1_A,Win2_A,Win1_B,Win2_B,Win1,Win2,total_proj,over_proj,under_proj,push_proj,total_act,over_act,under_act,push_act,away_sp_proj,home_sp_proj,push_sp_proj,away_sp_act,home_sp_act,push_sp_act,away_ml_proj,home_ml_proj,away_ml_act,home_ml_act,SpreadMoney1_usa,SpreadMoney2_usa,OuMoney1_usa,OuMoney2_usa,MLMoney1_usa,MLMoney2_usa,AwaySpreadPred,HomeSpreadPred,OverPred,UnderPred,AwayMLPred,HomeMLPred,AwaySpreadAct,HomeSpreadAct,OverAct,UnderAct,AwayMLAct,HomeMLAct,SpreadPick,OverUnderPick,MLPick,SpreadPayout,OverUnderPayout,MLPayout,SpreadPayoutPred,OverUnderPayoutPred,MLPayoutPred,SpreadSide,VisitorModelError,VisitorVegasError,VisitorModelSqError,VisitorVegasSqError,HomeModelError,HomeVegasError,HomeModelSqError,HomeVegasSqError,TotalVegasRuns,date
0,745443,31,R,In Progress,1,MIL,PIT,3.750992,4.054563,5.0,2.0,1.5,7.5,2.45,1.531915,1.909091,1.833333,1.8,1.952381,3.912602,3.587398,0.555556,0.512195,0.487805,0.444444,0.52168,0.47832,7.805556,0.472222,0.527778,0.0,7.0,0.0,1.0,0.0,0.321429,0.678571,0.0,1.0,0.0,0.0,0.445437,0.554563,1.0,0.0,145.0,-188.0,-110.0,-120.0,-125.0,-105.0,0.7875,1.039514,0.901515,0.967593,0.801786,1.082719,2.45,0.0,0.0,1.833333,1.8,0.0,Home,Neither,Home,0.0,,0.0,1.039514,,1.082719,Underdog,-1.249008,-1.087398,1.560021,1.182435,2.054563,1.587398,4.221231,2.519834,7.5,20240926
1,744796,3309,R,In Progress,1,KCR,WSN,4.455357,3.742063,4.0,4.0,1.5,7.5,1.952381,1.8,1.740741,2.05,1.555556,2.4,4.598214,2.901786,0.642857,0.416667,0.583333,0.357143,0.613095,0.386905,8.197421,0.508929,0.491071,0.0,8.0,1.0,0.0,0.0,0.416667,0.583333,0.0,0.0,1.0,0.0,0.534722,0.465278,0.0,0.0,-105.0,-125.0,-135.0,105.0,-180.0,140.0,0.813492,1.05,0.885913,1.006696,0.83179,1.116667,0.0,1.8,1.740741,0.0,0.0,0.0,Home,Under,Home,1.8,0.0,0.0,1.05,1.006696,1.116667,Underdog,0.455357,0.598214,0.20735,0.35786,-0.257937,-1.098214,0.066531,1.206075,7.5,20240926
2,746415,2394,R,In Progress,1,TBR,DET,3.956349,4.395833,3.0,2.0,-1.5,8.0,1.645161,2.3,1.952381,1.869565,2.3,1.645161,3.307758,4.692242,0.434783,0.607843,0.392157,0.565217,0.41347,0.58653,8.352183,0.458333,0.47123,0.070437,5.0,0.0,1.0,0.0,0.608135,0.391865,0.0,1.0,0.0,0.0,0.438492,0.561508,1.0,0.0,-155.0,130.0,-105.0,-115.0,130.0,-155.0,1.00048,0.90129,0.965278,0.951432,1.008532,0.923771,1.645161,0.0,0.0,1.869565,2.3,0.0,Away,Neither,Away,1.645161,,2.3,1.00048,,1.008532,Underdog,0.956349,0.307758,0.914604,0.094715,2.395833,2.692242,5.740017,7.248168,8.0,20240926
3,746738,4,R,In Progress,1,LAA,CHW,3.979167,3.860119,0.0,0.0,1.5,7.5,2.2,1.704225,1.925926,1.892857,1.740741,2.14,4.151919,3.348081,0.574468,0.46729,0.53271,0.425532,0.553589,0.446411,7.839286,0.488095,0.511905,0.0,0.0,0.0,1.0,0.0,0.363095,0.636905,0.0,0.0,1.0,0.0,0.484127,0.515873,0.0,0.0,120.0,-142.0,-108.0,-112.0,-135.0,114.0,0.79881,1.085429,0.940035,0.968963,0.84274,1.103968,0.0,1.704225,0.0,1.892857,0.0,0.0,Home,Neither,Home,1.704225,,0.0,1.085429,,1.103968,Underdog,3.979167,4.151919,15.833767,17.23843,3.860119,3.348081,14.900519,11.209647,7.5,20240926
4,746495,19,R,In Progress,1,STL,COL,5.228175,5.327381,2.0,0.0,1.5,10.5,2.36,1.617284,1.892857,1.925926,1.892857,1.925926,5.297623,5.202377,0.528302,0.519231,0.480769,0.471698,0.504536,0.495464,10.555556,0.468254,0.531746,0.0,2.0,0.0,1.0,0.0,0.362103,0.637897,0.0,1.0,0.0,0.0,0.486111,0.513889,1.0,0.0,136.0,-162.0,-112.0,-108.0,-112.0,-108.0,0.854563,1.03166,0.886338,1.024103,0.920139,0.989712,2.36,0.0,0.0,1.925926,1.892857,0.0,Home,Under,Neither,0.0,1.925926,,1.03166,1.024103,,Underdog,3.228175,3.297623,10.421111,10.87432,5.327381,5.202377,28.380988,27.064723,10.5,20240926
5,745609,10,R,Warmup,1,TEX,OAK,4.074405,3.768849,0.0,0.0,1.5,7.5,2.6,1.520833,1.952381,1.869565,1.980392,1.847458,3.613748,3.886252,0.50495,0.541284,0.458716,0.49505,0.481833,0.518167,7.843254,0.473214,0.526786,0.0,0.0,0.0,1.0,0.0,0.382937,0.617063,0.0,0.0,1.0,0.0,0.511905,0.488095,0.0,0.0,160.0,-192.0,-105.0,-115.0,-102.0,-118.0,0.995635,0.938451,0.923895,0.98486,1.013772,0.901735,0.0,1.520833,0.0,1.869565,0.0,0.0,Neither,Neither,Away,,,0.0,,,1.013772,Neither,4.074405,3.613748,16.600774,13.059173,3.768849,3.886252,14.204224,15.102956,7.5,20240926
6,745691,3313,R,Pre-Game,1,BAL,NYY,4.229167,4.743056,0.0,0.0,-1.5,8.0,1.617284,2.36,1.925926,1.892857,2.36,1.617284,3.221633,4.778367,0.423729,0.618321,0.381679,0.576271,0.402704,0.597296,8.972222,0.513889,0.400794,0.085317,0.0,0.0,1.0,0.0,0.59127,0.40873,0.0,1.0,0.0,0.0,0.412698,0.587302,0.0,0.0,-162.0,136.0,-108.0,-112.0,136.0,-162.0,0.956251,0.964603,1.075029,0.843963,0.973968,0.949833,1.617284,0.0,0.0,1.892857,0.0,0.0,Neither,Over,Neither,,0.0,,,1.075029,,Neither,4.229167,3.221633,17.885851,10.378918,4.743056,4.778367,22.496576,22.832793,8.0,20240926
7,745853,3312,R,Scheduled,1,MIA,MIN,3.665675,5.098214,0.0,0.0,-1.5,8.5,1.952381,1.869565,1.833333,2.0,3.05,1.4,2.607728,5.892272,0.327869,0.714286,0.285714,0.672131,0.306792,0.693208,8.763889,0.477183,0.522817,0.0,0.0,0.0,1.0,0.0,0.496032,0.503968,0.0,1.0,0.0,0.0,0.316468,0.683532,0.0,0.0,-105.0,-115.0,-120.0,100.0,205.0,-250.0,0.968443,0.942202,0.874835,1.045635,0.965228,0.956944,1.952381,0.0,0.0,2.0,0.0,0.0,Neither,Under,Neither,,2.0,,,1.045635,,Neither,3.665675,2.607728,13.43717,6.800247,5.098214,5.892272,25.991789,34.718865,8.5,20240926
8,746088,22,R,Scheduled,1,SDP,LAD,4.297619,4.708333,0.0,0.0,1.5,8.5,2.7,1.487805,1.892857,1.925926,1.952381,1.869565,4.153573,4.346427,0.512195,0.534884,0.465116,0.487805,0.488656,0.511344,9.005952,0.496032,0.503968,0.0,0.0,0.0,1.0,0.0,0.337302,0.662698,0.0,0.0,1.0,0.0,0.452381,0.547619,0.0,0.0,170.0,-205.0,-112.0,-108.0,-105.0,-115.0,0.910714,0.985966,0.938917,0.970606,0.88322,1.02381,0.0,1.487805,0.0,1.925926,0.0,0.0,Neither,Neither,Home,,,0.0,,,1.02381,Neither,4.297619,4.153573,18.469529,17.252172,4.708333,4.346427,22.168403,18.891424,8.5,20240926


##### Clean

In [None]:
# May require cleaning later. At the moment, it's fine.

In [None]:
# Sanity check: all rows vs. game_type "R" rows vs. rows with a unique game_id
(
    complete_gambling_df.shape,
    complete_gambling_df.loc[complete_gambling_df['game_type'] == "R"].shape,
    complete_gambling_df.drop_duplicates(subset=['game_id']).shape,
)

### 1. Profitability

##### Graph by Bin

In [None]:
# Create a single row with three subplots
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

# Call the function for each subplot
payout_plotter(complete_gambling_df, 'Spread', ax=axes[0])
payout_plotter(complete_gambling_df, 'OverUnder', ax=axes[1])
payout_plotter(complete_gambling_df, 'ML', ax=axes[2])

# Adjust layout for better spacing
plt.tight_layout()

# Show the plots
plt.show()

SpreadMean = complete_gambling_df['SpreadPayout'].mean()
OverUnderMean = complete_gambling_df['OverUnderPayout'].mean()
MLMean = complete_gambling_df['MLPayout'].mean()
print(f"\t\t\tSpread: {round(SpreadMean, 3)}\t\t\t\t\tOverUnder: {round(OverUnderMean, 3)}\t\t\t\tMoney Line: {round(MLMean, 3)}")

##### Significance Test

Main Bets

In [None]:
# Run the significance test for every main bet type and print one line per bet
for label, bet in (("Spread:    ", "Spread"), ("OverUnder: ", "OverUnder"), ("ML:        ", "ML")):
    average_payout, sample_size, p_value, significant = significance_test(complete_gambling_df, bet, 1.0, 0.05)
    print(label, f"Average Payout: {average_payout}, Sample Size: {sample_size}, P-Value: {p_value}, Significant: {significant}")

Spread

In [None]:
# Spread bets: overall, then split by whether the pick was the underdog or the favorite
spread_results = []
for label, subset in (
    ("Spread:    ", complete_gambling_df),
    ("Underdog:  ", complete_gambling_df.query('SpreadSide == "Underdog"')),
    ("Favorite:  ", complete_gambling_df.query('SpreadSide == "Favorite"')),
):
    average_payout, sample_size, p_value, significant = significance_test(subset, "Spread", 1.0, 0.05)
    print(label, f"Average Payout: {average_payout}, Sample Size: {sample_size}, P-Value: {p_value}, Significant: {significant}")
    spread_results.append([average_payout, sample_size, p_value, significant])

# Keep each result under the name the logging cell expects
sp_list, sp_underdog_list, sp_favorite_list = spread_results

Over/Under

In [None]:
# Over/under bets: overall, then split by the side that was picked
over_under_results = []
for label, subset in (
    ("OverUnder: ", complete_gambling_df),
    ("Over:      ", complete_gambling_df.query('OverUnderPick == "Over"')),
    ("Under:     ", complete_gambling_df.query('OverUnderPick == "Under"')),
):
    average_payout, sample_size, p_value, significant = significance_test(subset, "OverUnder", 1.0, 0.05)
    print(label, f"Average Payout: {average_payout}, Sample Size: {sample_size}, P-Value: {p_value}, Significant: {significant}")
    over_under_results.append([average_payout, sample_size, p_value, significant])

# Keep each result under the name the logging cell expects
ou_list, ou_over_list, ou_under_list = over_under_results

Moneyline

In [None]:
# Moneyline bets: overall, then split by the side that was picked
moneyline_results = []
for label, subset in (
    ("ML:        ", complete_gambling_df),
    ("ML - Away: ", complete_gambling_df.query('MLPick == "Away"')),
    ("ML - Home: ", complete_gambling_df.query('MLPick == "Home"')),
):
    average_payout, sample_size, p_value, significant = significance_test(subset, "ML", 1.0, 0.05)
    print(label, f"Average Payout: {average_payout}, Sample Size: {sample_size}, P-Value: {p_value}, Significant: {significant}")
    moneyline_results.append([average_payout, sample_size, p_value, significant])

# Keep each result under the name the logging cell expects
ml_list, ml_away_list, ml_home_list = moneyline_results

Note: The sample sizes printed above give the frequency of each bet type.

In [None]:
# Out of curiosity, how close do I expect spread counts to be? As in, the spread is always the same, so maybe it's biased to pick one side more than the other

##### Log 

In [None]:
# Flatten every significance result into "<name>", "<name> n", "<name> p", "<name> YN"
# (average payout, sample size, p-value, significant), in the order listed below
profitability_dictionary = {
    f"{name}{suffix}": value
    for name, results in (
        ("Spread", sp_list),
        ("Underdog", sp_underdog_list),
        ("Favorite", sp_favorite_list),
        ("OU", ou_list),
        ("Over", ou_over_list),
        ("Under", ou_under_list),
        ("ML", ml_list),
        ("Away", ml_away_list),
        ("Home", ml_home_list),
    )
    for suffix, value in zip(("", " n", " p", " YN"), results)
}

In [None]:
# Create dataframe
profitability_df = pd.DataFrame([profitability_dictionary])
# Concatenate model information
profitability_df = pd.concat([model_df, profitability_df], axis=1)
# Write to csv
profitability_df.to_csv(os.path.join(baseball_path, "C01. Gambling", "1. Profitability", f"{todaysdate} Profitability Log.csv"), index=False)

### 2. Scoring

Victory Margin

In [None]:
# Calculate average score differential between teams
# I tend to predict closer matchups, so I'd like to try to better replicate Vegas
# Absolute run margin between the two teams, for the model and for Vegas.
# The model tends to predict closer matchups than Vegas does.
complete_gambling_df['ModelDiff'] = abs(complete_gambling_df['VisitorModelRuns'] - complete_gambling_df['HomeModelRuns'])
complete_gambling_df['VegasDiff'] = abs(complete_gambling_df['VisitorVegasRuns'] - complete_gambling_df['HomeVegasRuns'])
complete_gambling_df.describe()

##### Graph Runs

In [None]:
# 1 when the model's projected total is above the over/under line, else 0
complete_gambling_df['toohigh'] = (complete_gambling_df['OU'] < complete_gambling_df['total_proj']).astype('int')

In [None]:
# Reference total that the projections are compared against (Vegas or actual)
# total = "TotalVegasRuns"
total = "total_act"

# Order games chronologically so the row index can serve as the x-axis
complete_gambling_df = complete_gambling_df.sort_values(by='date').reset_index(drop=True)

# x: row position, y: projected total
x_values = complete_gambling_df.index.tolist()
y_values = complete_gambling_df['total_proj'].tolist()

# Rolling mean of the reference total (200 games, at least 100 observations)
rolling_avg = complete_gambling_df[total].rolling(window=200, min_periods=100).mean()

# Projected totals as points, with the rolling average drawn over them
plt.scatter(x_values, y_values, label='total_proj')
plt.plot(x_values, rolling_avg, color='red', label='Rolling Average (Window=200)')

# Labels, title and legend
plt.title('Scatter Plot with Rolling Average')
plt.xlabel('Index')
plt.ylabel('total_proj')
plt.legend()

# Show the plot
plt.show()

Comparing my projections vs. Vegas vs. Actual

In [None]:
# Summary statistics for model runs, Vegas runs, actual scores and squared errors
complete_gambling_df.loc[:, [
    'VisitorModelRuns', 'HomeModelRuns',
    'VisitorVegasRuns', 'HomeVegasRuns',
    'away_score_act', 'home_score_act',
    'VisitorModelSqError', 'HomeModelSqError',
    'VisitorVegasSqError', 'HomeVegasSqError',
]].describe()

In [None]:
# Create dataframe
scoring_df = pd.DataFrame(complete_gambling_df[['VisitorModelRuns', 'HomeModelRuns', 'VisitorVegasRuns', 'HomeVegasRuns', 'away_score_act', 'home_score_act', 'VisitorModelSqError', 'HomeModelSqError', 'VisitorVegasSqError', 'HomeVegasSqError']].mean()).T
# Concatenate model information
scoring_df = pd.concat([model_df, scoring_df], axis=1)
# Write to csv
scoring_df.to_csv(os.path.join(baseball_path, "C01. Gambling", "2. Scoring", f"{todaysdate} Scoring Log.csv"), index=False)