Betting strategy: place a $1 bet on each prediction in each race. If the prediction is correct, we collect $1 × odds; otherwise, we lose the $1.

The final result is positive if we win some money and negative if we lose.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

# Give every matplotlib figure a white background (set globally via rcParams)
plt.rcParams['figure.facecolor'] = 'white'

%matplotlib inline

# Part 6: Horse Race Prediction
## Backtesting Strategy Using our ML Models

In [None]:
# Read in the unseen file that the backtests below run against.
# NOTE(review): absolute Windows path - this cell only works on a machine
# that has the same directory layout.
df_unseen = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\df_unseen.csv')
# Preview the first rows
df_unseen.head()

In [None]:
# Replace every missing value in df_unseen with 0, modifying the frame in place.
# NOTE(review): this applies to all columns alike - confirm 0 is a sensible
# stand-in for each of them.
df_unseen.fillna(0, inplace=True)
# Display the filled frame
df_unseen

In [None]:
# List the column labels available in the unseen data
df_unseen.columns

In [None]:
# Read in our 4 classification predictions.
# NOTE(review): absolute Windows paths - adjust to the local data directory.
lr_pred = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\lr_pred.csv')
gnb_pred = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\gnb_pred.csv')
rfc_pred = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\rfc_pred.csv')
# The separator before the file name used to be written as '\\\s', which is an
# invalid escape sequence ('\s') and produced a doubled backslash in the path.
smote_rfc_pred = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\smote_rfc_pred.csv')

In [None]:
# Read in our 4 regression predictions (one CSV per model).
# NOTE(review): absolute Windows paths - adjust to the local data directory.
ridge_pred = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\ridge_pred.csv')
knn_pred = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\knn_pred.csv')
rf_pred = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\rf_pred.csv')
lgbm_pred = pd.read_csv('D:\\documentos\\IA Caballos\\flat_data\\lgbm_pred.csv')

### Baseline Case: Bet on horse with the lowest odds

In [None]:
# Strip a trailing '%' from the win_odds strings, convert them to float and
# divide by 100, overwriting the column.
# NOTE(review): relies on win_odds still holding strings, so re-running this
# cell after the conversion will presumably fail - confirm before re-executing.
df_unseen['win_odds'] =df_unseen['win_odds'].str.rstrip('%').astype('float') / 100.0

In [None]:
# Index label of the row with the smallest win_odds in the whole unseen set
df_unseen['win_odds'].sort_values(ascending=True).index[0]

In [None]:
# Check on baseline case where we just bet on the horse with the lowest odds
# Test on the df_unseen dataframes
import re
from numba import jit, cuda

def buy_lowest_odds(df, best_odds, graph=True):
    """Backtest the baseline of betting $1 on the lowest-odds horses.

    In every race of ``df`` a $1 bet is placed on each of the ``best_odds``
    horses with the smallest ``win_odds``. A bet on a horse whose
    ``finishing_position`` is 1 adds ``win_odds - 1`` to the running total;
    any other bet subtracts 1. The final total and the number of bets are
    printed; the function returns ``None``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``race_id``, ``win_odds`` and
        ``finishing_position``.
    best_odds : int
        How many of the lowest-odds horses to back in each race.
    graph : bool, default True
        When truthy, plot the running total after every bet and the number
        of bets placed per race.
    """
    # The former ``@jit(target_backend='cuda')`` decorator was removed: the body
    # is pandas/matplotlib code that numba cannot compile, and
    # ``target_backend`` is not a documented ``jit`` option, so it never ran
    # on a GPU.
    money = 0
    bets_made = []
    cumulative_money = []

    # groupby(sort=False) visits races in order of first appearance and avoids
    # rescanning the whole frame once per race.
    for _, race in df.groupby('race_id', sort=False):
        # The `best_odds` rows with the smallest odds in this race
        picks = race.sort_values('win_odds', ascending=True).head(best_odds)

        for position, odds in zip(picks['finishing_position'], picks['win_odds']):
            # Net result of a $1 stake: odds - 1 on a win, -1 otherwise
            money += (odds - 1) if position == 1 else -1
            cumulative_money.append(money)

        bets_made.append(len(picks))

    if graph:
        # Plot the cumulative money (one point per bet)
        plt.figure(figsize=(10, 6))
        plt.plot(cumulative_money)
        plt.axhline(y=0, color='r', linestyle='-')
        plt.title('Cumulative Money for Every $1 Bet')
        plt.xlabel('Bets Made')
        plt.ylabel('Cumulative Money')
        plt.show()

        # Plot the bets made (one point per race)
        plt.figure(figsize=(10, 6))
        plt.plot(bets_made)
        plt.title('Total Bets Made')
        plt.show()

    # Print the final money and bets made
    print('Final Money made from Baseline: ', round(money, 3))
    print('Total Bets Made: ', round(sum(bets_made), 3))

In [None]:
# Baseline: back the single lowest-odds horse in every race and show the plots
buy_lowest_odds(df_unseen, 1, True)

In [None]:
# Baseline variant: back the three lowest-odds horses per race, without plots
buy_lowest_odds(df_unseen, 3, False)

### Strategy 1: Buy Top Prediction
- If top horse prediction is 1, we make a bet, else we don't.

In [None]:
# Define a function to backtest the betting strategy
from numba import jit, cuda
import re
def simple_class_strategy(model_pred, graph=True, unseen_df=None):
    """Backtest strategy 1: bet $1 on every horse the classifier predicts to win.

    ``model_pred`` is left-merged with the actual results on
    ``RaceID``/``HorseID``. In each race a $1 bet is placed on every row whose
    predicted ``HorseWin`` equals 1; a bet pays ``win_odds`` when the horse
    actually won (``ActualWin`` = 1) and nothing otherwise.

    Parameters
    ----------
    model_pred : pandas.DataFrame
        Predictions with at least the columns ``RaceID``, ``HorseID`` and
        ``HorseWin`` (the predicted label).
    graph : bool, default True
        When truthy, plot the running total and the bets placed per race.
    unseen_df : pandas.DataFrame, optional
        Actual results with the columns ``finishing_position``, ``win_odds``,
        ``race_id``, ``HorseWin`` and ``horse_id``. Defaults to the
        module-level ``df_unseen``.

    Returns
    -------
    tuple
        ``(money, bets_made)``: the final net result and a list with the
        number of bets placed in each race.
    """
    # The former ``@jit(target_backend='cuda')`` decorator was removed: numba
    # cannot compile pandas/matplotlib code and ``target_backend`` is not a
    # documented ``jit`` option.
    if unseen_df is None:
        unseen_df = df_unseen

    # Rename so the actual outcome does not collide with the predicted HorseWin
    actual_results = unseen_df[['finishing_position', 'win_odds', 'race_id',
                                'HorseWin', 'horse_id']].rename(
        columns={'race_id': 'RaceID',
                 'horse_id': 'HorseID',
                 'HorseWin': 'ActualWin'})

    # Merge the prediction with the actual results
    df_backtest = pd.merge(model_pred, actual_results,
                           on=['RaceID', 'HorseID'], how='left')

    money = 0
    bets_made = []
    cumulative_money = [0]

    # groupby(sort=False) visits races in order of first appearance and avoids
    # rescanning the whole frame once per race.
    for _, race in df_backtest.groupby('RaceID', sort=False):
        # Rows we bet on: horses predicted to win
        bets = race[race['HorseWin'] == 1]

        # Payout: odds count only where the horse actually won
        amount_won = sum(bets['win_odds'] * bets['ActualWin'])

        # Net result of the race: payout minus the $1 stake per bet
        money += amount_won - len(bets)

        cumulative_money.append(money)
        bets_made.append(len(bets))

    if graph:
        # Plot the cumulative money (starting value 0, then one point per race)
        plt.figure(figsize=(10, 6))
        plt.plot(cumulative_money)
        plt.axhline(y=0, color='r', linestyle='-')
        plt.title('Cumulative Money for Every $1 Bet')
        plt.xlabel('Bets Made')
        plt.ylabel('Cumulative Money')
        plt.show()

        # Plot the bets made per race
        plt.figure(figsize=(10, 6))
        plt.plot(bets_made)
        plt.title('Bets Made')
        plt.show()

    # Print the final money and bets made
    print('Final Money: ', round(money, 3))
    print('Total Bets Made: ', round(sum(bets_made), 3), '\n')

    return money, bets_made

### Strategy 2: Buy Fastest Timing
- We bet on the horse with the fastest predicted timing.

In [None]:
# Define the regression backtest function
import re
def simple_reg_strategy(model_pred, graph=True, unseen_df=None):
    """Backtest strategy 2: bet $1 on the fastest predicted horse of each race.

    The predicted times in ``model_pred['0']`` are attached to the actual
    results by index alignment. In each race a $1 bet is placed on every row
    whose predicted time equals the race minimum (so ties produce several
    bets); a bet pays ``win_odds`` when ``ActualWin`` is 1.

    Parameters
    ----------
    model_pred : pandas.DataFrame
        Must contain a column named ``'0'`` holding the predicted times.
        Assumes its index lines up row-for-row with the results frame -
        TODO confirm against how the prediction files were written.
    graph : bool, default True
        When truthy, plot the running total and the bets placed per race.
    unseen_df : pandas.DataFrame, optional
        Actual results with the columns ``finishing_position``, ``win_odds``,
        ``race_id``, ``HorseWin`` and ``horse_id``. Defaults to the
        module-level ``df_unseen``. It is not modified.

    Returns
    -------
    tuple
        ``(money, bets_made)``: the final net result and a list with the
        number of bets placed in each race.
    """
    # The former ``@jit(target_backend='cuda')`` decorator was removed: numba
    # cannot compile pandas/matplotlib code and ``target_backend`` is not a
    # documented ``jit`` option.
    if unseen_df is None:
        unseen_df = df_unseen

    # rename() returns a new frame, so adding pred_time below leaves the
    # caller's data untouched
    results = unseen_df[['finishing_position', 'win_odds', 'race_id',
                         'HorseWin', 'horse_id']].rename(
        columns={'race_id': 'RaceID',
                 'horse_id': 'HorseID',
                 'HorseWin': 'ActualWin'})

    # Attach the predicted times (aligned on the index)
    results['pred_time'] = model_pred['0']

    money = 0
    bets_made = []
    cumulative_money = [0]

    # groupby(sort=False) visits races in order of first appearance and avoids
    # rescanning the whole frame once per race.
    for _, race in results.groupby('RaceID', sort=False):
        # Bet only on the horse(s) with the fastest predicted time
        bets = race[race['pred_time'] == race['pred_time'].min()]

        # Payout: odds count only where the horse actually won
        amount_won = sum(bets['win_odds'] * bets['ActualWin'])

        # Net result of the race: payout minus the $1 stake per bet
        money += amount_won - len(bets)

        cumulative_money.append(money)
        bets_made.append(len(bets))

    if graph:
        # Plot the cumulative money (starting value 0, then one point per race)
        plt.figure(figsize=(10, 6))
        plt.plot(cumulative_money)
        plt.axhline(y=0, color='r', linestyle='-')
        plt.title('Cumulative Money for Every $1 Bet')
        plt.xlabel('Bets Made')
        plt.ylabel('Cumulative Money')
        plt.show()

        # Plot the bets made per race
        plt.figure(figsize=(10, 6))
        plt.plot(bets_made)
        plt.title('Bets Made')
        plt.show()

    # Print the final money and bets made
    print('Final Money: ', round(money, 3))
    print('Total Bets Made: ', round(sum(bets_made), 3), '\n')

    return money, bets_made
                

### Strategy 3: Buy Top 3 Predictions
- We bet on the top 3 horses with the highest probability of winning

In [None]:
# Define a function to backtest the betting strategy
def top3_strategy(model_pred, graph=True, unseen_df=None):
    """Backtest strategy 3: bet $1 on every horse predicted to finish top 3.

    ``model_pred`` is left-merged with the actual results on
    ``RaceID``/``HorseID``. In each race a $1 bet is placed on every row whose
    ``HorseRankTop3`` equals 1; a bet pays ``win_odds`` only when the horse
    actually won (``ActualWin`` = 1).

    Parameters
    ----------
    model_pred : pandas.DataFrame
        Predictions with at least the columns ``RaceID``, ``HorseID`` and
        ``HorseRankTop3``.
    graph : bool, default True
        When truthy, plot the running total and the bets placed per race.
    unseen_df : pandas.DataFrame, optional
        Actual results with the columns ``finishing_position``, ``win_odds``,
        ``race_id``, ``HorseWin`` and ``horse_id``. Defaults to the
        module-level ``df_unseen``.

    Returns
    -------
    tuple
        ``(money, bets_made)``: the final net result and a list with the
        number of bets placed in each race.
    """
    # The former ``@jit(target_backend='cuda')`` decorator was removed: numba
    # cannot compile pandas/matplotlib code and ``target_backend`` is not a
    # documented ``jit`` option.
    if unseen_df is None:
        unseen_df = df_unseen

    # Rename so the actual outcome does not collide with the predicted HorseWin
    actual_results = unseen_df[['finishing_position', 'win_odds', 'race_id',
                                'HorseWin', 'horse_id']].rename(
        columns={'race_id': 'RaceID',
                 'horse_id': 'HorseID',
                 'HorseWin': 'ActualWin'})

    # Merge the prediction with the actual results
    df_backtest = pd.merge(model_pred, actual_results,
                           on=['RaceID', 'HorseID'], how='left')

    money = 0
    bets_made = []
    cumulative_money = [0]

    # groupby(sort=False) visits races in order of first appearance and avoids
    # rescanning the whole frame once per race.
    for _, race in df_backtest.groupby('RaceID', sort=False):
        # Rows we bet on: horses predicted to finish in the top 3
        bets = race[race['HorseRankTop3'] == 1]

        # Payout: odds count only where the horse actually won
        amount_won = sum(bets['win_odds'] * bets['ActualWin'])

        # Net result of the race: payout minus the $1 stake per bet
        money += amount_won - len(bets)

        cumulative_money.append(money)
        bets_made.append(len(bets))

    if graph:
        # Plot the cumulative money (starting value 0, then one point per race)
        plt.figure(figsize=(10, 6))
        plt.plot(cumulative_money)
        plt.axhline(y=0, color='r', linestyle='-')
        plt.title('Cumulative Money for Every $1 Bet')
        plt.xlabel('Bets Made')
        plt.ylabel('Cumulative Money')
        plt.show()

        # Plot the bets made per race
        plt.figure(figsize=(10, 6))
        plt.plot(bets_made)
        plt.title('Bets Made')
        plt.show()

    # Print the final money and bets made
    print('Final Money: ', round(money, 3))
    print('Total Bets Made: ', round(sum(bets_made), 3), '\n')

    return money, bets_made

### Strategy 4: Buy Only Fast and Good
- We bet on the horse that is predicted to be in the top 1, top 3 and has the best timing.

In [None]:
# Define a function to backtest the betting strategy
def hybrid_strategy(class_pred, reg_pred, graph=True, unseen_df=None):
    """Backtest strategy 4: bet only where classifier and regressor agree.

    ``class_pred`` is left-merged with the actual results on
    ``RaceID``/``HorseID`` and the predicted times in ``reg_pred['0']`` are
    attached by index alignment. In each race a $1 bet is placed on rows
    that satisfy all three conditions: predicted ``HorseWin`` is 1,
    ``HorseRankTop3`` is 1, and the predicted time equals the race minimum.
    A bet pays ``win_odds`` when ``ActualWin`` is 1.

    Parameters
    ----------
    class_pred : pandas.DataFrame
        Classifier output with at least the columns ``RaceID``, ``HorseID``,
        ``HorseWin`` and ``HorseRankTop3``.
    reg_pred : pandas.DataFrame
        Must contain a column named ``'0'`` holding the predicted times.
        Assumes its rows are in the same order as ``class_pred`` -
        TODO confirm against how the prediction files were written.
    graph : bool, default True
        When truthy, plot the running total and the bets placed per race.
    unseen_df : pandas.DataFrame, optional
        Actual results with the columns ``finishing_position``, ``win_odds``,
        ``race_id``, ``HorseWin`` and ``horse_id``. Defaults to the
        module-level ``df_unseen``.

    Returns
    -------
    tuple
        ``(money, bets_made)``: the final net result and a list with the
        number of bets placed in each race.
    """
    # The former ``@jit(target_backend='cuda')`` decorator was removed: numba
    # cannot compile pandas/matplotlib code and ``target_backend`` is not a
    # documented ``jit`` option.
    if unseen_df is None:
        unseen_df = df_unseen

    # Rename so the actual outcome does not collide with the predicted HorseWin
    actual_results = unseen_df[['finishing_position', 'win_odds', 'race_id',
                                'HorseWin', 'horse_id']].rename(
        columns={'race_id': 'RaceID',
                 'horse_id': 'HorseID',
                 'HorseWin': 'ActualWin'})

    # Merge the classifier prediction with the actual results, then attach the
    # predicted times (aligned on the merged frame's index)
    df_hybrid = pd.merge(class_pred, actual_results,
                         on=['RaceID', 'HorseID'], how='left')
    df_hybrid['pred_time'] = reg_pred['0']

    money = 0
    bets_made = []
    cumulative_money = [0]

    # groupby(sort=False) visits races in order of first appearance and avoids
    # rescanning the whole frame once per race.
    for _, race in df_hybrid.groupby('RaceID', sort=False):
        # A horse is backed only when all three signals agree
        predicted_winner = race['HorseWin'] == 1
        predicted_top3 = race['HorseRankTop3'] == 1
        fastest = race['pred_time'] == race['pred_time'].min()

        bets = race[predicted_winner & predicted_top3 & fastest]

        # Payout: odds count only where the horse actually won
        amount_won = sum(bets['win_odds'] * bets['ActualWin'])

        # Net result of the race: payout minus the $1 stake per bet
        money += amount_won - len(bets)

        cumulative_money.append(money)
        bets_made.append(len(bets))

    if graph:
        # Plot the cumulative money (starting value 0, then one point per race)
        plt.figure(figsize=(10, 6))
        plt.plot(cumulative_money)
        plt.axhline(y=0, color='r', linestyle='-')
        plt.title('Cumulative Money for Every $1 Bet')
        plt.xlabel('Bets Made')
        plt.ylabel('Cumulative Money')
        plt.show()

        # Plot the bets made per race
        plt.figure(figsize=(10, 6))
        plt.plot(bets_made)
        plt.title('Bets Made')
        plt.show()

    # Print the final money and bets made
    print('Final Money: ', round(money, 3))
    print('Total Bets Made: ', round(sum(bets_made), 3), '\n')

    return money, bets_made

### Run Strategy 1

In [None]:
# Map a display name to each classification prediction frame; the run loops
# below iterate over these in insertion order.
class_pred_dict = {'Logistic Regression': lr_pred, 
                    'Gaussian Naive Bayes': gnb_pred,
                    'Random Forest Classifier': rfc_pred,
                    'SMOTE + RF': smote_rfc_pred}

# Same mapping for the regression prediction frames
reg_pred_dict = {'Ridge Regression': ridge_pred,
                'KNN Regression': knn_pred,
                'Random Forest Regressor': rf_pred,
                'LGBM': lgbm_pred}

In [None]:
# Create a dataframe to store one result row per classification model
strat1_results = pd.DataFrame(columns=['Model', 'Money', 'Bets Made'])

for model_name, class_model in class_pred_dict.items():

        # Print the model name so the totals printed by the strategy are labelled
        print(model_name)

        # graph=True shows the plots for every model; pass False to suppress them
        money, bets = simple_class_strategy(class_model, graph=True)

        # Append the results as the next row of the dataframe
        strat1_results.loc[len(strat1_results)] = [model_name, money, sum(bets)]

In [None]:
# Create a dataframe to store one result row per regression model
strat2_results = pd.DataFrame(columns=['Model', 'Money', 'Bets Made'])

for model_name, reg_model in reg_pred_dict.items():

        # Print the model name so the totals printed by the strategy are labelled
        print(model_name)

        # graph=True shows the plots for every model; pass False to suppress them
        money, bets = simple_reg_strategy(reg_model, graph = True)

        # Append the results as the next row of the dataframe
        strat2_results.loc[len(strat2_results)] = [model_name, money, sum(bets)]

In [None]:
# Create a dataframe to store one result row per classification model
strat3_results = pd.DataFrame(columns=['Model', 'Money', 'Bets Made'])

for model_name, class_model in class_pred_dict.items():

        # Print the model name so the totals printed by top3_strategy are
        # labelled, matching the other strategy loops
        print(model_name)

        # graph=False suppresses the plots; pass True to view them
        money, bets = top3_strategy(class_model, graph = False)

        # Append the results as the next row of the dataframe
        strat3_results.loc[len(strat3_results)] = [model_name, money, sum(bets)]

In [None]:
# Create a dataframe to store one result row per (classifier, regressor) pair
strat4_results = pd.DataFrame(columns=['Model', 'Money', 'Bets Made'])

# Try every classifier/regressor combination
for model_name1, model1 in class_pred_dict.items():
    for model_name2, model2 in reg_pred_dict.items():
        # The pair of names is kept as a tuple and stored in the 'Model' column
        combo_model = model_name1, model_name2

        # Print the combo model name
        print(combo_model)

        # graph=False suppresses the plots; pass True to view them
        money, bets = hybrid_strategy(model1, model2, graph = False)

        # Append the results as the next row of the dataframe
        strat4_results.loc[len(strat4_results)] = [combo_model, money, sum(bets)]

### View the results

In [None]:
# Show Strategy 1 results, most profitable model first
strat1_results.sort_values(by='Money', ascending=False)

In [None]:
# Show Strategy 2 results, most profitable model first
strat2_results.sort_values(by='Money', ascending=False)


In [None]:
# Show Strategy 3 results, most profitable model first
strat3_results.sort_values(by='Money', ascending=False)

In [None]:
# Show Strategy 4 results, most profitable model combination first
strat4_results.sort_values(by='Money', ascending=False)

Happy to say that the top models for all 4 strategies seem to perform well in backtesting. This was quite surprising, given that the baseline of betting on the horse with the lowest odds gave quite a negative result.