# Setting Up Environment

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler

import gym

from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv

In [2]:
# Joining predictions to table w/ results and getting result

predictions = pd.read_csv('mma_data_predictions.csv', index_col = 0)

data = pd.read_csv('mma_data.csv', index_col = 0)
data = data[data.result >= 0]
results_data = data[['fighter_1', 'fighter_2', 'result', 'KO_OVR', 'SUB_OVR']]

odds_data = pd.read_csv('mma_data_odds.csv', index_col = 0)

merged = predictions.merge(results_data, on = ['fighter_1', 'fighter_2'])

In [3]:
# Winner results
merged['Predicted_Result_RF'] = merged.Prediction_RF_Winner.apply(lambda x: 1 if x > 0.5 else 0)
merged['Predicted_Result_GB'] = merged.Prediction_GB_Winner.apply(lambda x: 1 if x > 0.5 else 0)
# merged['Predicted_Resulted_LR'] = merged.Prediction_LR_Winner.apply(lambda x: 1 if x > 0.5 else 0)
merged['Accurate_RF'] = merged.apply(lambda x: 1 if x.result_y == x.Predicted_Result_RF else 0, axis = 1)
merged['Accurate_GB'] = merged.apply(lambda x: 1 if x.result_y == x.Predicted_Result_GB else 0, axis = 1)

# Sub results
merged['Predicted_Sub_RF'] = merged.Prediction_RF_SUB.apply(lambda x: 1 if x > 0.5 else 0)
merged['Predicted_Sub_GB'] = merged.Prediction_GB_SUB.apply(lambda x: 1 if x > 0.5 else 0)
# merged['Predicted_Resulted_LR'] = merged.Prediction_LR_Winner.apply(lambda x: 1 if x > 0.5 else 0)
merged['Accurate_RF_SUB'] = merged.apply(lambda x: 1 if x.SUB_OVR_y == x.Predicted_Sub_RF else 0, axis = 1)
merged['Accurate_GB_SUB'] = merged.apply(lambda x: 1 if x.SUB_OVR_y == x.Predicted_Sub_GB else 0, axis = 1)

# KO Results
merged['Predicted_KO_RF'] = merged.Prediction_RF_KO.apply(lambda x: 1 if x > 0.5 else 0)
merged['Predicted_KO_GB'] = merged.Prediction_GB_KO.apply(lambda x: 1 if x > 0.5 else 0)
# merged['Predicted_Resulted_LR'] = merged.Prediction_LR_Winner.apply(lambda x: 1 if x > 0.5 else 0)
merged['Accurate_RF_KO'] = merged.apply(lambda x: 1 if x.KO_OVR_y == x.Predicted_KO_RF else 0, axis = 1)
merged['Accurate_GB_KO'] = merged.apply(lambda x: 1 if x.KO_OVR_y == x.Predicted_KO_GB else 0, axis = 1)

# Machine Learning

### Analyzing Accuracy

In [10]:
# Winner results
merged['Predicted_Result_RF'] = merged.Prediction_RF_Winner.apply(lambda x: 1 if x > 0.5 else 0)
merged['Predicted_Result_GB'] = merged.Prediction_GB_Winner.apply(lambda x: 1 if x > 0.5 else 0)
# merged['Predicted_Resulted_LR'] = merged.Prediction_LR_Winner.apply(lambda x: 1 if x > 0.5 else 0)
merged['Accurate_RF'] = merged.apply(lambda x: 1 if x.result_y == x.Predicted_Result_RF else 0, axis = 1)
merged['Accurate_GB'] = merged.apply(lambda x: 1 if x.result_y == x.Predicted_Result_GB else 0, axis = 1)

# Sub results
merged['Predicted_Sub_RF'] = merged.Prediction_RF_SUB.apply(lambda x: 1 if x > 0.5 else 0)
merged['Predicted_Sub_GB'] = merged.Prediction_GB_SUB.apply(lambda x: 1 if x > 0.5 else 0)
# merged['Predicted_Resulted_LR'] = merged.Prediction_LR_Winner.apply(lambda x: 1 if x > 0.5 else 0)
merged['Accurate_RF_SUB'] = merged.apply(lambda x: 1 if x.SUB_OVR_y == x.Predicted_Sub_RF else 0, axis = 1)
merged['Accurate_GB_SUB'] = merged.apply(lambda x: 1 if x.SUB_OVR_y == x.Predicted_Sub_GB else 0, axis = 1)

# KO Results
merged['Predicted_KO_RF'] = merged.Prediction_RF_KO.apply(lambda x: 1 if x > 0.5 else 0)
merged['Predicted_KO_GB'] = merged.Prediction_GB_KO.apply(lambda x: 1 if x > 0.5 else 0)
# merged['Predicted_Resulted_LR'] = merged.Prediction_LR_Winner.apply(lambda x: 1 if x > 0.5 else 0)
merged['Accurate_RF_KO'] = merged.apply(lambda x: 1 if x.KO_OVR_y == x.Predicted_KO_RF else 0, axis = 1)
merged['Accurate_GB_KO'] = merged.apply(lambda x: 1 if x.KO_OVR_y == x.Predicted_KO_GB else 0, axis = 1)

In [3]:
print(f'Winner accuracy for RF is: {merged.Accurate_RF.mean()*100}%')
print(f'Winner accuracy for GB is: {merged.Accurate_GB.mean()*100}%')
print(f'Sub accuracy for RF is: {merged.Accurate_RF_SUB.mean()*100}%')
print(f'Sub accuracy for GB is: {merged.Accurate_GB_SUB.mean()*100}%')
print(f'KO accuracy for RF is: {merged.Accurate_RF_KO.mean()*100}%')
print(f'KO accuracy for GB is: {merged.Accurate_GB_KO.mean()*100}%')

Winner accuracy for RF is: 55.26315789473685%
Winner accuracy for GB is: 60.526315789473685%
Sub accuracy for RF is: 71.05263157894737%
Sub accuracy for GB is: 73.68421052631578%
KO accuracy for RF is: 42.10526315789473%
KO accuracy for GB is: 50.0%


##### Looking At Only Veteran Fights

In [5]:
merged['Fights_1'] = merged.wins_1 + merged.losses_1
merged['Fights_2'] = merged.wins_2 + merged.losses_2

test = merged[(merged.Fights_1 > 15) | (merged.Fights_2 > 15)]

In [7]:
print(f'Winner accuracy for RF is: {test.Accurate_RF.mean()*100}%')
print(f'Winner accuracy for veterans GB is: {test.Accurate_GB.mean()*100}%')
print(f'Sub accuracy for veterans RF is: {test.Accurate_RF_SUB.mean()*100}%')
print(f'Sub accuracy for veterans GB is: {test.Accurate_GB_SUB.mean()*100}%')
print(f'KO accuracy for veterans RF is: {test.Accurate_RF_KO.mean()*100}%')
print(f'KO accuracy for veterans GB is: {test.Accurate_GB_KO.mean()*100}%')

Winner accuracy for RF is: 62.06896551724138%
Winner accuracy for veterans GB is: 65.51724137931035%
Sub accuracy for veterans RF is: 75.86206896551724%
Sub accuracy for veterans GB is: 79.3103448275862%
KO accuracy for veterans RF is: 48.275862068965516%
KO accuracy for veterans GB is: 51.724137931034484%


### Potential Profit

In [24]:
# Getting all the relevant data in one place

odds_data = odds_data[['fighter_1', 'fighter_2', 'Fighter_1_Odds', 'Fighter_2_Odds']]

profit_df = merged.merge(odds_data, on = ['fighter_1', 'fighter_2'])

##### RF

In [25]:
def rf_bets(row):
    if row.Predicted_Result_RF == 1:
        if (row.Fighter_1_Odds > 100) & (row.Fighter_1_Odds <= 200):
            bet = 50
        elif (row.Fighter_1_Odds > 200):
            bet = 25
        elif (row.Fighter_1_Odds <= 100) & (row.Fighter_1_Odds >= -200):
            bet = 75
        elif (row.Fighter_1_Odds < -200):
            bet = 100
        else:
            bet = 0
    if row.Predicted_Result_RF == 0:
        if (row.Fighter_2_Odds > 100) & (row.Fighter_2_Odds <= 200):
            bet = 50
        elif (row.Fighter_2_Odds > 200):
            bet = 25
        elif (row.Fighter_2_Odds <= 100) & (row.Fighter_2_Odds >= -200):
            bet = 75
        elif (row.Fighter_2_Odds < -200):
            bet = 100
        else:
            bet = 0
    return bet

def calculate_payoff_and_result(row):
    # Calculating Payoff
    if row.Predicted_Result_RF == 1:
        if row.Fighter_1_Odds>0:
            payoff = (row.Fighter_1_Odds/100)*row.Bet
        else:
            payoff = row.Bet/((abs(row.Fighter_1_Odds)/100))
    else:
        if row.Fighter_2_Odds>0:
            payoff = (row.Fighter_2_Odds/100)*row.Bet
        else:
            payoff = row.Bet/((abs(row.Fighter_2_Odds)/100))
    # Calculating Bet Result
    if row.Predicted_Result_RF == row.result_y:
        bet_result = payoff
    else:
        bet_result = -(row.Bet)
    
    return bet_result
            
profit_df['Bet'] = profit_df.apply(rf_bets, axis = 1)
profit_df['Bet'] = 100
profit_df['Bet_Result'] = profit_df.apply(calculate_payoff_and_result, axis = 1)

profit_df.Bet_Result.sum()

-659.0888475311815

In [26]:
# Veteran fights only

profit_df['Fights_1'] = profit_df.wins_1 + profit_df.losses_1
profit_df['Fights_2'] = profit_df.wins_2 + profit_df.losses_2

test = profit_df[(profit_df.Fights_1 > 15) | (profit_df.Fights_2 > 15)]
test.Bet_Result.sum()

-236.11067038098906

##### GB

In [27]:
def rf_bets(row):
    if row.Predicted_Result_GB == 1:
        if (row.Fighter_1_Odds > 100) & (row.Fighter_1_Odds <= 200):
            bet = 50
        elif (row.Fighter_1_Odds > 200):
            bet = 25
        elif (row.Fighter_1_Odds <= 100) & (row.Fighter_1_Odds >= -200):
            bet = 75
        elif (row.Fighter_1_Odds < -200):
            bet = 100
        else:
            bet = 0
    if row.Predicted_Result_GB == 0:
        if (row.Fighter_2_Odds > 100) & (row.Fighter_2_Odds <= 200):
            bet = 50
        elif (row.Fighter_2_Odds > 200):
            bet = 25
        elif (row.Fighter_2_Odds <= 100) & (row.Fighter_2_Odds >= -200):
            bet = 75
        elif (row.Fighter_2_Odds < -200):
            bet = 100
        else:
            bet = 0
    return bet

def calculate_payoff_and_result(row):
    # Calculating Payoff
    if row.Predicted_Result_GB == 1:
        if row.Fighter_1_Odds>0:
            payoff = (row.Fighter_1_Odds/100)*row.Bet
        else:
            payoff = row.Bet/((abs(row.Fighter_1_Odds)/100))
    else:
        if row.Fighter_2_Odds>0:
            payoff = (row.Fighter_2_Odds/100)*row.Bet
        else:
            payoff = row.Bet/((abs(row.Fighter_2_Odds)/100))
    # Calculating Bet Result
    if row.Predicted_Result_GB == row.result_y:
        bet_result = payoff
    else:
        bet_result = -(row.Bet)
    
    return bet_result
            
profit_df['Bet'] = profit_df.apply(rf_bets, axis = 1)
profit_df['Bet'] = 100
profit_df['Bet_Result'] = profit_df.apply(calculate_payoff_and_result, axis = 1)

profit_df.Bet_Result.sum()

-375.95770154586677

In [28]:
# Veteran fights only

profit_df['Fights_1'] = profit_df.wins_1 + profit_df.losses_1
profit_df['Fights_2'] = profit_df.wins_2 + profit_df.losses_2

test = profit_df[(profit_df.Fights_1 > 15) | (profit_df.Fights_2 > 15)]
test.Bet_Result.sum()

-58.53507995122976

# Reinforcement Learning

In [4]:
# Setting up environment and data

# Loading SAC model
model = SAC.load('SAC_model.zip')

# Environment
class BettingEnv(gym.Env):
    
    def __init__(self, df, initial_funds):
        self.df = df
        self.current_step = 0
        self.initial_funds = initial_funds
        self.current_funds = initial_funds
        # Actions of the format Team 1 x%, Team 2 x%, No bet, etc.
        self.action_space = gym.spaces.Box(
          low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(17,), dtype=np.float32)
    
    def step(self, action):
        self.current_step += 1
        action_type = action[0]
        action_amount = (action[1] * self.current_funds) / 100.0
        
        if self.current_step >= len(self.df):
            done = True
            reward = 0
            obs = self.df.loc[len(self.df) - 1, ['reach_diff', 'age_diff', 'slpm_diff', 'sapm_diff', 'td_acc_diff', 'td_def_diff',
              'td_avg_diff', 'sub_avg_diff', 'strk_acc_diff', 'strk_def_diff', 'wins_diff',
              'losses_diff', 'win_pct_diff', 'weight_1', 'age_1', 'Prediction_1_lr', 'Prediction_rf']].values
            
        else:
            done = False
            row = self.df.loc[self.current_step, :]
            obs = row[['reach_diff', 'age_diff', 'slpm_diff', 'sapm_diff', 'td_acc_diff', 'td_def_diff',
              'td_avg_diff', 'sub_avg_diff', 'strk_acc_diff', 'strk_def_diff', 'wins_diff',
              'losses_diff', 'win_pct_diff', 'weight_1', 'age_1', 'Prediction_1_lr', 'Prediction_rf']].values
            
            if action_type < 1:
                # Betting on fighter 1
                if row[['Fighter_1_Odds']].values[0] > 0:
                    payoff = (row[['Fighter_1_Odds']].values[0]/100)*action_amount
                else:
                    payoff = action_amount/((abs(row[['Fighter_1_Odds']].values[0])/100))
                # Determining reward based on result
                if row[['result']].values[0] == 1:
                    reward = payoff
                else:
                    reward = -(action_amount)
                
            elif action_type < 2:
                # Determining payoff (away team)
                if row[['Fighter_2_Odds']].values[0] > 0:
                    payoff = (row[['Fighter_2_Odds']].values[0]/100)*action_amount
                else:
                    payoff = action_amount/((abs(row[['Fighter_2_Odds']].values[0])/100))
                # Determining reward based on result
                if row[['result']].values[0] == 0:
                    reward = payoff
                else:
                    reward = -(action_amount)
                
            else:
                # No bet
                reward = 0

        self.current_funds += reward
        return obs, reward, done, {}
    
    def reset(self):
        self.current_step = 0
        self.current_funds = self.initial_funds
        return self.df.loc[self.current_step, ['reach_diff', 'age_diff', 'slpm_diff', 'sapm_diff', 'td_acc_diff', 'td_def_diff',
              'td_avg_diff', 'sub_avg_diff', 'strk_acc_diff', 'strk_def_diff', 'wins_diff',
              'losses_diff', 'win_pct_diff', 'weight_1', 'age_1', 'Prediction_1_lr', 'Prediction_rf']]
    
    def render(self, mode='human'):
        # Render the environment
        print(f"Current funds: {self.current_funds}")

# Renaming columns to fit environment
merged.columns = ['fighter_1', 'weight_1', 'reach_1', 'age_1', 'slpm_1', 'sapm_1',
       'td_avg_1', 'sub_avg_1', 'strk_acc_1', 'strk_def_1', 'td_acc_1',
       'td_def_1', 'wins_1', 'losses_1', 'fighter_2', 'weight_2', 'reach_2',
       'age_2', 'slpm_2', 'sapm_2', 'td_avg_2', 'sub_avg_2', 'strk_acc_2',
       'strk_def_2', 'td_acc_2', 'td_def_2', 'wins_2', 'losses_2', 'result_x',
       'SUB_OVR_x', 'KO_OVR_x', 'reach_diff', 'age_diff', 'slpm_diff',
       'sapm_diff', 'td_acc_diff', 'td_def_diff', 'td_avg_diff',
       'sub_avg_diff', 'strk_acc_diff', 'strk_def_diff', 'wins_diff',
       'losses_diff', 'win_pct_1', 'win_pct_2', 'win_pct_diff',
       'Prediction_RF_Winner', 'Prediction_GB_Winner', 'Prediction_1_lr',
       'Prediction_RF_SUB', 'Prediction_GB_SUB', 'Prediction_LR_SUB',
       'Prediction_RF_KO', 'Prediction_GB_KO', 'Prediction_LR_KO', 'Date',
       'result_y', 'KO_OVR_y', 'SUB_OVR_y', 'Prediction_rf',
       'Predicted_Result_GB', 'Accurate_RF', 'Accurate_GB', 'Predicted_Sub_RF',
       'Predicted_Sub_GB', 'Accurate_RF_SUB', 'Accurate_GB_SUB',
       'Predicted_KO_RF', 'Predicted_KO_GB', 'Accurate_RF_KO',
       'Accurate_GB_KO']

In [7]:
# Generating predictions

rl_prediction_columns = ['reach_diff', 'age_diff', 'slpm_diff', 'sapm_diff', 'td_acc_diff', 'td_def_diff',
              'td_avg_diff', 'sub_avg_diff', 'strk_acc_diff', 'strk_def_diff', 'wins_diff',
              'losses_diff', 'win_pct_diff', 'weight_1', 'age_1', 'Prediction_1_lr', 'Prediction_rf']
rl_data = merged[rl_prediction_columns]
for col in rl_prediction_columns:
    rl_data[col] = rl_data[col].astype('float')

scaler = StandardScaler()
rl_data[rl_prediction_columns] = scaler.fit_transform(rl_data[rl_prediction_columns])

rl_data['Prediction_RL'] = rl_data.apply(lambda x: model.predict(x), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [8]:
rl_data

Unnamed: 0,reach_diff,age_diff,slpm_diff,sapm_diff,td_acc_diff,td_def_diff,td_avg_diff,sub_avg_diff,strk_acc_diff,strk_def_diff,wins_diff,losses_diff,win_pct_diff,weight_1,age_1,Prediction_1_lr,Prediction_rf,Prediction_RL
0,0.160456,0.724558,0.671929,1.03671,1.067165,0.237727,0.1424,-0.704235,-0.96988,-0.115018,1.778395,0.388753,0.422659,0.408856,0.093422,-0.182986,0.597614,"([0.83572894, 0.7780827], None)"
1,1.75224,-0.45064,0.529108,0.60277,-0.445144,0.237727,-0.480559,-0.576599,0.098394,0.095618,-1.350263,0.388753,-0.919416,-0.632118,0.093422,-0.182983,0.597614,"([2.6968265, 0.96162903], None)"
2,-1.593204,-0.114869,0.275203,0.126286,0.64708,-1.267877,0.350053,0.189214,0.039045,-0.641608,-0.255233,-0.179425,0.151864,0.408856,-0.115404,-0.182976,0.597614,"([0.079300046, 0.11804825], None)"
3,0.187436,-1.457952,-0.510315,-1.652019,0.058959,-2.124913,1.433107,0.48703,0.929274,0.990821,-0.568098,-1.031691,1.370444,-0.892362,-0.950707,0.542962,0.597614,"([2.9792182, 0.19646633], None)"
4,0.079518,0.724558,1.723254,-0.805409,1.515257,2.044452,0.777574,0.31685,-1.266623,0.095618,0.214066,-0.179425,0.151864,-0.892362,-0.741881,-0.182979,0.597614,"([2.9607291, 0.002286166], None)"
5,1.941096,-1.122181,2.465132,-1.435048,-1.285315,-1.175224,-0.338052,0.189214,0.929274,-3.169241,-0.411666,-1.031691,1.336595,0.408856,-0.115404,-0.182987,0.597614,"([2.992497, 0.0036958456], None)"
6,0.187436,0.388787,-0.744384,-0.554405,0.226994,-1.221551,0.085397,0.274305,0.691879,0.042959,1.465529,1.24102,-0.895213,-1.152605,0.093422,-0.180251,-1.67332,"([1.0211006, 0.17305541], None)"
7,0.187436,-0.114869,0.961539,1.172848,-1.425344,1.326394,-0.651568,-2.87404,-0.376395,-0.220336,-0.880964,-0.747602,0.461344,-1.152605,0.093422,-0.182987,-1.67332,"([1.312561, 0.22457504], None)"
8,0.052539,2.067641,-0.98242,0.470886,1.011154,-0.086557,0.981156,0.614666,-0.257698,0.148277,3.186292,1.809197,-0.756293,0.929344,2.599333,-0.182987,-1.67332,"([2.092991, 0.28101522], None)"
9,0.079518,1.060329,-0.050113,0.377291,-0.109075,-0.202373,-0.00825,0.699756,0.810576,-0.483631,-0.0988,-1.31578,1.32018,-1.152605,0.928726,-0.182957,0.597614,"([2.9831457, 0.0527986], None)"
