## Plain logistic regression isn't looking promising

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm


# Load data; the first CSV column becomes the DataFrame index.
data = pd.read_csv("./out/d_fight_level_dataset_1line.csv", index_col=0)

# Change winner to binary 1/0: -1 becomes 0, every other value becomes 1.
# A vectorised comparison replaces the per-row apply(np.where(...)) call and
# produces the same 0/1 integers.
data["Winner"] = (data["Winner"] != -1).astype("int64")

# Name of the target column.
target = "Winner"

# Columns that are not used as predictors, collected in one place so the
# exclusion list is easy to audit.
excluded_columns = [
    # Referee, date, location, winner, title_bout, weight_class, no_of_rounds
    "Referee", "date", "location", "Winner", "title_bout",
    "weight_class", "no_of_rounds",
    # Diff_draw is mostly NA/0
    "Diff_draw",
    # Lots of win columns
    "Diff_win_by_Decision_Majority",
    "Diff_win_by_Decision_Split",
    "Diff_win_by_Decision_Unanimous",
    "Diff_win_by_KO/TKO",
    "Diff_win_by_Submission",
    "Diff_win_by_TKO_Doctor_Stoppage",
]

# Initial features: a Series whose index AND values are the column names, so it
# can be filtered by label (features.drop(index=...)) and also passed straight
# to data[features] to select columns. A single non-mutating drop replaces the
# three in-place drops; a missing column still raises KeyError.
features = pd.Series(data.columns, index=data.columns).drop(index=excluded_columns)

If we just throw every remaining feature into the model, we get lots of bad p-values (and a very low pseudo R-squared).

In [2]:
# Naive baseline: fit a logistic regression on every remaining feature at once.
# NOTE(review): sm.Logit does not add an intercept on its own and none is added
# here, so this model has no constant term -- confirm that is intended (the
# usual way to include one is sm.add_constant on the design matrix).
endog = data[target]
exog = data[features]
logit_model = sm.Logit(endog, exog)
result = logit_model.fit()

# Text summary of the fit (coefficients, standard errors, p-values, fit stats).
print(result.summary2())


Optimization terminated successfully.
         Current function value: 0.652126
         Iterations 5
                                Results: Logit
Model:                   Logit                 Pseudo R-squared:      0.009    
Dependent Variable:      Winner                AIC:                   3090.0410
Date:                    2019-11-17 15:41      BIC:                   3296.9234
No. Observations:        2314                  Log-Likelihood:        -1509.0  
Df Model:                35                    LL-Null:               -1522.0  
Df Residuals:            2278                  LLR p-value:           0.86368  
Converged:               1.0000                Scale:                 1.0000   
No. Iterations:          5.0000                                                
-------------------------------------------------------------------------------
                                 Coef.  Std.Err.    z    P>|z|   [0.025  0.975]
---------------------------------------------------

In [None]:
# make predictions and check recall, precision, f1 score.

from sklearn.metrics import confusion_matrix, classification_report, f1_score, precision_score, recall_score

# In-sample predicted probabilities from the most recently fitted `result`.
pred = result.predict()
print(
    'Mean wins: %s \nMean predict: %s\n' % (
    data.Winner.mean(),
    pred.mean()
))

# what is our base level if we predict the majority?
print( 'Accuracy predicting all wins:\n')
print( classification_report(
    data.Winner,
    [1] * len(pred)
))

# what is the outcome of different cutoffs?
print( 'Accuracy with varying cutoffs:\n' )
for i in range(11):

    # Probability threshold for this pass: 0.0, 0.1, ..., 1.0.
    icutoff = i/10

    # Predict a win (1) when the probability is strictly above the cutoff.
    predwin = (pred > icutoff).astype(int)

    # F1 for the win class and for the loss class. pos_label=0 scores the loss
    # class directly, which is the same as flipping both label vectors.
    fscorewin = f1_score( data.Winner, predwin )
    fscoreloss = f1_score( data.Winner, predwin, pos_label=0 )
    prec = precision_score( data.Winner, predwin )
    recall = recall_score( data.Winner, predwin )

    # The "f1-score" column is the unweighted mean of the two class F1 scores;
    # precision and recall refer to the win class only.
    print(
        '%s: \t f1-score: %s   \t precision %s   \t recall: %s' % (
            icutoff,
            round( (fscorewin + fscoreloss) / 2, 2 ),
            round( prec, 2 ),
            round( recall, 2 )
    ))

# NOTE: the figures quoted in this message are hard-coded text, not values
# computed above -- re-check them whenever the model or data changes.
print( '''
Seems like a cutoff of around .5 gives us way above average wins 
while participating in a large number of fights.
We are capturing 65% of the wins (recall) and winning 70% of the time.
Strangely though, we could win 63% of the time and capture 100% of the wins by
always betting to win.
I guess we need to think about betting and what make the most sense.
Here are the stats for a .5 cutoff:
''')

print( classification_report(
    data.Winner,
    (pred > 0.5).astype(int)
))

Mean wins: 0.632238547968885 
Mean predict: 0.5298946128329104

Accuracy predicting all wins:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       851
           1       0.63      1.00      0.77      1463

    accuracy                           0.63      2314
   macro avg       0.32      0.50      0.39      2314
weighted avg       0.40      0.63      0.49      2314

Accuracy with varying cutoffs:



Remove most of the insignificant features (those with a p-value above 0.15) to see if something looks better. The only problem is that there isn't much predictive value regardless.

In [None]:
# Coefficient table of the current fit, fetched once and reused.
coef_table = result.summary2().tables[1]

# Labels of the terms whose p-value is above 0.15; these are dropped.
weak_terms = coef_table.index[coef_table["P>|z|"] > .15]
features_adj = features.drop(index=weak_terms)

# Refit on the reduced feature set.
# NOTE: this overwrites `result`, so re-running this cell filters on the
# reduced model's p-values rather than on the original fit.
logit_model = sm.Logit(data[target], data[features_adj])
result = logit_model.fit()
print(result.summary2())

Maybe the effects are different by weight class? These results show more promise in some cases (though probably not enough effectiveness for a betting strategy).

In [None]:
# Minimum number of fights a weight class needs before a separate model is fit.
MIN_FIGHTS_PER_CLASS = 100

# Number of fights per weight class. The filter is applied to the counts Series
# itself rather than to a column looked up by name, because the name pandas
# gives the value_counts() result differs between versions (pandas >= 2.0 calls
# it "count", which made the old `classes.weight_class` lookup fail).
class_counts = data["weight_class"].value_counts()
classes = class_counts[class_counts >= MIN_FIGHTS_PER_CLASS].to_frame()

# Fit one logistic regression per remaining weight class on the reduced
# feature set and print its summary.
for weight_class in classes.index:
    df = data.loc[data.weight_class == weight_class]
    print("Class: " + weight_class)
    logit_model = sm.Logit(df[target], df[features_adj])
    result = logit_model.fit()
    print(result.summary2())