In [10]:

# standard library imports
import git, os, sys

git_repo = git.Repo(os.getcwd(), search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")
os.chdir(f'{git_root}/src')
sys.path.append(os.path.abspath(os.path.join(f'{git_root}/src')))
print(f'Changed working directory to {os.getcwd()}')

# local imports
from fight_stat_helpers import *
from data_handler import DataHandler

Changed working directory to C:\Users\Alex\OneDrive\Documents\GitHub\UFC_Prediction_2022\src


# NEW FEATURES

- Dominance Score for a single fight: (total score of fighter based on events in the fight) - (total score of opponent based on events in the fight)
- Average Dominance over time scales
- Fighter Scores
- Fight Math
- Fighter Score increments based on closeness of fights with opponents with high scores 

# Other things to try
- with or without regularization
- with or without scaling
- with or without sum features (seems like only age needs sum features)
- with or without squared features (seems like only age needs squared features)
- Decide whether to include SDEC in bullshit or not
- Do linear reg on draws and SDEC and see how this compares to log reg plane (hypothesis: they are the same plane since close fights should be near the plane boundary)

In [11]:
dh = DataHandler()
# get reported derived doubled data
ufc_fights_reported_derived_doubled = dh.get('ufc_fights_reported_derived_doubled')

In [12]:
ufc_fights_predictive_flattened_diffs = dh.make_ufc_fights_predictive_flattened_diffs(ufc_fights_reported_derived_doubled)
ufc_fights_predictive_flattened_diffs = dh.clean_ufc_fights_for_winner_prediction(ufc_fights_predictive_flattened_diffs)

In [13]:
# drop the 'fighter_result' column and opponent_result column as it is not needed for the model and also fighter and opponent columns
X = ufc_fights_predictive_flattened_diffs.drop(columns=['fighter', 'opponent', 'method', 'division'])
# KEEP result in X as this is what model_score is expecting for better or for worse
y = X['result']

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44)

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

# Use the entire data set with all features as a baseline

In [6]:
all_features = X_train.columns.tolist()
# remove result from all_features
if 'result' in all_features:
    all_features.remove('result')
model_test_score(X_train, X_test, all_features, _max_iter = 20000, scaled=True)

Training set size: (5061, 317) accuracy: 0.662912467891721
Test set size: (1266, 317) accuracy: 0.6216429699842022
Test set neg log loss: -0.665645414790237. Probability to observe data given model: 0.5139417150555374


In [7]:
best_features = additive_greedy(X_train, X_test, search_doubles=True, _max_iter=30000)

Starting with an empty feature set.
Added single feature: age_diff
Negative log loss on training set: -0.6763950894451868
Training set size: (5061, 317) accuracy: 0.5730092867022327
Test set size: (1266, 317) accuracy: 0.5821484992101106
Test set neg log loss: -0.6766789533820871. Probability to observe data given model: 0.5083022879382675
Added single feature: l5y_wins_diff
Negative log loss on training set: -0.6683557844027358
Training set size: (5061, 317) accuracy: 0.5858525983007311
Test set size: (1266, 317) accuracy: 0.5781990521327014
Test set neg log loss: -0.670067272044672. Probability to observe data given model: 0.5116741552620884
Added single feature: l5y_wins_wins_diff
Negative log loss on training set: -0.6616599926896448
Training set size: (5061, 317) accuracy: 0.5961272475795297
Test set size: (1266, 317) accuracy: 0.6034755134281201
Test set neg log loss: -0.6621490899544908. Probability to observe data given model: 0.5157417671878041
Added single feature: l1y_inf_he

In [8]:
best_features = ['age_diff',
 'l5y_wins_diff',
 'l5y_wins_wins_diff',
 'l1y_inf_head_strikes_accuracy_diff',
 'l5y_abs_control_per_min_diff',
 'l3y_abs_takedowns_attempts_per_min_diff',
 'l3y_abs_head_strikes_landed_per_min_diff',
 'all_inf_total_strikes_accuracy_diff',
 'l5y_losses_losses_diff',
 'l1y_inf_clinch_strikes_accuracy_diff',
 'all_wins_wins_diff',
 'all_num_fights_diff',
 'l3y_inf_clinch_strikes_landed_per_min_diff',
 'reach_diff',
 'all_inf_takedowns_attempts_per_min_diff',
 'l5y_inf_clinch_strikes_landed_per_min_diff',
 'l3y_wins_wins_diff',
 'l3y_inf_ground_strikes_accuracy_diff',
 'l3y_abs_ground_strikes_accuracy_diff',
 'l5y_defensive_grappling_loss_diff',
 'l1y_inf_reversals_per_min_diff',
 'all_defensive_grappling_loss_diff',
 'l1y_inf_head_strikes_landed_per_min_diff',
 'all_inf_total_strikes_landed_per_min_diff',
 'l1y_abs_distance_strikes_accuracy_diff',
 'l1y_abs_takedowns_landed_per_min_diff',
 'l3y_abs_takedowns_accuracy_diff',
 'l1y_abs_leg_strikes_accuracy_diff',
 'all_abs_leg_strikes_accuracy_diff',
 'l3y_inf_knockdowns_per_min_diff',
 'all_inf_distance_strikes_accuracy_diff',
 'all_offensive_grappling_score_diff',
 'all_abs_takedowns_landed_per_min_diff',
 'l1y_abs_takedowns_accuracy_diff',
 'l1y_inf_total_strikes_accuracy_diff',
 'all_abs_control_per_min_diff',
 'all_inf_clinch_strikes_accuracy_diff',
 'all_inf_leg_strikes_accuracy_diff',
 'l1y_inf_knockdowns_per_min_diff',
 'l1y_wins_wins_diff',
 'l3y_inf_total_strikes_accuracy_diff',
 'all_abs_ground_strikes_accuracy_diff',
 'l5y_inf_body_strikes_accuracy_diff',
 'age_sq_diff',
 'l1y_inf_body_strikes_landed_per_min_diff',
 'l3y_inf_body_strikes_attempts_per_min_diff',
 'all_inf_body_strikes_landed_per_min_diff',
 'l3y_inf_total_strikes_attempts_per_min_diff',
 'l1y_inf_body_strikes_accuracy_diff',
 'l5y_inf_body_strikes_landed_per_min_diff',
 'all_inf_clinch_strikes_landed_per_min_diff',
 'l1y_abs_distance_strikes_attempts_per_min_diff',
 'l1y_abs_body_strikes_landed_per_min_diff',
 'l5y_abs_sig_strikes_landed_per_min_diff',
 'l3y_overall_fighter_score_diff',
 'all_abs_body_strikes_attempts_per_min_diff',
 'all_abs_total_strikes_accuracy_diff',
 'l5y_abs_sig_strikes_accuracy_diff',
 'all_abs_total_strikes_landed_per_min_diff',
 'l3y_losses_diff',
 'l5y_wins_ko_diff',
 'all_wins_ko_diff',
 'l5y_abs_total_strikes_landed_per_min_diff',
 'all_inf_takedowns_landed_per_min_diff',
 'all_inf_leg_strikes_landed_per_min_diff',
 'l3y_inf_leg_strikes_attempts_per_min_diff',
 'all_abs_sig_strikes_landed_per_min_diff',
 'l1y_abs_clinch_strikes_attempts_per_min_diff',
 'l1y_abs_clinch_strikes_landed_per_min_diff',
 'l3y_abs_clinch_strikes_landed_per_min_diff',
 'l5y_abs_clinch_strikes_attempts_per_min_diff']

model_test_score(X_train, X_test, best_features, _max_iter = 30000, scaled=True)

KeyError: "['age_sq_diff'] not in index"

In [None]:
# with wins wins and losses losses added in
# test split random_state=42
# Train set accuracy: 0.644695170229612
# Test set accuracy: 0.6416139240506329
best_features1 = ['age_diff',
 'l5y_overall_fighter_score_diff',
 'l3y_wins_diff',
 'l3y_wins_wins_diff',
 'all_inf_head_strikes_accuracy_diff',
 'l3y_abs_head_strikes_accuracy_diff',
 'l5y_inf_takedowns_attempts_per_min_diff',
 'l5y_abs_takedowns_attempts_per_min_diff',
 'l3y_inf_clinch_strikes_landed_per_min_diff',
 'l5y_losses_losses_diff',
 'l1y_abs_clinch_strikes_accuracy_diff',
 'l5y_wins_diff',
 'l5y_losses_ko_diff',
 'l3y_abs_body_strikes_accuracy_diff',
 'l1y_abs_clinch_strikes_landed_per_min_diff',
 'l5y_wins_wins_diff',
 'l1y_wins_wins_diff',
 'l1y_abs_total_strikes_landed_per_min_diff',
 'l3y_defensive_grappling_loss_diff',
 'all_abs_total_strikes_landed_per_min_diff',
 'l5y_inf_clinch_strikes_landed_per_min_diff',
 'l3y_losses_diff',
 'all_losses_dec_diff',
 'all_wins_wins_diff',
 'l1y_abs_body_strikes_landed_per_min_diff',
 'all_wins_diff',
 'l1y_abs_head_strikes_accuracy_diff',
 'l1y_inf_total_strikes_landed_per_min_diff',
 'l1y_inf_reversals_per_min_diff',
 'l3y_inf_takedowns_accuracy_diff',
 'all_abs_body_strikes_attempts_per_min_diff',
 'l3y_inf_total_strikes_attempts_per_min_diff',
 'l5y_inf_knockdowns_per_min_diff',
 'l1y_overall_fighter_score_diff',
 'all_abs_leg_strikes_attempts_per_min_diff',
 'l5y_abs_leg_strikes_landed_per_min_diff',
 'l3y_losses_sub_diff',
 'l3y_abs_knockdowns_per_min_diff',
 'l3y_overall_fighter_score_diff',
 'all_abs_body_strikes_landed_per_min_diff',
 'l1y_abs_body_strikes_attempts_per_min_diff',
 'l3y_inf_leg_strikes_landed_per_min_diff',
 'all_abs_ground_strikes_attempts_per_min_diff',
 'l5y_inf_leg_strikes_attempts_per_min_diff',
 'all_inf_leg_strikes_landed_per_min_diff',
 'l1y_abs_total_strikes_accuracy_diff',
 'l1y_abs_distance_strikes_accuracy_diff',
 'l1y_inf_head_strikes_accuracy_diff',
 'l5y_abs_ground_strikes_landed_per_min_diff',
 'all_inf_clinch_strikes_accuracy_diff',
 'all_inf_takedowns_accuracy_diff']

model_test_score(X_train, X_test, best_features1, _max_iter = 30000, scaled=True)

In [None]:
# with wins wins but not losses losses
# test split random_state=42
# Train set accuracy: 0.6393507521773555
# Test set accuracy: 0.6431962025316456
best_features2 = ['age_sq_diff',
 'l5y_overall_fighter_score_diff',
 'l3y_wins_diff',
 'l3y_wins_wins_diff',
 'all_inf_head_strikes_accuracy_diff',
 'l3y_abs_head_strikes_accuracy_diff',
 'l5y_inf_takedowns_attempts_per_min_diff',
 'l5y_abs_takedowns_attempts_per_min_diff',
 'l3y_inf_clinch_strikes_landed_per_min_diff',
 'l1y_abs_clinch_strikes_accuracy_diff',
 'l3y_abs_body_strikes_accuracy_diff',
 'l3y_overall_fighter_score_diff',
 'l5y_inf_knockdowns_per_min_diff',
 'l5y_wins_wins_diff',
 'l1y_wins_wins_diff',
 'l1y_abs_clinch_strikes_landed_per_min_diff',
 'l1y_abs_total_strikes_landed_per_min_diff',
 'l3y_defensive_grappling_loss_diff',
 'l5y_losses_ko_diff',
 'l5y_wins_diff',
 'l1y_inf_reversals_per_min_diff',
 'l1y_abs_body_strikes_landed_per_min_diff',
 'l3y_inf_takedowns_accuracy_diff',
 'l3y_losses_sub_diff',
 'l1y_inf_total_strikes_landed_per_min_diff',
 'l3y_inf_total_strikes_attempts_per_min_diff',
 'all_abs_body_strikes_attempts_per_min_diff',
 'l1y_abs_head_strikes_accuracy_diff',
 'all_abs_body_strikes_landed_per_min_diff',
 'l1y_overall_fighter_score_diff',
 'l1y_abs_body_strikes_attempts_per_min_diff',
 'l3y_abs_control_per_min_diff',
 'l1y_abs_total_strikes_accuracy_diff',
 'l1y_abs_distance_strikes_accuracy_diff',
 'l3y_losses_diff',
 'all_losses_dec_diff',
 'all_wins_wins_diff',
 'all_wins_diff',
 'l1y_inf_head_strikes_accuracy_diff',
 'l5y_inf_clinch_strikes_landed_per_min_diff',
 'all_inf_clinch_strikes_accuracy_diff',
 'all_abs_leg_strikes_attempts_per_min_diff',
 'l5y_abs_leg_strikes_landed_per_min_diff',
 'l3y_inf_leg_strikes_attempts_per_min_diff',
 'all_offensive_grappling_score_diff']

model_test_score(X_train, X_test, best_features2, _max_iter = 30000, scaled=True)

In [None]:
# got 63% on a test set (matching training accuracy)
feature_set_no_sum3 = ['age_diff',
                      'age_sum',
 'l5y_overall_fighter_score_diff',
 'l3y_wins_diff',
 'l3y_wins_wins_diff',
 'all_inf_head_strikes_accuracy_diff',
 'l3y_abs_head_strikes_accuracy_diff',
 'l5y_inf_takedowns_attempts_per_min_diff',
 'l5y_abs_takedowns_attempts_per_min_diff',
 'l3y_inf_clinch_strikes_landed_per_min_diff',
 'l1y_abs_clinch_strikes_accuracy_diff',
 'l3y_abs_body_strikes_accuracy_diff',
 'l3y_overall_fighter_score_diff',
 'l5y_inf_knockdowns_per_min_diff',
 'l5y_wins_wins_diff',
 'l1y_wins_wins_diff',
 'l1y_abs_clinch_strikes_landed_per_min_diff',
 'l1y_abs_total_strikes_landed_per_min_diff',
 'l3y_inf_takedowns_accuracy_diff',
 'l3y_defensive_grappling_loss_diff',
 'l1y_inf_reversals_per_min_diff',
 'l3y_inf_total_strikes_attempts_per_min_diff',
 'l3y_losses_sub_diff',
 'l1y_abs_head_strikes_accuracy_diff',
 'l1y_inf_total_strikes_landed_per_min_diff',
 'l1y_abs_body_strikes_landed_per_min_diff',
 'l5y_abs_body_strikes_attempts_per_min_diff',
 'all_abs_body_strikes_landed_per_min_diff',
 'all_abs_body_strikes_attempts_per_min_diff',
 'l1y_abs_body_strikes_attempts_per_min_diff',
 'l1y_abs_total_strikes_accuracy_diff',
 'l1y_overall_fighter_score_diff',
 'l1y_abs_distance_strikes_accuracy_diff',
 'l3y_inf_clinch_strikes_accuracy_diff',
 'all_abs_leg_strikes_attempts_per_min_diff',
 'l5y_abs_leg_strikes_landed_per_min_diff',
 'l1y_inf_total_strikes_accuracy_diff',
 'l1y_abs_total_strikes_attempts_per_min_diff',
 'l3y_inf_leg_strikes_landed_per_min_diff',
 'l1y_abs_distance_strikes_attempts_per_min_sum',
 'l3y_inf_sub_attempts_per_min_diff',
 'l5y_inf_takedowns_landed_per_min_diff',
 'l3y_inf_reversals_per_min_diff',
 'l5y_offensive_grappling_score_diff',
 'l1y_abs_knockdowns_per_min_diff',
]

model_test_score(X_train, X_test, feature_set_no_sum3, _max_iter = 30000, scaled=True)

In [None]:
# result without squares
best_additive_features4 = [
    'l3y_wins_diff',
    'age_diff',
    'l5y_offensive_grappling_score_diff',
    'l5y_inf_ground_strikes_attempts_per_min_diff',
    'height_sum',
    'l5y_losses_ko_diff',
    'l5y_defensive_grappling_loss_diff',
    'l5y_inf_ground_strikes_accuracy_sum',
    'all_inf_takedowns_attempts_per_min_diff',
    'l1y_inf_clinch_strikes_accuracy_sum',
    'l1y_wins_sub_sum',
    'l1y_inf_total_strikes_accuracy_sum',
    'l5y_abs_ground_strikes_landed_per_min_diff',
    'l1y_abs_ground_strikes_attempts_per_min_diff',
    'all_abs_ground_strikes_attempts_per_min_diff',
    'l1y_abs_control_per_min_diff',
    'l1y_abs_clinch_strikes_accuracy_diff',
    'l1y_inf_takedowns_attempts_per_min_diff',
    'l3y_abs_body_strikes_landed_per_min_sum',
    'all_abs_leg_strikes_landed_per_min_sum',
    'l1y_losses_ko_diff',
    'l1y_abs_ground_strikes_accuracy_sum',
    'l5y_abs_ground_strikes_accuracy_sum',
    'l1y_abs_sub_attempts_per_min_sum',
    'all_abs_sub_attempts_per_min_sum',
    'reach_diff',
    'all_inf_reversals_per_min_diff',
    'l3y_inf_reversals_per_min_diff',
    'l1y_abs_takedowns_landed_per_min_sum',
    'l1y_abs_control_per_min_sum',
    'all_inf_head_strikes_attempts_per_min_sum',
    'l5y_abs_distance_strikes_accuracy_sum',
    'l1y_inf_control_per_min_sum',
    'l3y_inf_ground_strikes_attempts_per_min_sum',
    'l3y_inf_body_strikes_accuracy_sum',
    'all_inf_ground_strikes_landed_per_min_sum',
    'l3y_losses_sum',
    'l1y_inf_takedowns_accuracy_diff',
    'l3y_inf_leg_strikes_accuracy_diff',
    'l5y_inf_total_strikes_attempts_per_min_diff',
    'all_inf_total_strikes_landed_per_min_diff',
    'l3y_inf_head_strikes_landed_per_min_diff',
    'all_inf_clinch_strikes_landed_per_min_diff',
    'l3y_inf_clinch_strikes_attempts_per_min_diff',
    'l3y_inf_ground_strikes_accuracy_diff',
    'l3y_abs_distance_strikes_accuracy_sum',
    'all_abs_body_strikes_attempts_per_min_diff',
    'l5y_abs_body_strikes_landed_per_min_diff',
    'l1y_losses_dec_sum',
    'l1y_inf_sig_strikes_accuracy_diff',
    'l1y_inf_clinch_strikes_attempts_per_min_diff',
    'l5y_inf_head_strikes_attempts_per_min_diff',
    'l3y_inf_clinch_strikes_landed_per_min_diff',
    'l5y_num_fights_diff',
    'l1y_inf_ground_strikes_landed_per_min_diff',
    'l5y_inf_knockdowns_per_min_diff',
    'l1y_inf_knockdowns_per_min_diff',
    'l5y_abs_body_strikes_attempts_per_min_diff',
    'l3y_inf_total_strikes_attempts_per_min_diff',
    'all_inf_body_strikes_landed_per_min_diff',
    'l3y_abs_knockdowns_per_min_sum',
    'l3y_abs_sub_attempts_per_min_diff',
    'l5y_abs_sub_attempts_per_min_diff',
    'l3y_abs_ground_strikes_landed_per_min_diff',
    'l3y_abs_ground_strikes_attempts_per_min_diff',
    'l1y_abs_body_strikes_accuracy_diff',
    'l1y_inf_knockdowns_per_min_sum',
    'all_inf_knockdowns_per_min_diff',
    'all_inf_reversals_per_min_sum',
    'l5y_inf_ground_strikes_landed_per_min_diff',
    'all_abs_distance_strikes_attempts_per_min_diff',
    'all_inf_head_strikes_accuracy_diff',
    'l3y_abs_clinch_strikes_accuracy_diff',
    'l1y_abs_body_strikes_landed_per_min_diff',
    'l3y_inf_body_strikes_accuracy_diff',
    'all_abs_clinch_strikes_accuracy_diff',
    'all_inf_body_strikes_accuracy_diff',
    'all_abs_reversals_per_min_sum',
    'l5y_abs_reversals_per_min_sum',
    'l1y_abs_distance_strikes_accuracy_sum',
    'l1y_abs_sig_strikes_accuracy_sum',
    'all_abs_total_strikes_accuracy_sum',
    'l5y_abs_sig_strikes_accuracy_sum',
    'all_inf_ground_strikes_accuracy_sum',
    'l5y_inf_clinch_strikes_landed_per_min_diff',
    'l3y_inf_head_strikes_attempts_per_min_diff',
    'all_wins_dec_diff',
    'all_inf_head_strikes_landed_per_min_sum',
    'l1y_inf_total_strikes_accuracy_diff',
    'l5y_inf_ground_strikes_landed_per_min_sum',
    'l3y_inf_ground_strikes_landed_per_min_sum',
    'l5y_defensive_grappling_loss_sum',
    'l1y_abs_head_strikes_landed_per_min_diff',
    'l1y_abs_head_strikes_accuracy_diff',
    'all_abs_head_strikes_landed_per_min_diff',
    'l1y_inf_distance_strikes_attempts_per_min_diff',
    'all_abs_head_strikes_attempts_per_min_diff',
    'all_inf_distance_strikes_landed_per_min_diff',
    'l1y_inf_clinch_strikes_accuracy_diff',
    'l1y_inf_clinch_strikes_landed_per_min_diff',
    'l1y_losses_dec_diff',
    'l5y_abs_clinch_strikes_landed_per_min_diff',
    'all_inf_head_strikes_attempts_per_min_diff',
    'l1y_inf_sub_attempts_per_min_diff',
    'l3y_inf_sub_attempts_per_min_diff',
    'l3y_abs_head_strikes_landed_per_min_diff',
    'l1y_inf_sig_strikes_attempts_per_min_diff',
    'l1y_abs_total_strikes_attempts_per_min_diff', 
    'l1y_abs_total_strikes_landed_per_min_diff',
    'all_inf_leg_strikes_accuracy_diff',
    'l3y_abs_body_strikes_accuracy_diff',
    'l3y_inf_sig_strikes_accuracy_diff',
    'all_abs_body_strikes_landed_per_min_diff',
    'all_inf_ground_strikes_accuracy_diff',
    'all_abs_leg_strikes_accuracy_diff',
    'all_abs_total_strikes_landed_per_min_diff',
    'all_abs_clinch_strikes_landed_per_min_diff',
]

model_test_score(X_train, X_test, best_additive_features4, _max_iter = 30000, scaled=True)

In [None]:
# result without squares
best_additive_features_no_sum5 = [
    'l3y_wins_diff',
    'age_diff',
    'age_sum',
    'l5y_offensive_grappling_score_diff',
    'l5y_inf_ground_strikes_attempts_per_min_diff',
    'l5y_losses_ko_diff',
    'l5y_defensive_grappling_loss_diff',
    'all_inf_takedowns_attempts_per_min_diff',
    'l5y_abs_ground_strikes_landed_per_min_diff',
    'l1y_abs_ground_strikes_attempts_per_min_diff',
    'all_abs_ground_strikes_attempts_per_min_diff',
    'l1y_abs_control_per_min_diff',
    'l1y_abs_clinch_strikes_accuracy_diff',
    'l1y_inf_takedowns_attempts_per_min_diff',
    'l1y_losses_ko_diff',
    'reach_diff',
    'all_inf_reversals_per_min_diff',
    'l3y_inf_reversals_per_min_diff',
    'l3y_inf_ground_strikes_attempts_per_min_sum',
    'all_inf_ground_strikes_landed_per_min_sum',
    'l1y_inf_takedowns_accuracy_diff',
    'l3y_inf_leg_strikes_accuracy_diff',
    'l5y_inf_total_strikes_attempts_per_min_diff',
    'all_inf_total_strikes_landed_per_min_diff',
    'l3y_inf_head_strikes_landed_per_min_diff',
    'all_inf_clinch_strikes_landed_per_min_diff',
    'l3y_inf_clinch_strikes_attempts_per_min_diff',
    'l3y_inf_ground_strikes_accuracy_diff',
    'all_abs_body_strikes_attempts_per_min_diff',
    'l5y_abs_body_strikes_landed_per_min_diff',
    'l1y_inf_sig_strikes_accuracy_diff',
    'l1y_inf_clinch_strikes_attempts_per_min_diff',
    'l5y_inf_head_strikes_attempts_per_min_diff',
    'l3y_inf_clinch_strikes_landed_per_min_diff',
    'l5y_num_fights_diff',
    'l1y_inf_ground_strikes_landed_per_min_diff',
    'l5y_inf_knockdowns_per_min_diff',
    'l1y_inf_knockdowns_per_min_diff',
    'l5y_abs_body_strikes_attempts_per_min_diff',
    'l3y_inf_total_strikes_attempts_per_min_diff',
    'all_inf_body_strikes_landed_per_min_diff',
    'l3y_abs_sub_attempts_per_min_diff',
    'l5y_abs_sub_attempts_per_min_diff',
    'l3y_abs_ground_strikes_landed_per_min_diff',
    'l3y_abs_ground_strikes_attempts_per_min_diff',
    'l1y_abs_body_strikes_accuracy_diff',
    'l1y_inf_knockdowns_per_min_sum',
    'all_inf_knockdowns_per_min_diff',
    'all_inf_reversals_per_min_sum',
    'l5y_inf_ground_strikes_landed_per_min_diff',
    'all_abs_distance_strikes_attempts_per_min_diff',
    'all_inf_head_strikes_accuracy_diff',
    'l3y_abs_clinch_strikes_accuracy_diff',
    'l1y_abs_body_strikes_landed_per_min_diff',
    'l3y_inf_body_strikes_accuracy_diff',
    'all_abs_clinch_strikes_accuracy_diff',
    'all_inf_body_strikes_accuracy_diff',
    'l5y_inf_clinch_strikes_landed_per_min_diff',
    'l3y_inf_head_strikes_attempts_per_min_diff',
    'all_wins_dec_diff',
    'l1y_inf_total_strikes_accuracy_diff',
    'l1y_abs_head_strikes_landed_per_min_diff',
    'l1y_abs_head_strikes_accuracy_diff',
    'all_abs_head_strikes_landed_per_min_diff',
    'l1y_inf_distance_strikes_attempts_per_min_diff',
    'all_abs_head_strikes_attempts_per_min_diff',
    'all_inf_distance_strikes_landed_per_min_diff',
    'l1y_inf_clinch_strikes_accuracy_diff',
    'l1y_inf_clinch_strikes_landed_per_min_diff',
    'l1y_losses_dec_diff',
    'l5y_abs_clinch_strikes_landed_per_min_diff',
    'all_inf_head_strikes_attempts_per_min_diff',
    'l1y_inf_sub_attempts_per_min_diff',
    'l3y_inf_sub_attempts_per_min_diff',
    'l3y_abs_head_strikes_landed_per_min_diff',
    'l1y_inf_sig_strikes_attempts_per_min_diff',
    'l1y_abs_total_strikes_attempts_per_min_diff', 
    'l1y_abs_total_strikes_landed_per_min_diff',
    'all_inf_leg_strikes_accuracy_diff',
    'l3y_abs_body_strikes_accuracy_diff',
    'l3y_inf_sig_strikes_accuracy_diff',
    'all_abs_body_strikes_landed_per_min_diff',
    'all_inf_ground_strikes_accuracy_diff',
    'all_abs_leg_strikes_accuracy_diff',
    'all_abs_total_strikes_landed_per_min_diff',
    'all_abs_clinch_strikes_landed_per_min_diff',
]

model_test_score(X_train, X_test, best_additive_features_no_sum5, _max_iter = 30000, scaled=True)

In [None]:
# result when we had squares
# training accuracy was over 70 and test was at 60
best_additive_features6 = ['l3y_wins_diff',
'age_sq_diff',
'l5y_inf_takedowns_landed_per_min_diff',
'l1y_inf_distance_strikes_landed_per_min_sq_sum',
'l5y_losses_ko_sq_diff',
'l5y_inf_ground_strikes_attempts_per_min_diff',
'l3y_inf_leg_strikes_landed_per_min_sum',
'l5y_defensive_grappling_loss_diff',
'l1y_abs_clinch_strikes_accuracy_sq_diff',
'l5y_abs_head_strikes_accuracy_sq_diff',
'all_inf_takedowns_attempts_per_min_diff',
'l1y_inf_takedowns_attempts_per_min_sq_diff',
'l1y_abs_clinch_strikes_landed_per_min_sq_diff',
'height_sq_sum',
'all_inf_reversals_per_min_sq_diff',
'all_wins_dec_sq_sum',
'l3y_losses_dec_sq_sum',
'all_inf_clinch_strikes_attempts_per_min_sum',
'l1y_inf_head_strikes_attempts_per_min_sq_sum',
'l1y_abs_body_strikes_attempts_per_min_sq_sum',
'l5y_wins_ko_diff',
'l1y_abs_sub_attempts_per_min_sum',
'l1y_abs_knockdowns_per_min_sq_sum',
'l3y_abs_ground_strikes_landed_per_min_sq_sum',
'l1y_inf_control_per_min_sq_sum',
'all_inf_total_strikes_landed_per_min_sum',
'l5y_abs_takedowns_accuracy_sq_sum',
'l5y_inf_ground_strikes_accuracy_sq_sum',
'l3y_inf_clinch_strikes_accuracy_sum',
'l1y_abs_clinch_strikes_attempts_per_min_diff',
'all_abs_body_strikes_attempts_per_min_sq_diff',
'l5y_inf_head_strikes_attempts_per_min_sq_diff',
'l1y_inf_sig_strikes_landed_per_min_sq_diff',
'l3y_inf_distance_strikes_landed_per_min_diff',
'l3y_inf_ground_strikes_accuracy_sq_diff',
'l1y_abs_takedowns_accuracy_sq_sum',
'l1y_abs_takedowns_accuracy_sum',
'l3y_wins_sub_sum',
'l3y_abs_body_strikes_attempts_per_min_sq_diff',
'l1y_inf_takedowns_accuracy_diff',
'reach_diff',
'l5y_inf_sig_strikes_attempts_per_min_sq_diff',
'l1y_wins_ko_diff',
'l3y_wins_ko_diff',
'all_abs_total_strikes_attempts_per_min_sq_diff',
'l1y_abs_head_strikes_landed_per_min_sq_diff',
'l5y_abs_head_strikes_landed_per_min_sq_diff',
'l1y_abs_body_strikes_landed_per_min_diff',
'l1y_inf_clinch_strikes_attempts_per_min_sq_diff',
'l1y_wins_dec_sq_diff',
'l3y_abs_head_strikes_accuracy_sq_diff',
'l3y_losses_sub_sum',
'l5y_defensive_grappling_loss_sq_sum',
'l5y_num_fights_sq_sum',
'l5y_losses_dec_sum',
'l1y_inf_head_strikes_landed_per_min_sum',
'l3y_inf_clinch_strikes_landed_per_min_diff',
'l1y_abs_head_strikes_accuracy_sq_diff',
'l3y_losses_dec_sum',
'l3y_inf_leg_strikes_accuracy_sq_diff',
'l1y_abs_control_per_min_sum',
'l3y_abs_control_per_min_sq_sum',
'l5y_inf_head_strikes_accuracy_sq_sum',
'l3y_inf_sig_strikes_accuracy_sq_sum',
'all_inf_head_strikes_accuracy_sq_diff',
'l3y_inf_sig_strikes_accuracy_sq_diff',
'l3y_inf_body_strikes_landed_per_min_sq_diff',
'l1y_inf_body_strikes_accuracy_sq_sum',
'all_inf_clinch_strikes_landed_per_min_sum',
'l3y_inf_clinch_strikes_attempts_per_min_diff',
'all_abs_control_per_min_sq_sum',
'l1y_inf_distance_strikes_attempts_per_min_diff',
'l3y_losses_sq_diff',
'l1y_wins_ko_sum',
'all_abs_takedowns_attempts_per_min_diff',
'l1y_abs_takedowns_attempts_per_min_sq_diff',
'l1y_abs_takedowns_attempts_per_min_diff',
'l1y_abs_control_per_min_diff',
'l3y_inf_clinch_strikes_landed_per_min_sq_diff',
'all_inf_clinch_strikes_landed_per_min_diff',
'l3y_inf_distance_strikes_accuracy_sum',
'all_inf_distance_strikes_landed_per_min_diff',
'l1y_abs_total_strikes_landed_per_min_diff',
'l5y_abs_knockdowns_per_min_sq_diff',
'l1y_abs_leg_strikes_attempts_per_min_sum',
'l5y_abs_leg_strikes_landed_per_min_sq_sum',
'l3y_inf_clinch_strikes_attempts_per_min_sq_sum',
'all_inf_clinch_strikes_landed_per_min_sq_diff',
'l3y_inf_body_strikes_accuracy_sq_diff',
'l5y_inf_body_strikes_accuracy_diff',
'l1y_wins_ko_sq_sum',
'l1y_inf_total_strikes_attempts_per_min_sq_sum',
'l3y_abs_ground_strikes_accuracy_sum',
'all_abs_ground_strikes_accuracy_sum',
'l1y_inf_reversals_per_min_sq_diff',
'l3y_losses_sub_sq_sum',
'l3y_defensive_grappling_loss_sq_sum',
'all_inf_clinch_strikes_attempts_per_min_diff',
'l1y_inf_clinch_strikes_attempts_per_min_sq_sum',
'l5y_wins_sq_sum',
'l5y_losses_sq_sum',
'all_losses_ko_sum',
'all_wins_sub_sq_sum',
'l1y_abs_control_per_min_sq_diff',
'l5y_inf_ground_strikes_attempts_per_min_sq_sum',
'l5y_wins_sub_sum',
'l5y_losses_sum',
'all_losses_dec_diff',
'l1y_losses_ko_sq_diff',
'l1y_losses_ko_diff',
'all_losses_sq_diff',
'l3y_inf_body_strikes_landed_per_min_diff',
'l5y_inf_ground_strikes_accuracy_sum',
'l5y_abs_ground_strikes_accuracy_sq_sum',
'all_abs_ground_strikes_accuracy_sq_sum',
'all_wins_sum',
'l5y_inf_clinch_strikes_landed_per_min_sq_sum',
'l1y_inf_clinch_strikes_landed_per_min_sq_sum',
'l3y_abs_total_strikes_attempts_per_min_sq_diff',
'all_inf_distance_strikes_accuracy_sq_diff',
'l5y_abs_ground_strikes_landed_per_min_sum',
'l1y_abs_ground_strikes_attempts_per_min_sum',
'l3y_inf_sig_strikes_attempts_per_min_sq_diff',
'l5y_inf_clinch_strikes_landed_per_min_diff',
'l5y_inf_reversals_per_min_sq_diff',
'all_inf_total_strikes_landed_per_min_sq_sum',
'l5y_inf_ground_strikes_attempts_per_min_sq_diff',
'l3y_inf_ground_strikes_attempts_per_min_sq_sum',
'l3y_abs_ground_strikes_landed_per_min_sum',
'l1y_inf_sig_strikes_attempts_per_min_sq_diff',
'all_num_fights_sq_sum',
'l1y_inf_head_strikes_landed_per_min_sq_sum',
'l3y_abs_leg_strikes_attempts_per_min_sq_diff',
'all_abs_leg_strikes_attempts_per_min_diff',
'all_inf_clinch_strikes_landed_per_min_sq_sum',
'all_inf_ground_strikes_attempts_per_min_sq_sum',
'all_abs_leg_strikes_landed_per_min_sq_sum',
'l3y_inf_ground_strikes_landed_per_min_sq_diff',
]

model_test_score(X_train, X_test, best_additive_features6, _max_iter = 30000, scaled=True)

In [None]:
# find the best regularization params
from sklearn.model_selection import GridSearchCV
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l2'],  # l1 is not supported by lbfgs solver
    'solver': ['lbfgs'],
    'max_iter': [1000, 2000, 5000, 10000]
}
grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best score: {grid_search.best_score_}')
best_model = grid_search.best_estimator_
# evaluate the best model on the test set
X_test_scaled = scaler.transform(X_test[best_additive_features])
test_score = best_model.score(X_test_scaled, y_test)

# so use c= 0.1
# TODO maybe try this again on a partial higher scoring feature set


In [None]:
amazing_feature_set7 = [
    'age_diff',
    'l1y_inf_clinch_strikes_attempts_per_min_diff',
    'l5y_inf_head_strikes_accuracy_diff',
    'all_wins_diff',
    'l5y_abs_total_strikes_accuracy_diff',
    'all_abs_leg_strikes_accuracy_diff',
    'l2y_abs_leg_strikes_accuracy_diff',
    'l5y_inf_body_strikes_accuracy_diff',
    'l1y_abs_reversals_per_min_diff',
    'l2y_abs_clinch_strikes_accuracy_diff',
    'l1y_inf_leg_strikes_accuracy_diff',
    'all_wins_dec_diff',
    'all_inf_takedowns_accuracy_diff',
    'l1y_wins_ko_diff',
    'l1y_inf_ground_strikes_accuracy_diff',
    'l3y_inf_ground_strikes_accuracy_diff',
    'l1y_abs_distance_strikes_attempts_per_min_diff',
    'l1y_abs_body_strikes_landed_per_min_diff',
    'l1y_inf_distance_strikes_landed_per_min_diff',
    'l3y_abs_takedowns_accuracy_diff',
    'all_abs_takedowns_accuracy_diff',
    'l3y_inf_distance_strikes_accuracy_diff',
    'l1y_offensive_standing_striking_score_diff',
    'l2y_wins_sub_diff',
    'all_abs_body_strikes_accuracy_diff',
    'l5y_abs_body_strikes_accuracy_diff',
    'l2y_defensive_grappling_loss_diff',
    'l3y_abs_body_strikes_accuracy_diff',
    'all_inf_knockdowns_per_min_diff',
    'l3y_defensive_grappling_loss_diff',
    'l2y_inf_head_strikes_attempts_per_min_diff',
    'l3y_inf_head_strikes_attempts_per_min_diff',
    'l1y_abs_total_strikes_landed_per_min_diff',
    'l3y_abs_leg_strikes_accuracy_diff',
    'l5y_abs_sig_strikes_accuracy_diff',
    'l2y_inf_sig_strikes_accuracy_diff',
    'l2y_inf_head_strikes_landed_per_min_diff',
    'l1y_abs_ground_strikes_accuracy_diff',
    'l1y_inf_sig_strikes_accuracy_diff',
    'l3y_inf_reversals_per_min_diff',
    'all_inf_reversals_per_min_diff',
    'l2y_wins_ko_diff',
    'l1y_inf_head_strikes_landed_per_min_diff',
    'l2y_inf_ground_strikes_accuracy_diff',
    'l3y_abs_takedowns_attempts_per_min_diff',
    'all_abs_takedowns_landed_per_min_diff',
    'l1y_abs_takedowns_attempts_per_min_diff',
    'l5y_inf_takedowns_attempts_per_min_diff',
    'l3y_inf_takedowns_attempts_per_min_diff',
    'l5y_inf_knockdowns_per_min_diff',
    'l3y_num_fights_diff',
    'l1y_inf_clinch_strikes_landed_per_min_diff',
    'l1y_inf_clinch_strikes_accuracy_diff',
    'l3y_inf_distance_strikes_attempts_per_min_diff',
    'l2y_num_fights_diff',
    'l5y_inf_ground_strikes_accuracy_diff',
    'all_abs_head_strikes_landed_per_min_diff',
    'l5y_abs_head_strikes_landed_per_min_diff',
    'l1y_abs_sig_strikes_attempts_per_min_diff',
    'l3y_abs_head_strikes_attempts_per_min_diff',
    'l5y_abs_takedowns_landed_per_min_diff',
    'l5y_inf_leg_strikes_attempts_per_min_diff',
    'l3y_inf_leg_strikes_landed_per_min_diff',
    'l1y_inf_distance_strikes_attempts_per_min_diff',
    'l1y_wins_dec_diff',
    'l3y_losses_dec_diff',
    'l2y_losses_ko_diff',
    'l5y_losses_ko_diff',
    'all_inf_leg_strikes_attempts_per_min_diff',
    'l5y_inf_leg_strikes_accuracy_diff',
    'l3y_inf_clinch_strikes_landed_per_min_diff',
    'l5y_inf_clinch_strikes_landed_per_min_diff',
    'l1y_abs_clinch_strikes_landed_per_min_diff',
    'l3y_abs_clinch_strikes_landed_per_min_diff',
    'l1y_abs_clinch_strikes_accuracy_diff',
    'l1y_abs_ground_strikes_attempts_per_min_diff',
    'l5y_abs_clinch_strikes_landed_per_min_diff',
]

model_test_score(X_train, X_test, amazing_feature_set7, _max_iter = 30000, scaled=True)