In [33]:
import git, os, sys
git_repo = git.Repo(os.getcwd(), search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")
os.chdir(f'{git_root}/src')
sys.path.append(os.path.abspath(os.path.join(f'{git_root}/src')))
print(f'Changed working directory to {os.getcwd()}')

Changed working directory to C:\Users\Alex\OneDrive\Documents\GitHub\UFC_Prediction_2022\src


In [34]:
#getting dependencies
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn' (disables SettingWithCopyWarning)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
import numpy as np
from datetime import datetime
from datetime import date
import matplotlib.pyplot as plt
import random
import sklearn
import scipy
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.feature_selection import VarianceThreshold
import itertools

# from fight_stat_helpers import *

In [3]:
from sklearn.metrics import get_scorer_names

In [None]:
print(get_scorer_names())

In [35]:
#scores a model
def model_score(dataframe, features, iloc_val = 3200, _max_iter = 2000, scoring='neg_log_loss', scaled=True):
    yyy=dataframe['result'].iloc[0:iloc_val]
    XXX=dataframe[features].iloc[0:iloc_val]
    XXXscaler = preprocessing.StandardScaler().fit(XXX)
    XXX_scaled = XXXscaler.transform(XXX) 
    X = XXX_scaled if scaled else XXX
    winPredictionModel=LogisticRegression(solver='lbfgs', max_iter=_max_iter, fit_intercept=False)
    # find the cross val score with log loss
    return cross_val_score(winPredictionModel,X,yyy,cv=4,scoring=scoring).mean()
    
#CODE FOR THE GREEDY ALGORITHM FOR FEATURE SELECTION
def greedy(dataframe, features, subsetsize, iloc_val=3200, _max_iter = 2000, scaled=True, scoring='neg_log_loss', set_of_sets=False):
    if set_of_sets:
        s=set([tuple(feature) for feature in features])
    else:
        s=set(features)
    subsets=list(map(set, itertools.combinations(s, subsetsize))) #subsets of size (subsetsize)
    scores_dict = {}
    for subset in subsets:
        if set_of_sets:
            list_of_features = []
            for feature_set in subset:
                list_of_features.extend(list(feature_set))
        else:
            list_of_features = list(subset)
        scores_dict[tuple(subset)]=model_score(dataframe, list_of_features, iloc_val, _max_iter, scaled=scaled, scoring=scoring)
    max_key = max(scores_dict, key=scores_dict.get)
    max_score = scores_dict[max_key]
    print(f'best subset: {max_key}')
    print(f'with score {max_score}')
    best_feature_list = []
    if set_of_sets:
        for feature_set in max_key:
            best_feature_list.extend(list(feature_set))
    else:
        best_feature_list = list(max_key)
    return best_feature_list, max_score

def reductive_greedy(dataframe, starting_features=None):
    # remove one at a time until it stops going up
    if starting_features is None:
        last_best_score = - 1000
        starting_features = dataframe.columns.tolist()
    else:
        last_best_score = model_score(dataframe, starting_features, _max_iter=5000)
        print(f'Starting with features: {starting_features}')
        print(f'Current best score: {last_best_score}')
    # remove result from list 
    if 'result' in starting_features:
        starting_features.remove('result')
    current_best_score = model_score(dataframe, starting_features, _max_iter=5000)
    features = starting_features.copy()
    while current_best_score > last_best_score:
        last_best_score = current_best_score
        new_features, current_best_score = greedy(dataframe, features, subsetsize=len(features)-1)
        features = new_features
        print(f'Current best score: {current_best_score}')
        print(f'Current best subset: {features}')
        
    print(f'No improvement found, stopping greedy search.')
    best_score = last_best_score
    print(f'Final best score: {best_score}')
    return features

def additive_greedy(dataframe, current_best_feature_set=[], search_doubles=False):
    # now start adding remaining features back in one by one or in groups of two or three and see if we can improve the score
    if current_best_feature_set:
        current_model_score = model_score(dataframe, current_best_feature_set, _max_iter=5000)
        print(f'Starting with current best feature set: {current_best_feature_set}')
        print(f'Current model score: {current_model_score}')
    else:
        print('Starting with an empty feature set.')
        current_model_score = -1000 
        
    while True:
        unused_features = set(dataframe.columns) - set(current_best_feature_set) - set(['result'])
        # first try adding one feature 
        single_feature_scores = {}
        for feature in unused_features:
            new_feature_set = current_best_feature_set + [feature]
            new_model_score = model_score(dataframe, new_feature_set, _max_iter=5000)
            single_feature_scores[tuple(new_feature_set)] = new_model_score
        best_single_feature_set = max(single_feature_scores, key=single_feature_scores.get)
        best_single_feature_score = single_feature_scores[best_single_feature_set]
        if best_single_feature_score > current_model_score:
            current_best_feature_set = list(best_single_feature_set)
            current_model_score = best_single_feature_score
            new_model_accuracy = model_score(dataframe, current_best_feature_set, _max_iter=5000, scoring='accuracy', scaled=False)
            print(f'Added single feature: {best_single_feature_set[-1]}')
            print(f'New model score: {current_model_score}')
            print(f'New model accuracy: {new_model_accuracy}')
            continue
        else:
            print('No improvement found with single addition, trying pairs.')
        if not search_doubles:
            print('Stopping search for pairs (doubles flag set to false in inputs).')
            break
        double_feature_scores = {}
        for feature1, feature2 in itertools.combinations(unused_features, 2):
            new_feature_set = current_best_feature_set + [feature1, feature2]
            new_model_score = model_score(dataframe, new_feature_set, _max_iter=5000)
            double_feature_scores[tuple(new_feature_set)] = new_model_score
        best_double_feature_set = max(double_feature_scores, key=double_feature_scores.get)
        best_double_feature_score = double_feature_scores[best_double_feature_set]
        if best_double_feature_score > current_model_score:
            current_best_feature_set = list(best_double_feature_set)
            current_model_score = best_double_feature_score
            print(f'Added double feature: {best_double_feature_set[-2]}, {best_double_feature_set[-1]}')
            print(f'New model score: {current_model_score}')
            continue
        else:
            print('No improvement found with double addition, stopping.')
            break
    return current_best_feature_set

In [36]:
ufc_fights_predictive_flattened_diffs_path = f'{git_root}/src/content/data/processed/ufc_fights_predictive_flattened_diffs.csv'
ufc_fights_predictive_flattened_diffs = pd.read_csv(ufc_fights_predictive_flattened_diffs_path)
ufc_fights_predictive_flattened_diffs = ufc_fights_predictive_flattened_diffs.dropna(axis=0, how='any')
y = (ufc_fights_predictive_flattened_diffs['result'] == 'W').values
# drop the 'fighter_result' column and opponent_result column as it is not needed for the model and also fighter and opponent columns
X = ufc_fights_predictive_flattened_diffs.drop(columns=['fighter', 'opponent'])
X['result'] = (X['result'] == 'W').astype(int)  # Convert result to binary (1 for win, 0 for loss)

# try this later
# ufc_fights_predictive_doubled_path = f'{git_root}/src/content/data/processed/ufc_fights_predictive_doubled.csv'
# ufc_fights_predictive_doubled = pd.read_csv(ufc_fights_predictive_doubled_path)

In [37]:
X.columns

Index(['result', 'age_diff', 'height_diff', 'reach_diff', 'all_wins_diff',
       'l1y_wins_diff', 'l2y_wins_diff', 'l3y_wins_diff', 'l5y_wins_diff',
       'all_wins_ko_diff',
       ...
       'all_offensive_grappling_score_diff',
       'l1y_offensive_grappling_score_diff',
       'l2y_offensive_grappling_score_diff',
       'l3y_offensive_grappling_score_diff',
       'l5y_offensive_grappling_score_diff',
       'all_defensive_grappling_loss_diff',
       'l1y_defensive_grappling_loss_diff',
       'l2y_defensive_grappling_loss_diff',
       'l3y_defensive_grappling_loss_diff',
       'l5y_defensive_grappling_loss_diff'],
      dtype='object', length=379)

# NOTE A LARGER VALUE IS BETTER FOR NEG LOG LOSS

In [38]:
best_additive_features = additive_greedy(X, search_doubles=False)

Starting with an empty feature set.
Added single feature: age_diff
New model score: -0.676350026943854
New model accuracy: 0.5880313199105145
Added single feature: l1y_inf_clinch_strikes_attempts_per_min_diff
New model score: -0.6662577559738222
New model accuracy: 0.5712192393736018
Added single feature: l5y_inf_head_strikes_accuracy_diff
New model score: -0.6586688599908539
New model accuracy: 0.5879418344519015
Added single feature: all_wins_diff
New model score: -0.6539915967519596
New model accuracy: 0.5963758389261745
Added single feature: l5y_abs_total_strikes_accuracy_diff
New model score: -0.6489530671272371
New model accuracy: 0.5929977628635347
Added single feature: all_abs_leg_strikes_accuracy_diff
New model score: -0.6444259120130975
New model accuracy: 0.6231767337807606
Added single feature: l2y_abs_leg_strikes_accuracy_diff
New model score: -0.6392055464511268
New model accuracy: 0.6181319910514542
Added single feature: l5y_inf_body_strikes_accuracy_diff
New model score

In [39]:
# remove one at a time until it stops going up
best_reductive_subset = reductive_greedy(X, starting_features=best_additive_features)

Starting with features: ['age_diff', 'l1y_inf_clinch_strikes_attempts_per_min_diff', 'l5y_inf_head_strikes_accuracy_diff', 'all_wins_diff', 'l5y_abs_total_strikes_accuracy_diff', 'all_abs_leg_strikes_accuracy_diff', 'l2y_abs_leg_strikes_accuracy_diff', 'l5y_inf_body_strikes_accuracy_diff', 'l1y_abs_reversals_per_min_diff', 'l2y_abs_clinch_strikes_accuracy_diff', 'l1y_inf_leg_strikes_accuracy_diff', 'all_wins_dec_diff', 'all_inf_takedowns_accuracy_diff', 'l1y_wins_ko_diff', 'l1y_inf_ground_strikes_accuracy_diff', 'l3y_inf_ground_strikes_accuracy_diff', 'l1y_abs_distance_strikes_attempts_per_min_diff', 'l1y_abs_body_strikes_landed_per_min_diff', 'l1y_inf_distance_strikes_landed_per_min_diff', 'l3y_abs_takedowns_accuracy_diff', 'all_abs_takedowns_accuracy_diff', 'l3y_inf_distance_strikes_accuracy_diff', 'l1y_offensive_standing_striking_score_diff', 'l2y_wins_sub_diff', 'all_abs_body_strikes_accuracy_diff', 'l5y_abs_body_strikes_accuracy_diff', 'l2y_defensive_grappling_loss_diff', 'l3y_abs

In [40]:
# do additive again
best_additive_features = additive_greedy(X, current_best_feature_set=best_reductive_subset, search_doubles=False)

Starting with current best feature set: ['age_diff', 'l1y_inf_clinch_strikes_attempts_per_min_diff', 'l5y_inf_head_strikes_accuracy_diff', 'all_wins_diff', 'l5y_abs_total_strikes_accuracy_diff', 'all_abs_leg_strikes_accuracy_diff', 'l2y_abs_leg_strikes_accuracy_diff', 'l5y_inf_body_strikes_accuracy_diff', 'l1y_abs_reversals_per_min_diff', 'l2y_abs_clinch_strikes_accuracy_diff', 'l1y_inf_leg_strikes_accuracy_diff', 'all_wins_dec_diff', 'all_inf_takedowns_accuracy_diff', 'l1y_wins_ko_diff', 'l1y_inf_ground_strikes_accuracy_diff', 'l3y_inf_ground_strikes_accuracy_diff', 'l1y_abs_distance_strikes_attempts_per_min_diff', 'l1y_abs_body_strikes_landed_per_min_diff', 'l1y_inf_distance_strikes_landed_per_min_diff', 'l3y_abs_takedowns_accuracy_diff', 'all_abs_takedowns_accuracy_diff', 'l3y_inf_distance_strikes_accuracy_diff', 'l1y_offensive_standing_striking_score_diff', 'l2y_wins_sub_diff', 'all_abs_body_strikes_accuracy_diff', 'l5y_abs_body_strikes_accuracy_diff', 'l2y_defensive_grappling_loss

In [44]:
# now try additive with double features
best_additive_features = additive_greedy(X, current_best_feature_set=best_reductive_subset, search_doubles=True)

Starting with current best feature set: ['age_diff', 'l1y_inf_clinch_strikes_attempts_per_min_diff', 'l5y_inf_head_strikes_accuracy_diff', 'all_wins_diff', 'l5y_abs_total_strikes_accuracy_diff', 'all_abs_leg_strikes_accuracy_diff', 'l2y_abs_leg_strikes_accuracy_diff', 'l5y_inf_body_strikes_accuracy_diff', 'l1y_abs_reversals_per_min_diff', 'l2y_abs_clinch_strikes_accuracy_diff', 'l1y_inf_leg_strikes_accuracy_diff', 'all_wins_dec_diff', 'all_inf_takedowns_accuracy_diff', 'l1y_wins_ko_diff', 'l1y_inf_ground_strikes_accuracy_diff', 'l3y_inf_ground_strikes_accuracy_diff', 'l1y_abs_distance_strikes_attempts_per_min_diff', 'l1y_abs_body_strikes_landed_per_min_diff', 'l1y_inf_distance_strikes_landed_per_min_diff', 'l3y_abs_takedowns_accuracy_diff', 'all_abs_takedowns_accuracy_diff', 'l3y_inf_distance_strikes_accuracy_diff', 'l1y_offensive_standing_striking_score_diff', 'l2y_wins_sub_diff', 'all_abs_body_strikes_accuracy_diff', 'l5y_abs_body_strikes_accuracy_diff', 'l2y_defensive_grappling_loss

In [46]:
best_model_score = model_score(X, best_additive_features, _max_iter=5000)
print(f'Best model score: {best_model_score}')
print(f'Probability to observe data given the model: {np.exp(best_model_score)}')
accuracy = model_score(X, best_additive_features, _max_iter=5000, scoring='accuracy')
print(f'Accuracy of the model: {accuracy}')

Best model score: -0.5847483354859986
Probability to observe data given the model: 0.5572460832317718
Accuracy of the model: 0.6767002237136466


In [48]:
# WOW THIS IS AMAZING
print('Best additive features:')
print('-------------------------')
print(f'Number of best additive features: {len(best_additive_features)}')
for col in best_additive_features:
    print(col)

Best additive features:
-------------------------
Number of best additive features: 77
age_diff
l1y_inf_clinch_strikes_attempts_per_min_diff
l5y_inf_head_strikes_accuracy_diff
all_wins_diff
l5y_abs_total_strikes_accuracy_diff
all_abs_leg_strikes_accuracy_diff
l2y_abs_leg_strikes_accuracy_diff
l5y_inf_body_strikes_accuracy_diff
l1y_abs_reversals_per_min_diff
l2y_abs_clinch_strikes_accuracy_diff
l1y_inf_leg_strikes_accuracy_diff
all_wins_dec_diff
all_inf_takedowns_accuracy_diff
l1y_wins_ko_diff
l1y_inf_ground_strikes_accuracy_diff
l3y_inf_ground_strikes_accuracy_diff
l1y_abs_distance_strikes_attempts_per_min_diff
l1y_abs_body_strikes_landed_per_min_diff
l1y_inf_distance_strikes_landed_per_min_diff
l3y_abs_takedowns_accuracy_diff
all_abs_takedowns_accuracy_diff
l3y_inf_distance_strikes_accuracy_diff
l1y_offensive_standing_striking_score_diff
l2y_wins_sub_diff
all_abs_body_strikes_accuracy_diff
l5y_abs_body_strikes_accuracy_diff
l2y_defensive_grappling_loss_diff
l3y_abs_body_strikes_accura

In [50]:
amazing_feature_set = [
    'age_diff',
    'l1y_inf_clinch_strikes_attempts_per_min_diff',
    'l5y_inf_head_strikes_accuracy_diff',
    'all_wins_diff',
    'l5y_abs_total_strikes_accuracy_diff',
    'all_abs_leg_strikes_accuracy_diff',
    'l2y_abs_leg_strikes_accuracy_diff',
    'l5y_inf_body_strikes_accuracy_diff',
    'l1y_abs_reversals_per_min_diff',
    'l2y_abs_clinch_strikes_accuracy_diff',
    'l1y_inf_leg_strikes_accuracy_diff',
    'all_wins_dec_diff',
    'all_inf_takedowns_accuracy_diff',
    'l1y_wins_ko_diff',
    'l1y_inf_ground_strikes_accuracy_diff',
    'l3y_inf_ground_strikes_accuracy_diff',
    'l1y_abs_distance_strikes_attempts_per_min_diff',
    'l1y_abs_body_strikes_landed_per_min_diff',
    'l1y_inf_distance_strikes_landed_per_min_diff',
    'l3y_abs_takedowns_accuracy_diff',
    'all_abs_takedowns_accuracy_diff',
    'l3y_inf_distance_strikes_accuracy_diff',
    'l1y_offensive_standing_striking_score_diff',
    'l2y_wins_sub_diff',
    'all_abs_body_strikes_accuracy_diff',
    'l5y_abs_body_strikes_accuracy_diff',
    'l2y_defensive_grappling_loss_diff',
    'l3y_abs_body_strikes_accuracy_diff',
    'all_inf_knockdowns_per_min_diff',
    'l3y_defensive_grappling_loss_diff',
    'l2y_inf_head_strikes_attempts_per_min_diff',
    'l3y_inf_head_strikes_attempts_per_min_diff',
    'l1y_abs_total_strikes_landed_per_min_diff',
    'l3y_abs_leg_strikes_accuracy_diff',
    'l5y_abs_sig_strikes_accuracy_diff',
    'l2y_inf_sig_strikes_accuracy_diff',
    'l2y_inf_head_strikes_landed_per_min_diff',
    'l1y_abs_ground_strikes_accuracy_diff',
    'l1y_inf_sig_strikes_accuracy_diff',
    'l3y_inf_reversals_per_min_diff',
    'all_inf_reversals_per_min_diff',
    'l2y_wins_ko_diff',
    'l1y_inf_head_strikes_landed_per_min_diff',
    'l2y_inf_ground_strikes_accuracy_diff',
    'l3y_abs_takedowns_attempts_per_min_diff',
    'all_abs_takedowns_landed_per_min_diff',
    'l1y_abs_takedowns_attempts_per_min_diff',
    'l5y_inf_takedowns_attempts_per_min_diff',
    'l3y_inf_takedowns_attempts_per_min_diff',
    'l5y_inf_knockdowns_per_min_diff',
    'l3y_num_fights_diff',
    'l1y_inf_clinch_strikes_landed_per_min_diff',
    'l1y_inf_clinch_strikes_accuracy_diff',
    'l3y_inf_distance_strikes_attempts_per_min_diff',
    'l2y_num_fights_diff',
    'l5y_inf_ground_strikes_accuracy_diff',
    'all_abs_head_strikes_landed_per_min_diff',
    'l5y_abs_head_strikes_landed_per_min_diff',
    'l1y_abs_sig_strikes_attempts_per_min_diff',
    'l3y_abs_head_strikes_attempts_per_min_diff',
    'l5y_abs_takedowns_landed_per_min_diff',
    'l5y_inf_leg_strikes_attempts_per_min_diff',
    'l3y_inf_leg_strikes_landed_per_min_diff',
    'l1y_inf_distance_strikes_attempts_per_min_diff',
    'l1y_wins_dec_diff',
    'l3y_losses_dec_diff',
    'l2y_losses_ko_diff',
    'l5y_losses_ko_diff',
    'all_inf_leg_strikes_attempts_per_min_diff',
    'l5y_inf_leg_strikes_accuracy_diff',
    'l3y_inf_clinch_strikes_landed_per_min_diff',
    'l5y_inf_clinch_strikes_landed_per_min_diff',
    'l1y_abs_clinch_strikes_landed_per_min_diff',
    'l3y_abs_clinch_strikes_landed_per_min_diff',
    'l1y_abs_clinch_strikes_accuracy_diff',
    'l1y_abs_ground_strikes_attempts_per_min_diff',
    'l5y_abs_clinch_strikes_landed_per_min_diff',
]

In [51]:
# try removing features again 
best_reductive_subset = reductive_greedy(X, starting_features=amazing_feature_set)

Starting with features: ['age_diff', 'l1y_inf_clinch_strikes_attempts_per_min_diff', 'l5y_inf_head_strikes_accuracy_diff', 'all_wins_diff', 'l5y_abs_total_strikes_accuracy_diff', 'all_abs_leg_strikes_accuracy_diff', 'l2y_abs_leg_strikes_accuracy_diff', 'l5y_inf_body_strikes_accuracy_diff', 'l1y_abs_reversals_per_min_diff', 'l2y_abs_clinch_strikes_accuracy_diff', 'l1y_inf_leg_strikes_accuracy_diff', 'all_wins_dec_diff', 'all_inf_takedowns_accuracy_diff', 'l1y_wins_ko_diff', 'l1y_inf_ground_strikes_accuracy_diff', 'l3y_inf_ground_strikes_accuracy_diff', 'l1y_abs_distance_strikes_attempts_per_min_diff', 'l1y_abs_body_strikes_landed_per_min_diff', 'l1y_inf_distance_strikes_landed_per_min_diff', 'l3y_abs_takedowns_accuracy_diff', 'all_abs_takedowns_accuracy_diff', 'l3y_inf_distance_strikes_accuracy_diff', 'l1y_offensive_standing_striking_score_diff', 'l2y_wins_sub_diff', 'all_abs_body_strikes_accuracy_diff', 'l5y_abs_body_strikes_accuracy_diff', 'l2y_defensive_grappling_loss_diff', 'l3y_abs

In [52]:
# probability of observing data given the model
best_model_score = model_score(X, best_reductive_subset, _max_iter=5000)
print(f'Probability to observe data given the model: {np.exp(best_model_score)}')

Probability to observe data given the model: 0.5572460832317718


# WHY IS THE SCORE DIFFERENT THAN FROM ABOVE???

In [None]:
def ufc_prediction_tuple(fighter1,fighter2,day1=date.today(),day2=date.today()):
    # TODO rescale the features to zero mean and unit variance
    
    # return [fighter_score_diff(fighter1,fighter2,day1, 4),
    #         fighter_score_diff(fighter1,fighter2,day1, 9),
    #         fighter_score_diff(fighter1,fighter2,day1, 15),
    #         fight_math_diff(fighter1,fighter2,day1, 1),
    #         fight_math_diff(fighter1,fighter2,day1, 6),
    #         L5Y_sub_wins(fighter1,day1)-L5Y_sub_wins(fighter2,day2),
    #         L5Y_losses(fighter1,day1)-L5Y_losses(fighter2,day2),
    #         L5Y_ko_losses(fighter1,day1)-L5Y_ko_losses(fighter2,day2),
    #         fighter_age(fighter1,day1)-fighter_age(fighter2,day2),
    #         avg_count('total_strikes_landed',fighter1,'abs',day1)-avg_count('total_strikes_landed',fighter2,'abs',day2),
    #         avg_count('head_strikes_landed',fighter1,'abs',day1)-avg_count('head_strikes_landed',fighter2,'abs',day2),
    #         avg_count('ground_strikes_landed',fighter1,'inf',day1)-avg_count('ground_strikes_landed',fighter2,'inf',day2),
    #         avg_count('takedowns_attempts',fighter1,'inf',day1)-avg_count('takedowns_attempts',fighter2,'inf',day2),
    #         avg_count('head_strikes_landed',fighter1,'inf',day1)-avg_count('head_strikes_landed',fighter2,'inf',day2),
    #     ]
    return [
    # 'fighter_age_diff',
    #     'height_diff',
    #     'reach_diff',
    #     'fighter_L5Y_wins_diff_2',
    #     'fighter_L5Y_losses_diff_2',
    #     'fighter_ko_losses_diff_2',
    #     'fighter_L5Y_ko_wins_diff_2',
    #     'fighter_L2Y_ko_losses_diff_2',
        fighter_age(fighter1, day1) - fighter_age(fighter2, day2),
        fighter_height(fighter1) - fighter_height(fighter2),
        fighter_reach(fighter1) - fighter_reach(fighter2),
        L5Y_wins(fighter1, day1) - L5Y_wins(fighter2, day2),
        L5Y_losses(fighter1, day1) - L5Y_losses(fighter2, day2),
        ko_losses(fighter1, day1) - ko_losses(fighter2, day2),
        L5Y_ko_wins(fighter1, day1) - L5Y_ko_wins(fighter2, day2),
        L2Y_ko_losses(fighter1, day1) - L2Y_ko_losses(fighter2, day2),
        #     'fighter_L5Y_ko_losses_diff_2',
    #     'fighter_sub_wins_diff_2',
    #     'fighter_L2Y_sub_losses_diff_2',
    #     'fighter_inf_knockdowns_avg_diff_2',
        L2Y_ko_losses(fighter1, day1) - L2Y_ko_losses(fighter2, day2),
        sub_wins(fighter1, day1) - sub_wins(fighter2, day2),
        L2Y_sub_losses(fighter1, day1) - L2Y_sub_losses(fighter2, day2),
        avg_count('knockdowns', fighter1, 'inf', day1) - avg_count('knockdowns', fighter2, 'inf', day2),
    
    #     'fighter_inf_distance_strikes_landed_avg_diff_2',
        avg_count('distance_strikes_landed', fighter1, 'inf', day1) - avg_count('distance_strikes_landed', fighter2, 'inf', day2),
    #     'fighter_inf_ground_strikes_landed_avg_diff_2',
        avg_count('ground_strikes_landed', fighter1, 'inf', day1) - avg_count('ground_strikes_landed', fighter2, 'inf', day2),
    #     'fighter_inf_distance_strikes_attempts_avg_diff_2',
        avg_count('distance_strikes_attempts', fighter1, 'inf', day1) - avg_count('distance_strikes_attempts', fighter2, 'inf', day2),
        
        
        #     'fighter_inf_head_strikes_attempts_avg_diff_2',
        avg_count('head_strikes_attempts', fighter1, 'inf', day1) - avg_count('head_strikes_attempts', fighter2, 'inf', day2),
    #     'fighter_inf_takedowns_attempts_avg_diff_2',
        avg_count('takedowns_attempts', fighter1, 'inf', day1) - avg_count('takedowns_attempts', fighter2, 'inf', day2),
    #     'fighter_abs_reversals_avg_diff_2',
        # avg_count('reversals', fighter1, 'abs', day1) - avg_count('reversals', fighter2, 'abs', day2),
    #     'fighter_abs_clinch_strikes_landed_avg_diff_2',
        avg_count('clinch_strikes_landed', fighter1, 'abs', day1) - avg_count('clinch_strikes_landed', fighter2, 'abs', day2),
    #     'fighter_abs_distance_strikes_landed_avg_diff_2',
        avg_count('distance_strikes_landed', fighter1, 'abs', day1) - avg_count('distance_strikes_landed', fighter2, 'abs', day2),
        
            #     'fighter_abs_leg_strikes_landed_avg_diff_2',
        avg_count('leg_strikes_landed', fighter1, 'abs', day1) - avg_count('leg_strikes_landed', fighter2, 'abs', day2),
    #     'fighter_abs_takedowns_attempts_avg_diff_2',
        avg_count('takedowns_attempts', fighter1, 'abs', day1) - avg_count('takedowns_attempts', fighter2, 'abs', day2),
    #     'fighter_abs_distance_strikes_attempts_avg_diff_2',
        avg_count('distance_strikes_attempts', fighter1, 'abs', day1) - avg_count('distance_strikes_attempts', fighter2, 'abs', day2),
    #     'fighter_abs_head_strikes_attempts_avg_diff_2',
        avg_count('head_strikes_attempts', fighter1, 'abs', day1) - avg_count('head_strikes_attempts', fighter2, 'abs', day2),
    #     'fighter_abs_total_strikes_attempts_avg_diff_2',
        avg_count('total_strikes_attempts', fighter1, 'abs', day1) - avg_count('total_strikes_attempts', fighter2, 'abs', day2),
                #     '6-fight_math',
    #     '9-fighter_score_diff',
    #     '4-fighter_score_diff',
        fight_math_diff(fighter1, fighter2, day1, 6),
        fighter_score_diff(fighter1, fighter2, day1, 9),
        fighter_score_diff(fighter1, fighter2, day1, 4),
    ]
    

In [None]:
dataframe = ufc_fights_winner
iloc_val = 3200
_max_iter = 2000
scoring='neg_log_loss'
scaled=True

yyy=dataframe['result'].iloc[0:iloc_val]
XXX=dataframe[current_best_feature_set].iloc[0:iloc_val]
XXXscaler = preprocessing.StandardScaler().fit(XXX)
XXX_scaled = XXXscaler.transform(XXX) 
X = XXX_scaled if scaled else XXX
winPredictionModel=LogisticRegression(solver='lbfgs', max_iter=_max_iter, fit_intercept=False)
# find the cross val score with log loss
cross_val_score(winPredictionModel,X,yyy,cv=4,scoring=scoring).mean()

In [None]:
tup = ufc_prediction_tuple('Cody Garbrandt', 'Pedro Munhoz', 'July 5, 2023', 'July 5, 2023')

In [None]:
tup_scaled = XXXscaler.transform(np.array(tup).reshape(1, -1))

In [None]:
tup_scaled[0]