In [1]:
from graph import Graph, Graph3D
from functions import *
import pandas as pd
import numpy as np

#getting dependencies
pd.options.mode.chained_assignment = None  # default='warn' (disables SettingWithCopyWarning)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
from datetime import datetime
from datetime import date
import matplotlib.pyplot as plt
import random
import sklearn
import scipy
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.feature_selection import VarianceThreshold
import itertools

ufc_fights = pd.read_csv('ufc_fights.csv',low_memory=False)
ufc_fights_graph = pd.read_csv('ufc_fights_crap.csv',low_memory=False)
odd_indices = range(1,len(ufc_fights_graph.index),2)
ufc_fights_graph = ufc_fights_graph.drop(odd_indices)
ufc_fights_graph = ufc_fights_graph[['fighter', 'opponent', 'method', 'date', 'division']]
ufc_fights_graph = ufc_fights_graph.reset_index(drop=True);
ufc_wins_list=[]

for i in ufc_fights_graph.index:
    temp_list = []
    temp_list.append(ufc_fights_graph['fighter'][i])
    temp_list.append(ufc_fights_graph['opponent'][i])
    temp_list.append(ufc_fights_graph['date'][i])
    temp_list.append(ufc_fights_graph['division'][i])
    ufc_wins_list.append(temp_list)
    
#need to define variable "years" before calling this function
def fight_math(fighter,opponent,date):
    fighter_advantage = 0
    ufc_wins_list_l5y = [fight for fight in ufc_wins_list if 0<time_diff(fight[2],date)<years*365]
    fighter_wins = [fight[1] for fight in ufc_wins_list_l5y if fight[0]==fighter]
    fighter_wins.append(fighter)
    fighter_wins_wins = [fight[1] for fight in ufc_wins_list_l5y if fight[0] in fighter_wins]
    relevant_wins = list(set(fighter_wins+fighter_wins_wins))
    fight_math_wins = [fight for fight in ufc_wins_list_l5y if (fight[0] in relevant_wins and fight[1]==opponent)]
    fighter_advantage+=len(fight_math_wins)
    return fighter_advantage

def fight_math_diff(fighter,opponent,date):
    return fight_math(fighter,opponent,date)-fight_math(opponent,fighter,date)
fight_math_diff_vect = np.vectorize(fight_math_diff)

#need to define variable "years" before calling this function
#perhaps a better score would weight more recent fights more strongly (weight drops by 1/3 every year?)
def fighter_score(fighter, date):
    fighter_score = 0
    ufc_wins_list_l5y = [fight for fight in ufc_wins_list if 0<time_diff(fight[2],date)<years*365]
    #calculating contribution from wins
    fighter_wins = [fight[1] for fight in ufc_wins_list_l5y if fight[0]==fighter]
    fighter_wins_wins = [fight[1] for fight in ufc_wins_list_l5y if fight[0] in fighter_wins]
    relevant_wins = list(set(fighter_wins+fighter_wins_wins))
    #calculating contribution from losses
    fighter_losses = [fight[0] for fight in ufc_wins_list_l5y if fight[1]==fighter]
    fighter_losses_losses = [fight[0] for fight in ufc_wins_list_l5y if fight[1] in fighter_wins]
    relevant_losses = list(set(fighter_losses+fighter_losses_losses))
    return len(relevant_wins)-len(relevant_losses)

def fighter_score_diff(fighter,opponent,date):
    return fighter_score(fighter,date)-fighter_score(opponent,date)

fighter_score_diff_vect = np.vectorize(fighter_score_diff)

def best_fighter(date):
    candidates=[]
    for name in ufcfighterscrap['name']:
        candidates.append((name,fighter_score(name,date)))
    highest_score=max([candidates[i][1] for i in range(len(candidates))])   
    best_fighters = [candidates[j] for j in range(len(candidates)) if candidates[j][1]==highest_score]
    return best_fighters
    
#scores a model
def model_score(dataframe,features, iloc_val = 3200, _max_iter = 2000):
    yyy=dataframe['result'].iloc[0:iloc_val]
    XXX=dataframe[features].iloc[0:iloc_val]
    XXXscaler = preprocessing.StandardScaler().fit(XXX)
    XXX_scaled = XXXscaler.transform(XXX) 
    winPredictionModel=LogisticRegression(solver='lbfgs', max_iter=_max_iter)
    return cross_val_score(winPredictionModel,XXX_scaled,yyy,cv=4).mean()
    
#CODE FOR THE GREEDY ALGORITHM FOR FEATURE SELECTION
def greedy(dataframe, features, subsetsize, iloc_val=3200, _max_iter = 2000):
    i=0
    best_stats=[]
    s=set(features)
    subsets=list(map(set, itertools.combinations(s, subsetsize))) #subsets of size (subsetsize)
    possible_stat_dict = {}
    scores={0:0}
    for stat_pair in subsets:
        possible_stat_dict[tuple(stat_pair)]=0
    while (i==0) or (scores[i]>scores[i-1]):
        i+=1
        for stat_pair in list(possible_stat_dict.keys()):
            stats_temp = best_stats+list(stat_pair)
            possible_stat_dict[tuple(stat_pair)]=model_score(ufc_fights,stats_temp,iloc_val,_max_iter)
        max_key = max(possible_stat_dict, key=possible_stat_dict.get)
        best_stats.extend(list(max_key))
        scores[i]=possible_stat_dict[max_key]
        possible_stat_dict.pop(max_key)
        print(best_stats,scores[i])
    return (best_stats[:-subsetsize], scores[i-1])

In [2]:
ufcfighterscrap=pd.read_csv('fighter_stats.csv',sep=',')

In [11]:
#investigating predictability of fight math depending on the number of past years to use in the calculation
for i in range(1,10):    
    years=i
    ufc_fights[str(years)+'-fight_math'] = fight_math_diff_vect(ufc_fights['fighter'],
                                               ufc_fights['opponent'],
                                               ufc_fights['date'])
    #gives all indices when fight math guesses correct
    fight_math_correct = [i for i in ufc_fights.index if 
                          ((ufc_fights[str(years)+'-fight_math'][i]>0 and ufc_fights['result'][i]=='W') or 
                          (ufc_fights[str(years)+'-fight_math'][i]<0 and ufc_fights['result'][i]=='L'))]
    #gives all indices when fight math is nonzero
    fight_math_relevant = [i for i in ufc_fights.index if ufc_fights[str(years)+'-fight_math'][i]!=0]

    # years = 5 ----------> .578
    #percentage of the time that fight math guesses the correct fighter when nonzero
    print(str(years)+'-year fight math accuracy: '+str(len(fight_math_correct)/len(fight_math_relevant)))

    # years = 5 ----------> .154
    #percentage of the time fight math is nonzero
    print(str(years)+'-percentage of the time fight math is nonzero: '
          +str(len(fight_math_relevant)/len(ufc_fights.index)))
    
    

1-year fight math accuracy: 0.6891891891891891
1-percentage of the time fight math is nonzero: 0.02011416145691764
2-year fight math accuracy: 0.6270627062706271
2-percentage of the time fight math is nonzero: 0.0823593367762979


KeyboardInterrupt: 

In [58]:
#this uses the last 2 years to compute fighter scores
years = 2
for i in range(30):
    date = 'December 25, '+str(1993+i)
    best = best_fighter(date)
    print('the best fighter(s) in '+str(1993+i)+' was '+str(best[0][0])+' with a score of '+str(best[0][1]))

the best fighter(s) in 1993 was Royce Gracie with a score of 5
the best fighter(s) in 1994 was Royce Gracie with a score of 24
the best fighter(s) in 1995 was Royce Gracie with a score of 22
the best fighter(s) in 1996 was Dan Severn with a score of 15
the best fighter(s) in 1997 was Mark Coleman with a score of 10
the best fighter(s) in 1998 was Randy Couture with a score of 10
the best fighter(s) in 1999 was Frank Shamrock with a score of 7
the best fighter(s) in 2000 was Kevin Randleman with a score of 7
the best fighter(s) in 2001 was Tito Ortiz with a score of 7
the best fighter(s) in 2002 was Jens Pulver with a score of 7
the best fighter(s) in 2003 was Matt Hughes with a score of 8
the best fighter(s) in 2004 was Yves Edwards with a score of 6
the best fighter(s) in 2005 was Rich Franklin with a score of 9
the best fighter(s) in 2006 was Anderson Silva with a score of 11
the best fighter(s) in 2007 was Rashad Evans with a score of 14
the best fighter(s) in 2008 was Anderson Silv

In [57]:
#this uses the last 5 years to compute fighter scores
years = 5
for i in range(30):
    date = 'December 25, '+str(1993+i)
    best = best_fighter(date)
    print('the best fighter(s) in '+str(1993+i)+' was '+str(best[0][0])+' with a score of '+str(best[0][1]))

the best fighter(s) in 1993 was Royce Gracie with a score of 5
the best fighter(s) in 1994 was Royce Gracie with a score of 24
the best fighter(s) in 1995 was Royce Gracie with a score of 31
the best fighter(s) in 1996 was Royce Gracie with a score of 31
the best fighter(s) in 1997 was Royce Gracie with a score of 30
the best fighter(s) in 1998 was Royce Gracie with a score of 29
the best fighter(s) in 1999 was Royce Gracie with a score of 28
the best fighter(s) in 2000 was Royce Gracie with a score of 27
the best fighter(s) in 2001 was Royce Gracie with a score of 27
the best fighter(s) in 2002 was Tito Ortiz with a score of 24
the best fighter(s) in 2003 was Randy Couture with a score of 26
the best fighter(s) in 2004 was Randy Couture with a score of 28
the best fighter(s) in 2005 was Chuck Liddell with a score of 32
the best fighter(s) in 2006 was Chuck Liddell with a score of 29
the best fighter(s) in 2007 was Georges St-Pierre with a score of 34
the best fighter(s) in 2008 was Ge

In [61]:
#this uses the last 10 years to compute fighter scores
years = 10
for i in range(30):
    date = 'December 25, '+str(1993+i)
    best = best_fighter(date)
    print('the best fighter(s) in '+str(1993+i)+' was '+str(best[0][0])+' with a score of '+str(best[0][1]))

the best fighter(s) in 1993 was Royce Gracie with a score of 5
the best fighter(s) in 1994 was Royce Gracie with a score of 24
the best fighter(s) in 1995 was Royce Gracie with a score of 31
the best fighter(s) in 1996 was Royce Gracie with a score of 31
the best fighter(s) in 1997 was Royce Gracie with a score of 30
the best fighter(s) in 1998 was Royce Gracie with a score of 29
the best fighter(s) in 1999 was Royce Gracie with a score of 28
the best fighter(s) in 2000 was Royce Gracie with a score of 27
the best fighter(s) in 2001 was Royce Gracie with a score of 27
the best fighter(s) in 2002 was Royce Gracie with a score of 26
the best fighter(s) in 2003 was Randy Couture with a score of 26
the best fighter(s) in 2004 was Randy Couture with a score of 28
the best fighter(s) in 2005 was Chuck Liddell with a score of 32
the best fighter(s) in 2006 was Chuck Liddell with a score of 35
the best fighter(s) in 2007 was Georges St-Pierre with a score of 34
the best fighter(s) in 2008 was 

In [None]:
# investingating the predictability of fighter score
for i in range(1,20,2):    
    years=i
    ufc_fights[str(years)+'-fighter_score_diff'] = fighter_score_diff_vect(ufc_fights['fighter'],
                                               ufc_fights['opponent'],
                                               ufc_fights['date'])
    #gives all indices when fight math guesses correct
    fighter_score_correct = [i for i in ufc_fights.index if 
                          ((ufc_fights[str(years)+'-fighter_score_diff'][i]>0 and ufc_fights['result'][i]=='W') or 
                          (ufc_fights[str(years)+'-fighter_score_diff'][i]<0 and ufc_fights['result'][i]=='L'))]
    #gives all indices when fight math is nonzero
    fighter_score_relevant = [i for i in ufc_fights.index if ufc_fights[str(years)+'-fighter_score_diff'][i]!=0]

    # years = 5 ----------> .578
    #percentage of the time that fight math guesses the correct fighter when nonzero
    print(str(years)+'-year fighter score accuracy: '+str(len(fighter_score_correct)/len(fighter_score_relevant)))

    # years = 5 ----------> .154
    #percentage of the time fight math is nonzero
    print(str(years)+'-percentage of the time fighter score is nonzero: '
          +str(len(fighter_score_relevant)/len(ufc_fights.index)))
    
    

In [None]:
# investingating the predictability of fighter score
for i in [4,6]:    
    years=i
    ufc_fights[str(years)+'-fighter_score_diff'] = fighter_score_diff_vect(ufc_fights['fighter'],
                                               ufc_fights['opponent'],
                                               ufc_fights['date'])
    #gives all indices when fight math guesses correct
    fighter_score_correct = [i for i in ufc_fights.index if 
                          ((ufc_fights[str(years)+'-fighter_score_diff'][i]>0 and ufc_fights['result'][i]=='W') or 
                          (ufc_fights[str(years)+'-fighter_score_diff'][i]<0 and ufc_fights['result'][i]=='L'))]
    #gives all indices when fight math is nonzero
    fighter_score_relevant = [i for i in ufc_fights.index if ufc_fights[str(years)+'-fighter_score_diff'][i]!=0]

    # years = 5 ----------> .578
    #percentage of the time that fight math guesses the correct fighter when nonzero
    print(str(years)+'-year fighter score accuracy: '+str(len(fighter_score_correct)/len(fighter_score_relevant)))

    # years = 5 ----------> .154
    #percentage of the time fight math is nonzero
    print(str(years)+'-percentage of the time fighter score is nonzero: '
          +str(len(fighter_score_relevant)/len(ufc_fights.index)))

In [None]:
#Now lets use these new stats (fight_math and fighter_score) in our ML models

In [3]:
# cleans method column so 'M-DEC' and 'U-DEC' count as decision, 
# and 'S-DEC', 'doctor stoppage', 'overturned' etc counts as 'bullshit'
ufc_fights['method'] = clean_method_for_winner_vect(ufc_fights['method'])
ufc_fights['method'].unique()

array(['SUB', 'KO/TKO', 'DEC', 'bullshit'], dtype=object)

In [4]:
#WHY AM I USING u'' i.e. unicode... r'' takes less memory space. Was there a reason for using u?
#getting rid of rows with incomplete or useless data
#fights with outcome "Win" or "Loss" (no "Draw")
draw_mask=ufc_fights['result'] != 'D' 
#fights where the method of victory is TKO/SUB/DEC (no split decision or DQ or Overturned or anything else like that)
method_mask=(ufc_fights['method']!='bullshit')
#fights where age is known
age_mask=(ufc_fights['fighter_age']!='unknown')&(ufc_fights['opponent_age']!='unknown')
#fights where height reach is known
height_mask=(ufc_fights['fighter_height']!='unknown')&(ufc_fights['opponent_height']!='unknown')
reach_mask=(ufc_fights['fighter_reach']!='unknown')&(ufc_fights['opponent_reach']!='unknown')
#fights where number of wins is known
wins_mask=(ufc_fights['fighter_wins'] != 'unknown' )& (ufc_fights['opponent_wins'] != 'unknown')
#fights where both fighters have strike statistics (gets rid of UFC debuts)
strikes_mask=(ufc_fights['fighter_inf_sig_strikes_attempts_avg'] != 0)&(ufc_fights['opponent_inf_sig_strikes_attempts_avg'] != 0)
#includes only the fights satisfying these conditions
ufc_fights=ufc_fights[draw_mask&method_mask&age_mask&height_mask&reach_mask&wins_mask&strikes_mask]

#listing all stats and making some new stats from them (differences often score higher in the learning models)
record_statistics=[u'fighter_wins', u'fighter_losses', u'fighter_L5Y_wins',u'fighter_L5Y_losses', 
                   u'fighter_L2Y_wins', u'fighter_L2Y_losses',u'fighter_ko_wins',u'fighter_ko_losses',
                   u'fighter_L5Y_ko_wins',u'fighter_L5Y_ko_losses',u'fighter_L2Y_ko_wins',u'fighter_L2Y_ko_losses',
                   u'fighter_sub_wins',u'fighter_sub_losses',u'fighter_L5Y_sub_wins',u'fighter_L5Y_sub_losses', 
                   u'fighter_L2Y_sub_wins', u'fighter_L2Y_sub_losses',u'opponent_wins', u'opponent_losses',
                   u'opponent_L5Y_wins', 
                   u'opponent_L5Y_losses', 
                   u'opponent_L2Y_wins', 
                   u'opponent_L2Y_losses', 
                    u'opponent_ko_wins', 
                   u'opponent_ko_losses', 
                   u'opponent_L5Y_ko_wins', 
                   u'opponent_L5Y_ko_losses', 
                   u'opponent_L2Y_ko_wins',
                    u'opponent_L2Y_ko_losses', 
                   u'opponent_sub_wins', 
                   u'opponent_sub_losses',
                   u'opponent_L5Y_sub_wins', 
                    u'opponent_L5Y_sub_losses', 
                   u'opponent_L2Y_sub_wins', 
                   u'opponent_L2Y_sub_losses']

physical_stats=[ u'fighter_age',
                u'fighter_height',
                    u'fighter_reach',  
                u'opponent_age',  
                u'opponent_height',
                    u'opponent_reach']

#THERE MAY BE A PROBLEM IN AGE HEIGHT REACH TO DO WITH STRING VS FLOAT. MAKE SURE THESE ARE ALL THE CORRECT TYPE
#MAYBE WE ARE LOSING PREDICTABILITY HERE

#here is the list of all stats available (besides stance), does not include names or result
punch_statistics=[    u'fighter_inf_knockdowns_avg',
                    u'fighter_inf_pass_avg',
                    u'fighter_inf_reversals_avg',
                    u'fighter_inf_sub_attempts_avg',
                    u'fighter_inf_takedowns_landed_avg',
                    u'fighter_inf_takedowns_attempts_avg',
                    u'fighter_inf_sig_strikes_landed_avg',
                    u'fighter_inf_sig_strikes_attempts_avg',
                    u'fighter_inf_total_strikes_landed_avg',
                    u'fighter_inf_total_strikes_attempts_avg',
                    u'fighter_inf_head_strikes_landed_avg',
                    u'fighter_inf_head_strikes_attempts_avg',
                    u'fighter_inf_body_strikes_landed_avg',
                    u'fighter_inf_body_strikes_attempts_avg',
                    u'fighter_inf_leg_strikes_landed_avg',
                    u'fighter_inf_leg_strikes_attempts_avg',
                    u'fighter_inf_distance_strikes_landed_avg',
                    u'fighter_inf_distance_strikes_attempts_avg',
                    u'fighter_inf_clinch_strikes_landed_avg',
                    u'fighter_inf_clinch_strikes_attempts_avg',
                    u'fighter_inf_ground_strikes_landed_avg',
                    u'fighter_inf_ground_strikes_attempts_avg',
                
                    u'fighter_abs_knockdowns_avg',
                    u'fighter_abs_pass_avg',
                    u'fighter_abs_reversals_avg',
                    u'fighter_abs_sub_attempts_avg',
                    u'fighter_abs_takedowns_landed_avg',
                    u'fighter_abs_takedowns_attempts_avg',
                    u'fighter_abs_sig_strikes_landed_avg',
                    u'fighter_abs_sig_strikes_attempts_avg',
                    u'fighter_abs_total_strikes_landed_avg',
                    u'fighter_abs_total_strikes_attempts_avg',
                    u'fighter_abs_head_strikes_landed_avg',
                    u'fighter_abs_head_strikes_attempts_avg',
                    u'fighter_abs_body_strikes_landed_avg',
                    u'fighter_abs_body_strikes_attempts_avg',
                    u'fighter_abs_leg_strikes_landed_avg',
                    u'fighter_abs_leg_strikes_attempts_avg',
                    u'fighter_abs_distance_strikes_landed_avg',
                    u'fighter_abs_distance_strikes_attempts_avg',
                    u'fighter_abs_clinch_strikes_landed_avg',
                    u'fighter_abs_clinch_strikes_attempts_avg',
                    u'fighter_abs_ground_strikes_landed_avg',
                    u'fighter_abs_ground_strikes_attempts_avg',
                    
                    u'opponent_inf_knockdowns_avg',
                    u'opponent_inf_pass_avg',
                    u'opponent_inf_reversals_avg',
                    u'opponent_inf_sub_attempts_avg',
                    u'opponent_inf_takedowns_landed_avg',
                    u'opponent_inf_takedowns_attempts_avg',
                    u'opponent_inf_sig_strikes_landed_avg',
                    u'opponent_inf_sig_strikes_attempts_avg',
                    u'opponent_inf_total_strikes_landed_avg',
                    u'opponent_inf_total_strikes_attempts_avg',
                    u'opponent_inf_head_strikes_landed_avg',
                    u'opponent_inf_head_strikes_attempts_avg',
                    u'opponent_inf_body_strikes_landed_avg',
                    u'opponent_inf_body_strikes_attempts_avg',
                    u'opponent_inf_leg_strikes_landed_avg',
                    u'opponent_inf_leg_strikes_attempts_avg',
                    u'opponent_inf_distance_strikes_landed_avg',
                    u'opponent_inf_distance_strikes_attempts_avg',
                    u'opponent_inf_clinch_strikes_landed_avg',
                    u'opponent_inf_clinch_strikes_attempts_avg',
                    u'opponent_inf_ground_strikes_landed_avg',
                    u'opponent_inf_ground_strikes_attempts_avg',
                
                    u'opponent_abs_knockdowns_avg',
                    u'opponent_abs_pass_avg',
                    u'opponent_abs_reversals_avg',
                    u'opponent_abs_sub_attempts_avg',
                    u'opponent_abs_takedowns_landed_avg',
                    u'opponent_abs_takedowns_attempts_avg',
                    u'opponent_abs_sig_strikes_landed_avg',
                    u'opponent_abs_sig_strikes_attempts_avg',
                    u'opponent_abs_total_strikes_landed_avg',
                    u'opponent_abs_total_strikes_attempts_avg',
                    u'opponent_abs_head_strikes_landed_avg',
                    u'opponent_abs_head_strikes_attempts_avg',
                    u'opponent_abs_body_strikes_landed_avg',
                    u'opponent_abs_body_strikes_attempts_avg',
                    u'opponent_abs_leg_strikes_landed_avg',
                    u'opponent_abs_leg_strikes_attempts_avg',
                    u'opponent_abs_distance_strikes_landed_avg',
                    u'opponent_abs_distance_strikes_attempts_avg',
                    u'opponent_abs_clinch_strikes_landed_avg',
                    u'opponent_abs_clinch_strikes_attempts_avg',
                    u'opponent_abs_ground_strikes_landed_avg',
                    u'opponent_abs_ground_strikes_attempts_avg']


#adding record differences to ufc_fights
record_statistics_diff = []
half_length=int(len(record_statistics)/2)
for i in range(half_length):
    ufc_fights[record_statistics[i]+'_diff_2']=ufc_fights[record_statistics[i]]-ufc_fights[record_statistics[i+half_length]]
    record_statistics_diff.append(record_statistics[i]+'_diff_2')
    
#lets try and improve the greedy algorithm by considering differences. Lets start by replacing height and reach by their differences
ufc_fights['height_diff']=ufc_fights['fighter_height'].apply(float)-ufc_fights['opponent_height'].apply(float)
ufc_fights['reach_diff']=ufc_fights['fighter_reach'].apply(float)-ufc_fights['opponent_reach'].apply(float)

physical_stats_diff = ['fighter_age_diff', 'height_diff', 'reach_diff']

#adding punch differences to ufc_fights
punch_statistics_diff = []
half_length=int(len(punch_statistics)/2)
for i in range(half_length):
    ufc_fights[punch_statistics[i]+'_diff_2']=ufc_fights[punch_statistics[i]]-ufc_fights[punch_statistics[i+half_length]]
    punch_statistics_diff.append(punch_statistics[i]+'_diff_2')
    
possible_stats_diff =record_statistics_diff+physical_stats_diff+punch_statistics_diff
possible_stats_all = possible_stats_diff+record_statistics+physical_stats+punch_statistics


In [5]:
#changing columns saved as strings to floats
ufc_fights['fighter_age'] = ufc_fights['fighter_age'].apply(float)
ufc_fights['fighter_height'] = ufc_fights['fighter_height'].apply(float)
ufc_fights['fighter_reach'] = ufc_fights['fighter_reach'].apply(float)
ufc_fights['opponent_age'] = ufc_fights['opponent_age'].apply(float)
ufc_fights['opponent_height'] = ufc_fights['opponent_height'].apply(float)
ufc_fights['opponent_reach'] = ufc_fights['opponent_reach'].apply(float)
ufc_fights['fighter_stance'] = ufc_fights['fighter_stance'].apply(float)
ufc_fights['opponent_stance'] = ufc_fights['opponent_stance'].apply(float)

In [6]:
#picking a model
winPredictionModel=LogisticRegression(solver='lbfgs', max_iter=1000)

In [7]:
#adding an age_diff column 
#lets make this a function which measures something more precise, 
#like the difference in how close the fighters are to being prime fighting age
#prime fighting age
ufc_fights['fighter_age_diff'] = ufc_fights['fighter_age']-ufc_fights['opponent_age']


In [8]:
#model score with all possible stats (only differences)
model_score(ufc_fights,possible_stats_diff)

0.605

In [9]:
fight_math_stats = ['1-fight_math', '2-fight_math', '3-fight_math', '4-fight_math', '5-fight_math',
                   '6-fight_math', '7-fight_math', '8-fight_math', '9-fight_math']

fighter_score_stats = ['1-fighter_score_diff', '3-fighter_score_diff', '4-fighter_score_diff',
                      '5-fighter_score_diff', '6-fighter_score_diff', '7-fighter_score_diff', '9-fighter_score_diff',
                      '11-fighter_score_diff', '13-fighter_score_diff','15-fighter_score_diff', '17-fighter_score_diff',
                      '19-fighter_score_diff',]



In [10]:
#model score with all fight math stats
model_score(ufc_fights,fight_math_stats)

KeyError: "None of [Index(['1-fight_math', '2-fight_math', '3-fight_math', '4-fight_math',\n       '5-fight_math', '6-fight_math', '7-fight_math', '8-fight_math',\n       '9-fight_math'],\n      dtype='object')] are in the [columns]"

In [78]:
#model score with all fighter score stats
model_score(ufc_fights,fighter_score_stats)

0.5709375

In [79]:
#model score with all fight math stats and fighter score stats
model_score(ufc_fights,fight_math_stats+fighter_score_stats)

0.5728125000000001

In [83]:
#greedy score with all fight math stats
greedy(ufc_fights,fight_math_stats,1)

['4-fight_math'] 0.5215624999999999
['4-fight_math', '1-fight_math'] 0.5234375
['4-fight_math', '1-fight_math', '9-fight_math'] 0.5240625
['4-fight_math', '1-fight_math', '9-fight_math', '8-fight_math'] 0.5243749999999999
['4-fight_math', '1-fight_math', '9-fight_math', '8-fight_math', '6-fight_math'] 0.5249999999999999
['4-fight_math', '1-fight_math', '9-fight_math', '8-fight_math', '6-fight_math', '2-fight_math'] 0.5240625


(['4-fight_math',
  '1-fight_math',
  '9-fight_math',
  '8-fight_math',
  '6-fight_math'],
 0.5249999999999999)

In [84]:
#greedy score with all fight math stats
greedy(ufc_fights,fight_math_stats,2)

['1-fight_math', '4-fight_math'] 0.5234375
['1-fight_math', '4-fight_math', '8-fight_math', '9-fight_math'] 0.5243749999999999
['1-fight_math', '4-fight_math', '8-fight_math', '9-fight_math', '2-fight_math', '3-fight_math'] 0.5253125000000001
['1-fight_math', '4-fight_math', '8-fight_math', '9-fight_math', '2-fight_math', '3-fight_math', '2-fight_math', '1-fight_math'] 0.5253125000000001


(['1-fight_math',
  '4-fight_math',
  '8-fight_math',
  '9-fight_math',
  '2-fight_math',
  '3-fight_math'],
 0.5253125000000001)

In [85]:
#greedy score with all fight math stats
greedy(ufc_fights,fight_math_stats,3)

['9-fight_math', '1-fight_math', '4-fight_math'] 0.5240625
['9-fight_math', '1-fight_math', '4-fight_math', '8-fight_math', '2-fight_math', '3-fight_math'] 0.5253125000000001
['9-fight_math', '1-fight_math', '4-fight_math', '8-fight_math', '2-fight_math', '3-fight_math', '2-fight_math', '3-fight_math', '4-fight_math'] 0.5253125000000001


(['9-fight_math',
  '1-fight_math',
  '4-fight_math',
  '8-fight_math',
  '2-fight_math',
  '3-fight_math'],
 0.5253125000000001)

In [86]:
#greedy score with all fighter score stats
greedy(ufc_fights,fighter_score_stats,1)

['4-fighter_score_diff'] 0.5678124999999999
['4-fighter_score_diff', '19-fighter_score_diff'] 0.573125
['4-fighter_score_diff', '19-fighter_score_diff', '17-fighter_score_diff'] 0.5753125
['4-fighter_score_diff', '19-fighter_score_diff', '17-fighter_score_diff', '1-fighter_score_diff'] 0.57625
['4-fighter_score_diff', '19-fighter_score_diff', '17-fighter_score_diff', '1-fighter_score_diff', '15-fighter_score_diff'] 0.5759375


(['4-fighter_score_diff',
  '19-fighter_score_diff',
  '17-fighter_score_diff',
  '1-fighter_score_diff'],
 0.57625)

In [87]:
#greedy score with all fighter score stats
greedy(ufc_fights,fighter_score_stats,2)

['19-fighter_score_diff', '4-fighter_score_diff'] 0.573125
['19-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fighter_score_diff'] 0.57625
['19-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fighter_score_diff', '17-fighter_score_diff', '13-fighter_score_diff'] 0.5775
['19-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fighter_score_diff', '17-fighter_score_diff', '13-fighter_score_diff', '17-fighter_score_diff', '15-fighter_score_diff'] 0.5778125000000001
['19-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fighter_score_diff', '17-fighter_score_diff', '13-fighter_score_diff', '17-fighter_score_diff', '15-fighter_score_diff', '17-fighter_score_diff', '4-fighter_score_diff'] 0.5781250000000001
['19-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fighter_score_diff', '17-fighter_score_diff', '13-fighter_score_diff', '17-fighter_score_diff', '15-fighter_score_d

(['19-fighter_score_diff',
  '4-fighter_score_diff',
  '17-fighter_score_diff',
  '1-fighter_score_diff',
  '17-fighter_score_diff',
  '13-fighter_score_diff',
  '17-fighter_score_diff',
  '15-fighter_score_diff',
  '17-fighter_score_diff',
  '4-fighter_score_diff',
  '1-fighter_score_diff',
  '15-fighter_score_diff'],
 0.5784375)

In [88]:
#greedy score with all fighter score stats
greedy(ufc_fights,fighter_score_stats,3)

['15-fighter_score_diff', '4-fighter_score_diff', '7-fighter_score_diff'] 0.5753125000000001
['15-fighter_score_diff', '4-fighter_score_diff', '7-fighter_score_diff', '3-fighter_score_diff', '11-fighter_score_diff', '9-fighter_score_diff'] 0.578125
['15-fighter_score_diff', '4-fighter_score_diff', '7-fighter_score_diff', '3-fighter_score_diff', '11-fighter_score_diff', '9-fighter_score_diff', '17-fighter_score_diff', '19-fighter_score_diff', '15-fighter_score_diff'] 0.5781250000000001
['15-fighter_score_diff', '4-fighter_score_diff', '7-fighter_score_diff', '3-fighter_score_diff', '11-fighter_score_diff', '9-fighter_score_diff', '17-fighter_score_diff', '19-fighter_score_diff', '15-fighter_score_diff', '17-fighter_score_diff', '19-fighter_score_diff', '9-fighter_score_diff'] 0.5787500000000001
['15-fighter_score_diff', '4-fighter_score_diff', '7-fighter_score_diff', '3-fighter_score_diff', '11-fighter_score_diff', '9-fighter_score_diff', '17-fighter_score_diff', '19-fighter_score_diff'

(['15-fighter_score_diff',
  '4-fighter_score_diff',
  '7-fighter_score_diff',
  '3-fighter_score_diff',
  '11-fighter_score_diff',
  '9-fighter_score_diff',
  '17-fighter_score_diff',
  '19-fighter_score_diff',
  '15-fighter_score_diff',
  '17-fighter_score_diff',
  '19-fighter_score_diff',
  '9-fighter_score_diff'],
 0.5787500000000001)

In [82]:
#greedy score with all fight math stats and fighter score stats
greedy(ufc_fights,fight_math_stats+fighter_score_stats,1)

['4-fighter_score_diff'] 0.5678124999999999
['4-fighter_score_diff', '9-fight_math'] 0.57875
['4-fighter_score_diff', '9-fight_math', '13-fighter_score_diff'] 0.5809375000000001
['4-fighter_score_diff', '9-fight_math', '13-fighter_score_diff', '4-fight_math'] 0.5825
['4-fighter_score_diff', '9-fight_math', '13-fighter_score_diff', '4-fight_math', '6-fighter_score_diff'] 0.5840625
['4-fighter_score_diff', '9-fight_math', '13-fighter_score_diff', '4-fight_math', '6-fighter_score_diff', '15-fighter_score_diff'] 0.5846874999999999
['4-fighter_score_diff', '9-fight_math', '13-fighter_score_diff', '4-fight_math', '6-fighter_score_diff', '15-fighter_score_diff', '17-fighter_score_diff'] 0.584375


(['4-fighter_score_diff',
  '9-fight_math',
  '13-fighter_score_diff',
  '4-fight_math',
  '6-fighter_score_diff',
  '15-fighter_score_diff'],
 0.5846874999999999)

In [90]:
#greedy score with all fight math stats and fighter score stats
greedy(ufc_fights,fight_math_stats+fighter_score_stats,2)

['4-fighter_score_diff', '9-fight_math'] 0.57875
['4-fighter_score_diff', '9-fight_math', '19-fighter_score_diff', '9-fighter_score_diff'] 0.5846875
['4-fighter_score_diff', '9-fight_math', '19-fighter_score_diff', '9-fighter_score_diff', '17-fighter_score_diff', '19-fighter_score_diff'] 0.58625
['4-fighter_score_diff', '9-fight_math', '19-fighter_score_diff', '9-fighter_score_diff', '17-fighter_score_diff', '19-fighter_score_diff', '3-fighter_score_diff', '19-fighter_score_diff'] 0.586875
['4-fighter_score_diff', '9-fight_math', '19-fighter_score_diff', '9-fighter_score_diff', '17-fighter_score_diff', '19-fighter_score_diff', '3-fighter_score_diff', '19-fighter_score_diff', '17-fighter_score_diff', '9-fighter_score_diff'] 0.586875


(['4-fighter_score_diff',
  '9-fight_math',
  '19-fighter_score_diff',
  '9-fighter_score_diff',
  '17-fighter_score_diff',
  '19-fighter_score_diff',
  '3-fighter_score_diff',
  '19-fighter_score_diff'],
 0.586875)

In [91]:
#greedy score with all fight math stats and fighter score stats
greedy(ufc_fights,fight_math_stats+fighter_score_stats,3)

['6-fight_math', '15-fighter_score_diff', '4-fighter_score_diff'] 0.5815625
['6-fight_math', '15-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fight_math', '9-fighter_score_diff'] 0.5856250000000001
['6-fight_math', '15-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fight_math', '9-fighter_score_diff', '1-fighter_score_diff', '5-fight_math', '13-fighter_score_diff'] 0.586875
['6-fight_math', '15-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fight_math', '9-fighter_score_diff', '1-fighter_score_diff', '5-fight_math', '13-fighter_score_diff', '1-fighter_score_diff', '5-fight_math', '4-fighter_score_diff'] 0.5871875
['6-fight_math', '15-fighter_score_diff', '4-fighter_score_diff', '17-fighter_score_diff', '1-fight_math', '9-fighter_score_diff', '1-fighter_score_diff', '5-fight_math', '13-fighter_score_diff', '1-fighter_score_diff', '5-fight_math', '4-fighter_score_diff', '1-fighter_score_diff', '5-fight_math',

(['6-fight_math',
  '15-fighter_score_diff',
  '4-fighter_score_diff',
  '17-fighter_score_diff',
  '1-fight_math',
  '9-fighter_score_diff',
  '1-fighter_score_diff',
  '5-fight_math',
  '13-fighter_score_diff',
  '1-fighter_score_diff',
  '5-fight_math',
  '4-fighter_score_diff'],
 0.5871875)

In [123]:
best_smallest_set=list(set(['fighter_age_diff', 
                            'reach_diff', 
                            'fighter_L5Y_ko_losses_diff_2', 
                            'fighter_L5Y_losses_diff_2', 
                            'fighter_L2Y_wins_diff_2', 
                            'fighter_L5Y_wins_diff_2', 
                            'fighter_L5Y_sub_wins_diff_2', 
                            'fighter_abs_total_strikes_landed_avg_diff_2', 
                            'fighter_inf_head_strikes_landed_avg_diff_2', 
                            'fighter_inf_leg_strikes_landed_avg_diff_2', 
                            'fighter_abs_head_strikes_landed_avg_diff_2', 
                            'fighter_inf_knockdowns_avg_diff_2', 
                            'fighter_inf_clinch_strikes_attempts_avg_diff_2', 
                            'fighter_inf_takedowns_attempts_avg_diff_2', 
                            'fighter_inf_ground_strikes_landed_avg_diff_2', 
                            'fighter_inf_sig_strikes_landed_avg_diff_2',
                            '6-fight_math',
                              '15-fighter_score_diff',
                              '4-fighter_score_diff',
                              '17-fighter_score_diff',
                              '1-fight_math',
                              '9-fighter_score_diff',
                              '1-fighter_score_diff',
                              '5-fight_math',
                              '13-fighter_score_diff',
                              '1-fighter_score_diff',
                              '5-fight_math',
                              '4-fighter_score_diff'
]))

ufc_fights_df = ufc_fights[best_smallest_set]

winPredictionModel=LogisticRegression(solver='lbfgs', max_iter=2000)
X=ufc_fights_df.iloc[0:40*55].to_numpy()
y=ufc_fights['result'].iloc[0:40*55]
winPredictionModel.fit(X,y)
winPredictionModel.score(X,y)

0.6536363636363637

In [93]:
greedy(ufc_fights,best_smallest_set,1)

['fighter_age_diff'] 0.5803125
['fighter_age_diff', '4-fighter_score_diff'] 0.6065625
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2'] 0.6190625000000001
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fight_math'] 0.624375
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fight_math', 'fighter_L5Y_ko_losses_diff_2'] 0.6268750000000001
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fight_math', 'fighter_L5Y_ko_losses_diff_2', 'fighter_abs_head_strikes_landed_avg_diff_2'] 0.6284375
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fight_math', 'fighter_L5Y_ko_losses_diff_2', 'fighter_abs_head_strikes_landed_avg_diff_2', 'fighter_inf_knockdowns_avg_diff_2'] 0.6296875
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fig

(['fighter_age_diff',
  '4-fighter_score_diff',
  'fighter_abs_total_strikes_landed_avg_diff_2',
  '6-fight_math',
  'fighter_L5Y_ko_losses_diff_2',
  'fighter_abs_head_strikes_landed_avg_diff_2',
  'fighter_inf_knockdowns_avg_diff_2',
  'fighter_inf_head_strikes_landed_avg_diff_2',
  '9-fighter_score_diff',
  '1-fight_math'],
 0.6328125)

In [94]:
greedy(ufc_fights,best_smallest_set,2)

['fighter_age_diff', '4-fighter_score_diff'] 0.6065625
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fight_math'] 0.624375
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fight_math', 'fighter_L5Y_sub_wins_diff_2', 'fighter_inf_takedowns_attempts_avg_diff_2'] 0.630625
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fight_math', 'fighter_L5Y_sub_wins_diff_2', 'fighter_inf_takedowns_attempts_avg_diff_2', 'fighter_inf_head_strikes_landed_avg_diff_2', 'fighter_L5Y_ko_losses_diff_2'] 0.6331249999999999
['fighter_age_diff', '4-fighter_score_diff', 'fighter_abs_total_strikes_landed_avg_diff_2', '6-fight_math', 'fighter_L5Y_sub_wins_diff_2', 'fighter_inf_takedowns_attempts_avg_diff_2', 'fighter_inf_head_strikes_landed_avg_diff_2', 'fighter_L5Y_ko_losses_diff_2', 'fighter_inf_ground_strikes_landed_avg_diff_2', '1-fight_math'] 0.6359374999999999
['fighter

(['fighter_age_diff',
  '4-fighter_score_diff',
  'fighter_abs_total_strikes_landed_avg_diff_2',
  '6-fight_math',
  'fighter_L5Y_sub_wins_diff_2',
  'fighter_inf_takedowns_attempts_avg_diff_2',
  'fighter_inf_head_strikes_landed_avg_diff_2',
  'fighter_L5Y_ko_losses_diff_2',
  'fighter_inf_ground_strikes_landed_avg_diff_2',
  '1-fight_math',
  'fighter_abs_head_strikes_landed_avg_diff_2',
  'fighter_inf_knockdowns_avg_diff_2',
  'fighter_L5Y_sub_wins_diff_2',
  '9-fighter_score_diff'],
 0.6378125)

In [95]:
greedy(ufc_fights,best_smallest_set,3)

['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '4-fighter_score_diff'] 0.6190625000000001
['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '4-fighter_score_diff', 'fighter_L5Y_sub_wins_diff_2', '6-fight_math', 'fighter_inf_takedowns_attempts_avg_diff_2'] 0.630625
['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '4-fighter_score_diff', 'fighter_L5Y_sub_wins_diff_2', '6-fight_math', 'fighter_inf_takedowns_attempts_avg_diff_2', 'fighter_abs_head_strikes_landed_avg_diff_2', 'fighter_L5Y_losses_diff_2', '1-fight_math'] 0.6353125
['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '4-fighter_score_diff', 'fighter_L5Y_sub_wins_diff_2', '6-fight_math', 'fighter_inf_takedowns_attempts_avg_diff_2', 'fighter_abs_head_strikes_landed_avg_diff_2', 'fighter_L5Y_losses_diff_2', '1-fight_math', 'fighter_inf_head_strikes_landed_avg_diff_2', 'fighter_L5Y_ko_losses_diff_2', '17-fighter_score_diff'] 0.6368750000000001
['fighter_abs_

(['fighter_abs_total_strikes_landed_avg_diff_2',
  'fighter_age_diff',
  '4-fighter_score_diff',
  'fighter_L5Y_sub_wins_diff_2',
  '6-fight_math',
  'fighter_inf_takedowns_attempts_avg_diff_2',
  'fighter_abs_head_strikes_landed_avg_diff_2',
  'fighter_L5Y_losses_diff_2',
  '1-fight_math',
  'fighter_inf_head_strikes_landed_avg_diff_2',
  'fighter_L5Y_ko_losses_diff_2',
  '17-fighter_score_diff',
  'fighter_inf_ground_strikes_landed_avg_diff_2',
  '15-fighter_score_diff',
  '9-fighter_score_diff'],
 0.6371875)

In [97]:
refined_smallest = ['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '4-fighter_score_diff', 'fighter_L5Y_sub_wins_diff_2', '6-fight_math', 'fighter_inf_takedowns_attempts_avg_diff_2', 'fighter_abs_head_strikes_landed_avg_diff_2', 'fighter_L5Y_losses_diff_2', '1-fight_math', 'fighter_inf_head_strikes_landed_avg_diff_2', 'fighter_L5Y_ko_losses_diff_2', '17-fighter_score_diff', 'fighter_inf_ground_strikes_landed_avg_diff_2', '15-fighter_score_diff', '9-fighter_score_diff']

In [98]:
greedy(ufc_fights,refined_smallest,4)

['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '6-fight_math', '4-fighter_score_diff'] 0.624375
['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '6-fight_math', '4-fighter_score_diff', 'fighter_inf_head_strikes_landed_avg_diff_2', 'fighter_L5Y_sub_wins_diff_2', 'fighter_inf_takedowns_attempts_avg_diff_2', 'fighter_L5Y_ko_losses_diff_2'] 0.6331249999999999
['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '6-fight_math', '4-fighter_score_diff', 'fighter_inf_head_strikes_landed_avg_diff_2', 'fighter_L5Y_sub_wins_diff_2', 'fighter_inf_takedowns_attempts_avg_diff_2', 'fighter_L5Y_ko_losses_diff_2', '17-fighter_score_diff', 'fighter_abs_head_strikes_landed_avg_diff_2', 'fighter_L5Y_losses_diff_2', '1-fight_math'] 0.6368750000000001
['fighter_abs_total_strikes_landed_avg_diff_2', 'fighter_age_diff', '6-fight_math', '4-fighter_score_diff', 'fighter_inf_head_strikes_landed_avg_diff_2', 'fighter_L5Y_sub_wins_diff_2', 'fighter_inf_takedow

(['fighter_abs_total_strikes_landed_avg_diff_2',
  'fighter_age_diff',
  '6-fight_math',
  '4-fighter_score_diff',
  'fighter_inf_head_strikes_landed_avg_diff_2',
  'fighter_L5Y_sub_wins_diff_2',
  'fighter_inf_takedowns_attempts_avg_diff_2',
  'fighter_L5Y_ko_losses_diff_2',
  '17-fighter_score_diff',
  'fighter_abs_head_strikes_landed_avg_diff_2',
  'fighter_L5Y_losses_diff_2',
  '1-fight_math',
  'fighter_age_diff',
  'fighter_inf_ground_strikes_landed_avg_diff_2',
  '15-fighter_score_diff',
  '9-fighter_score_diff'],
 0.6371875)

In [99]:
list(set(['fighter_abs_total_strikes_landed_avg_diff_2',
  'fighter_age_diff',
  '6-fight_math',
  '4-fighter_score_diff',
  'fighter_inf_head_strikes_landed_avg_diff_2',
  'fighter_L5Y_sub_wins_diff_2',
  'fighter_inf_takedowns_attempts_avg_diff_2',
  'fighter_L5Y_ko_losses_diff_2',
  '17-fighter_score_diff',
  'fighter_abs_head_strikes_landed_avg_diff_2',
  'fighter_L5Y_losses_diff_2',
  '1-fight_math',
  'fighter_age_diff',
  'fighter_inf_ground_strikes_landed_avg_diff_2',
  '15-fighter_score_diff',
  '9-fighter_score_diff']))

['17-fighter_score_diff',
 'fighter_age_diff',
 'fighter_abs_total_strikes_landed_avg_diff_2',
 'fighter_L5Y_sub_wins_diff_2',
 'fighter_L5Y_losses_diff_2',
 '6-fight_math',
 'fighter_L5Y_ko_losses_diff_2',
 'fighter_inf_ground_strikes_landed_avg_diff_2',
 '15-fighter_score_diff',
 '9-fighter_score_diff',
 'fighter_abs_head_strikes_landed_avg_diff_2',
 'fighter_inf_takedowns_attempts_avg_diff_2',
 '1-fight_math',
 'fighter_inf_head_strikes_landed_avg_diff_2',
 '4-fighter_score_diff']

In [108]:
#max at 52 so iloc = 40*52 = 2080
for i in range(2,160):
    print(i,model_score(ufc_fights,current_best,40*i))

2 0.55
3 0.5916666666666666
4 0.65625
5 0.68
6 0.6041666666666666
7 0.6285714285714286
8 0.58125
9 0.5805555555555556
10 0.5775
11 0.5954545454545455
12 0.5541666666666666
13 0.5846153846153845
14 0.5928571428571429
15 0.5850000000000001
16 0.596875
17 0.5852941176470587
18 0.5916666666666667
19 0.606578947368421
20 0.5900000000000001
21 0.6083333333333333
22 0.6022727272727273
23 0.6000000000000001
24 0.5947916666666667
25 0.604
26 0.5990384615384615
27 0.6083333333333334
28 0.6071428571428572
29 0.6077586206896552
30 0.6133333333333333
31 0.6169354838709677
32 0.6203125
33 0.6189393939393939
34 0.6272058823529412
35 0.6235714285714286
36 0.6222222222222222
37 0.6141891891891892
38 0.6236842105263157
39 0.617948717948718
40 0.6199999999999999
41 0.6195121951219512
42 0.6178571428571429
43 0.6232558139534884
44 0.625
45 0.6238888888888888
46 0.6233695652173913
47 0.6276595744680851
48 0.6286458333333333
49 0.6290816326530613
50 0.6295
51 0.6318627450980392
52 0.6389423076923076
53 0.63

In [118]:
#apparently 30 is enough iterations
{‘l1’, ‘l2’, ‘elasticnet’, ‘none’}
model_score(ufc_fights,current_best,2080,30)

0.6389423076923076

In [122]:
#apparently 30 is enough iterations
#{‘l1’, ‘l2’, ‘elasticnet’, ‘none’}
model_score(ufc_fights,best_smallest_set,2080,500)

0.6389423076923076

In [129]:
#getting very close to 64% now... big improvement but not really enough to 
best_smallest_set=list(set(['fighter_age_diff', 
                            'reach_diff', 
                            'fighter_L5Y_ko_losses_diff_2', 
                            'fighter_L5Y_losses_diff_2', 
                            'fighter_L2Y_wins_diff_2', 
                            'fighter_L5Y_wins_diff_2', 
                            'fighter_L5Y_sub_wins_diff_2', 
                            'fighter_abs_total_strikes_landed_avg_diff_2', 
                            'fighter_inf_head_strikes_landed_avg_diff_2', 
                            'fighter_inf_leg_strikes_landed_avg_diff_2', 
                            'fighter_abs_head_strikes_landed_avg_diff_2', 
                            'fighter_inf_knockdowns_avg_diff_2', 
                            'fighter_inf_clinch_strikes_attempts_avg_diff_2', 
                            'fighter_inf_takedowns_attempts_avg_diff_2', 
                            'fighter_inf_ground_strikes_landed_avg_diff_2', 
                            'fighter_inf_sig_strikes_landed_avg_diff_2',
                              '6-fight_math',
                              '15-fighter_score_diff',
                              '4-fighter_score_diff',
                              '17-fighter_score_diff',
                              '1-fight_math',
                              '9-fighter_score_diff',
                              '1-fighter_score_diff',
                              '5-fight_math',
                              '13-fighter_score_diff',
                              '1-fighter_score_diff',
                              '5-fight_math',
                              '4-fighter_score_diff'
]))

winPredictionModel=LogisticRegression(solver='lbfgs', max_iter=2000)
X=ufc_fights[best_smallest_set].iloc[0:2080]
y=ufc_fights['result'].iloc[0:2080]
winPredictionModel.fit(X,y)

print('model score: '+str(winPredictionModel.score(X,y)))
print('cross val score: '+str(model_score(ufc_fights,best_smallest_set,2080,500)))

model score: 0.6567307692307692
cross val score: 0.6389423076923076


In [130]:
current_best = ['4-fighter_score_diff',
 '9-fighter_score_diff',
 '15-fighter_score_diff',
 '17-fighter_score_diff',
 '1-fight_math',
 '6-fight_math',
 'fighter_L5Y_sub_wins_diff_2',
 'fighter_L5Y_losses_diff_2',
 'fighter_L5Y_ko_losses_diff_2',
 'fighter_age_diff',
 'fighter_abs_total_strikes_landed_avg_diff_2',
 'fighter_abs_head_strikes_landed_avg_diff_2',
 'fighter_inf_ground_strikes_landed_avg_diff_2',
 'fighter_inf_takedowns_attempts_avg_diff_2',
 'fighter_inf_head_strikes_landed_avg_diff_2',
 ]

winPredictionModel=LogisticRegression(solver='lbfgs', max_iter=2000)
X=ufc_fights[current_best].iloc[0:2080]
y=ufc_fights['result'].iloc[0:2080]
winPredictionModel.fit(X,y)

print('model score: '+str(winPredictionModel.score(X,y)))
print('cross val score: '+str(model_score(ufc_fights,current_best,2080,500)))

model score: 0.6432692307692308
cross val score: 0.6389423076923076


In [14]:
ufc_fights_graph = pd.read_csv('ufc_fights_crap.csv',low_memory=False)
odd_indices = range(1,len(ufc_fights_graph.index),2)
ufc_fights_graph = ufc_fights_graph.drop(odd_indices)
ufc_fights_graph = ufc_fights_graph[['fighter', 'opponent', 'method', 'date', 'division']]
ufc_fights_graph = ufc_fights_graph.reset_index(drop=True);
ufc_wins_list=[]

for i in ufc_fights_graph.index:
    temp_list = []
    temp_list.append(ufc_fights_graph['fighter'][i])
    temp_list.append(ufc_fights_graph['opponent'][i])
    temp_list.append(ufc_fights_graph['date'][i])
    temp_list.append(ufc_fights_graph['division'][i])
    ufc_wins_list.append(temp_list)

In [15]:
#need to define variable "years" before calling this function
def generate_rankings(weight_class, date):
    weight_mask = ufc_fights['division']==weight_class
    relevant_wins = [fight for fight in ufc_wins_list if 
                     (0<time_diff(fight[2],date)<years*365 and fight[3]==weight_class)]
    winners = {}
    for fight in relevant_wins:
        winners[fight[0]]=fighter_score(fight[0],date)
    winners_sorted = {k: v for k, v in sorted(winners.items(), key=lambda item: -item[1])}
    return winners_sorted
    

In [19]:
years = 6
generate_rankings('Welterweight','March 25, 2022')

{'Kamaru Usman': 30,
 'Leon Edwards': 29,
 'Vicente Luque': 25,
 'Belal Muhammad': 19,
 'Anthony Pettis': 18,
 'Elizeu Zaleski dos Santos': 17,
 'Sean Strickland': 17,
 'Geoff Neal': 16,
 'Jingliang Li': 16,
 'Muslim Salikhov': 15,
 'Khaos Williams': 14,
 'Gilbert Burns': 14,
 'Santiago Ponzinibbio': 14,
 'Anthony Rocco Martin': 14,
 'Colby Covington': 13,
 'Stephen Thompson': 13,
 'Neil Magny': 12,
 'Michel Prazeres': 12,
 'Shavkat Rakhmonov': 11,
 'Francisco Trinaldo': 11,
 'Daniel Rodriguez': 11,
 'Khamzat Chimaev': 10,
 'Sean Brady': 9,
 'Niko Price': 9,
 'Rafael Dos Anjos': 9,
 'Kevin Holland': 8,
 'Michel Pereira': 8,
 'Michael Chiesa': 7,
 'James Krause': 7,
 'Rustam Khabilov': 7,
 'Lorenz Larkin': 7,
 'Ismail Naurdiev': 6,
 'Jordan Mein': 6,
 'Alan Jouban': 5,
 'Jake Matthews': 5,
 'Mike Perry': 5,
 'Demian Maia': 5,
 'Alex Morono': 4,
 'Conor McGregor': 4,
 'Tyron Woodley': 4,
 'Omari Akhmedov': 4,
 'Michael Graves': 4,
 'Gunnar Nelson': 3,
 'Randy Brown': 3,
 'Warlley Alves':