# Intro
Using both datasets to create a model that will predict the results of any two fighters.

## Data transformation
I need to convert all of the data in the fight dataset to be the same as the fighter dataset. This will allow us use any fighters data to predict the results of the fight.

In [21]:
import pandas as pd
from collections import defaultdict
from itertools import cycle
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [2]:
#read in the data
fight = pd.read_csv('processed_fight_data.csv')
fighter = pd.read_csv('processed_fighter_data.csv')

Columns that will need to be transformed:
1. create a strikes landed per minute
2. Add consecutive wins to the fight dataset
3. add fighter record to fight dataset
4. add strikes absorbed per minute
6. add reaches and or reach advantage
7. add stance

In [3]:
# I'm going to create an entirely new data frame since a lot of columns will be dropped from the processed fight data
fight_transformed = pd.DataFrame(defaultdict(list))

In [4]:
#adding the columns that will remain the same
fight_transformed['winner'] = fight['winner']
fight_transformed['win_con'] = fight['win_con']
fight_transformed['title_bout'] = fight['title_bout']
fight_transformed['r_fighter'] = fight['r_fighter']
fight_transformed['b_fighter'] = fight['b_fighter']
fight_transformed['weight_class'] = fight['weight_class']
fight_transformed['r_str_acc'] = fight['r_sig_str_per']
fight_transformed['b_str_acc'] = fight['b_sig_str_per']
fight_transformed['r_td'] = fight['r_td']
fight_transformed['b_td'] = fight['b_td']

## 1.
adding the strikes landed per minute

In [5]:
def SLpM(row, check='r'):
    try:#there are a few fights that are sub 1 minute in those instances we will return the sig str
        if check == 'r':#getting red fighter stats
            return row['r_sig_str']/row['fight_length']
        elif check == 'b':
            return row['b_sig_str']/row['fight_length']
    except:
        if check == 'r':
            return row['r_sig_str']
        elif check == 'b':
            return row['b_sig_str']

In [6]:
fight_transformed['r_SLpM'] = fight.apply(SLpM, axis = 1)
fight_transformed['b_SLpM'] = fight.apply(SLpM,check = 'b', axis =1)

## 2.
Adding strikes absorbed per minute

In [7]:
def SApM(row, check='r'):
    try:#there are a few fights that are sub 1 minute in those instances we will return the sig str
        if check == 'r':#getting red fighter stats
            return row['b_sig_str']/row['fight_length']
        elif check == 'b':
            return row['r_sig_str']/row['fight_length']
    except:
        if check == 'r':
            return row['b_sig_str']
        elif check == 'b':
            return row['r_sig_str']

In [8]:
fight_transformed['r_SApM'] = fight.apply(SApM, axis = 1)
fight_transformed['b_SApM'] = fight.apply(SApM,check = 'b', axis =1)

## 3.-4.
Add consecutive wins and record

In [9]:
fight_transformed['r_consec_wins'] = None
fight_transformed['b_consec_wins'] = None
fight_transformed['r_wins'] = None
fight_transformed['b_wins'] = None
fight_transformed['r_losses'] = None
fight_transformed['b_losses'] = None

In [10]:
#this is a recursive function that will keep track of consecutive wins, and create a fighter record of total wins and losses in the ufc
def get_record(fighter_name, indices, index=0, counter=0, wins = 0, losses = 0):        
    if indices[index] == indices[-1]:#most recent fight
        if fight['r_fighter'][indices[index]]==fighter_name:
            fight_transformed['r_consec_wins'][indices[index]] = counter
            fight_transformed['r_wins'][indices[index]] = wins
            fight_transformed['r_losses'][indices[index]] = losses
            
        elif fight['b_fighter'][indices[index]]==fighter_name:
            fight_transformed['b_consec_wins'][indices[index]] = counter
            fight_transformed['b_wins'][indices[index]] = wins
            fight_transformed['b_losses'][indices[index]] = losses
    else:
        if fight['r_fighter'][indices[index]] == fighter_name and fight['winner'][indices[index]]=='r':
                fight_transformed['r_consec_wins'][indices[index]] = counter
                fight_transformed['r_wins'][indices[index]] = wins
                fight_transformed['r_losses'][indices[index]] = losses
                counter+=1
                index+=1
                wins+=1
                return get_record(fighter_name, indices, index, counter, wins, losses)
        elif fight['b_fighter'][indices[index]] == fighter_name and fight['winner'][indices[index]]=='b':
            fight_transformed['b_consec_wins'][indices[index]] = counter
            fight_transformed['b_wins'][indices[index]] = wins
            fight_transformed['b_losses'][indices[index]] = losses
            counter+=1
            index+=1
            wins+=1
            return get_record(fighter_name, indices, index, counter, wins, losses)
        elif fight['b_fighter'][indices[index]] == fighter_name and fight['winner'][indices[index]]=='r':
            fight_transformed['b_consec_wins'][indices[index]] = counter
            fight_transformed['b_wins'][indices[index]] = wins
            fight_transformed['b_losses'][indices[index]] = losses
            index+=1
            counter = 0
            losses+=1
            return get_record(fighter_name, indices, index, counter, wins, losses)
        else:
            fight_transformed['r_consec_wins'][indices[index]] = counter
            fight_transformed['r_wins'][indices[index]] = wins
            fight_transformed['r_losses'][indices[index]] = losses
            index+=1
            counter = 0
            losses +=1
            return get_record(fighter_name, indices, index, counter, wins, losses)

In [11]:
all_fighters = pd.concat([fight['r_fighter'], fight['b_fighter']]).unique()#getting every fighter in the ufc

In [12]:
import warnings
warnings.filterwarnings('ignore')#ingnoring the warnings because it's a repeated caveat message about assigning values by slicing
for fighters in tqdm(all_fighters):
    indices = fight[(fight['r_fighter']==fighters) | (fight['b_fighter']==fighters)].index
    get_record(fighters, indices[::-1])

100%|██████████████████████████████████████████████████████████████████████████████| 2119/2119 [00:26<00:00, 79.84it/s]


## 5.
Adding the reach advantage to the fighters

In [13]:
def get_reach(row):
    try:
        r_reach = fighter['reach_in'].loc[fighter['fighter_name']==row['r_fighter']].values[0]
        b_reach = fighter['reach_in'].loc[fighter['fighter_name']==row['b_fighter']].values[0]
        #print(row['r_fighter'])
        #print(row['b_fighter'])
        r_reach_adv = r_reach - b_reach
        return r_reach_adv
    except: #in case a fighter is not in the fighter dataset, since there are no nulls in the fighter data reach
        return None

In [14]:
#creating column to apply the get_reach function to
fight['r_reach_adv'] = None

In [15]:
fight['r_reach_adv'] = fight.apply(get_reach, axis = 1)

In [16]:
fight.shape[0] - fight.loc[fight['r_reach_adv'].notna()].shape[0]

1037

We were missing a fighter's reach that was mentioned in a fight from the fighter dataset 1037 times. After checking the ufcstats.com these appear to values they were missing from the website and not an error with the data scraping and cleaning.

In [17]:
fight_transformed['r_reach_adv'] = fight['r_reach_adv']

In [18]:
fight_transformed.shape

(5858, 21)

## 6.
Adding each fighters stance

In [19]:
fight_transformed['r_stance'] = None
fight_transformed['b_stance'] = None

In [20]:
def get_stance(row, check = 'r'):
    try:
        if check == 'r':
            r_stance = fighter['stance'].loc[fighter['fighter_name']==row['r_fighter']].values[0]
            return r_stance
        elif check =='b':
            b_stance = fighter['stance'].loc[fighter['fighter_name']==row['b_fighter']].values[0]
            return b_stance
    except: #in case a fighter is not in the fighter dataset, since there are no nulls in the fighter data reach
        return None

In [21]:
fight_transformed['r_stance'] = fight.apply(get_stance, axis = 1)
fight_transformed['b_stance'] = fight.apply(get_stance, check = 'b', axis = 1)

In [22]:
fight_transformed.head()

Unnamed: 0,winner,win_con,title_bout,r_fighter,b_fighter,weight_class,r_str_acc,b_str_acc,r_td,b_td,r_SLpM,b_SLpM,r_SApM,b_SApM,r_consec_wins,b_consec_wins,r_wins,b_wins,r_losses,b_losses,r_reach_adv,r_stance,b_stance
0,b,KO/TKO,False,Curtis Blaydes,Derrick Lewis,Heavyweight,0.53,0.3,0,0,4.666667,1.166667,1.166667,4.666667,4,3,9,15,2,5,1.0,Orthodox,Orthodox
1,b,Decision - Unanimous,False,Ketlen Vieira,Yana Kunitskaya,Women's Bantamweight,0.43,0.78,3,0,0.466667,3.133333,3.133333,0.466667,1,1,5,3,1,2,0.0,Orthodox,Orthodox
2,b,Decision - Unanimous,False,Charles Rosa,Darrick Minner,Featherweight,0.6,0.75,0,4,1.0,3.0,3.0,1.0,1,1,4,1,4,1,0.0,Switch,Orthodox
3,b,KO/TKO,False,Aleksei Oleinik,Chris Daukaus,Heavyweight,0.5,0.6,0,0,5.0,34.0,34.0,5.0,0,2,8,2,5,0,4.0,Orthodox,Orthodox
4,r,Decision - Majority,False,Phil Hawes,Nassourdine Imavov,Middleweight,0.57,0.61,4,0,2.066667,3.8,3.8,2.066667,1,1,1,1,0,0,2.0,Orthodox,Orthodox


In [23]:
fight_transformed.to_csv('transformed_fight_data.csv', index = False)

# Classifier

In [2]:
fight_transformed = pd.read_csv('transformed_fight_data.csv')

In [19]:
#these three columns are the only ones with missing values, the missing values come from a lack of data on the ufcstats website
na_columns = ['r_reach_adv', 'r_stance', 'b_stance']
for col in na_columns:
    print(col)
    print(sum(fight_transformed[col].isna()))

r_reach_adv
0
r_stance
0
b_stance
0


In [20]:
#imputing the data using averages
fight_transformed['r_reach_adv'] = fight_transformed.r_reach_adv.fillna(fight_transformed.r_reach_adv.mean())
fight_transformed.r_stance = fight_transformed.fillna('Orthodox')
fight_transformed.b_stance = fight_transformed.fillna('Orthodox')

In [23]:
#creating dummy variables for stance and weight class
all_dummies = pd.get_dummies(fight_transformed[['r_stance', 'b_stance', 'weight_class']])
all_dummies

Unnamed: 0,r_stance_b,r_stance_r,b_stance_b,b_stance_r,weight_class_10 Tournament,weight_class_13 Heavyweight Tournament,weight_class_13 Lightweight Tournament,weight_class_14 Heavyweight Tournament,weight_class_14 Middleweight Tournament,weight_class_15 Heavyweight Tournament,weight_class_16 Lightweight Tournament,weight_class_17 Middleweight Tournament,weight_class_2 Tournament,weight_class_3 Tournament,weight_class_4 Tournament,weight_class_5 Tournament,weight_class_6 Tournament,weight_class_7 Tournament,weight_class_8 Tournament,weight_class_Bantamweight,weight_class_Catch Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Interim Bantamweight,weight_class_Interim Featherweight,weight_class_Interim Heavyweight,weight_class_Interim Light Heavyweight,weight_class_Interim Lightweight,weight_class_Interim Middleweight,weight_class_Interim Welterweight,weight_class_Light Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Open Weight,weight_class_Super Heavyweight,weight_class_Superfight Championship,weight_class_TUF Nations Canada vs. Australia Middleweight Tournament,weight_class_TUF Nations Canada vs. Australia Welterweight Tournament,weight_class_Ultimate Fighter 1 Light Heavyweight Tournament,weight_class_Ultimate Fighter 1 Middleweight Tournament,weight_class_Ultimate Fighter 10 Heavyweight Tournament,weight_class_Ultimate Fighter 11 Middleweight Tournament,weight_class_Ultimate Fighter 12 Lightweight Tournament,weight_class_Ultimate Fighter 13 Welterweight Tournament,weight_class_Ultimate Fighter 14 Bantamweight Tournament,weight_class_Ultimate Fighter 14 Featherweight Tournament,weight_class_Ultimate Fighter 15 Lightweight Tournament,weight_class_Ultimate Fighter 16 Welterweight Tournament,weight_class_Ultimate Fighter 17 Middleweight Tournament,weight_class_Ultimate Fighter 18 Bantamweight Tournament,weight_class_Ultimate Fighter 18 Women's Bantamweight Tournament,weight_class_Ultimate Fighter 19 Light Heavyweight Tournament,weight_class_Ultimate Fighter 19 Middleweight Tournament,weight_class_Ultimate Fighter 2 Heavyweight Tournament,weight_class_Ultimate Fighter 2 Welterweight Tournament,weight_class_Ultimate Fighter 21 Welterweight Tournament,weight_class_Ultimate Fighter 22 Lightweight Tournament,weight_class_Ultimate Fighter 23 Light Heavyweight Tournament,weight_class_Ultimate Fighter 23 Women's Strawweight Tournament,weight_class_Ultimate Fighter 25 Welterweight Tournament,weight_class_Ultimate Fighter 27 Featherweight Tournament,weight_class_Ultimate Fighter 27 Lightweight Tournament,weight_class_Ultimate Fighter 28 Heavyweight Tournament,weight_class_Ultimate Fighter 28 Women's Featherweight Tournament,weight_class_Ultimate Fighter 3 Light Heavyweight Tournament,weight_class_Ultimate Fighter 3 Middleweight Tournament,weight_class_Ultimate Fighter 4 Middleweight Tournament,weight_class_Ultimate Fighter 4 Welterweight Tournament,weight_class_Ultimate Fighter 5 Lightweight Tournament,weight_class_Ultimate Fighter 6 Welterweight Tournament,weight_class_Ultimate Fighter 7 Middleweight Tournament,weight_class_Ultimate Fighter 8 Light Heavyweight Tournament,weight_class_Ultimate Fighter 8 Lightweight Tournament,weight_class_Ultimate Fighter 9 Lightweight Tournament,weight_class_Ultimate Fighter 9 Welterweight Tournament,weight_class_Ultimate Fighter Australia vs. UK Lightweight Tournament,weight_class_Ultimate Fighter Australia vs. UK Welterweight Tournament,weight_class_Ultimate Fighter Brazil 1 Featherweight Tournament,weight_class_Ultimate Fighter Brazil 1 Middleweight Tournament,weight_class_Ultimate Fighter Brazil 2 Welterweight Tournament,weight_class_Ultimate Fighter Brazil 3 Heavyweight Tournament,weight_class_Ultimate Fighter Brazil 3 Middleweight Tournament,weight_class_Ultimate Fighter Brazil 4 Bantamweight Tournament,weight_class_Ultimate Fighter Brazil 4 Lightweight Tournament,weight_class_Ultimate Fighter China Featherweight Tournament,weight_class_Ultimate Fighter China Welterweight Tournament,weight_class_Ultimate Fighter Latin America 2 Lightweight Tournament,weight_class_Ultimate Fighter Latin America 2 Welterweight Tournament,weight_class_Ultimate Fighter Latin America 3 Lightweight Tournament,weight_class_Ultimate Fighter Latin America Bantamweight Tournament,weight_class_Ultimate Fighter Latin America Featherweight Tournament,weight_class_Ultimate Japan 2 Heavyweight Tournament,weight_class_Ultimate Japan Heavyweight Tournament,weight_class_Ultimate Ultimate '95 Tournament,weight_class_Ultimate Ultimate '96 Tournament,weight_class_Welterweight,weight_class_Women's Bantamweight,weight_class_Women's Featherweight,weight_class_Women's Flyweight,weight_class_Women's Strawweight
0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5853,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5854,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5855,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5856,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
#scaling the numerical data 