In [1]:
# Perform initial import of the needed libraries

import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import sklearn
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from IPython.display import display

In [83]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced
from sklearn.model_selection import train_test_split

In [3]:
# Read the CSV and perform basic data cleaning
file_path = Path('./Resources/fighters+fights_export.csv')
fnf_df = pd.read_csv(file_path)[:-2]

In [4]:
fnf_df.columns

Index(['fight_id', 'winner', 'title_bout', 'weight_class', 'no_of_rounds',
       'r_fighter', 'r_fighter_id', 'r_fighter_stance', 'b_fighter',
       'b_fighter_id', 'b_fighter_stance', 'date', 'referee', 'referee_id',
       'b_fighter_height', 'r_fighter_height', 'b_fighter_reach',
       'r_fighter_reach', 'b_fighter_weight', 'r_fighter_weight'],
      dtype='object')

In [5]:
columns = (['winner', 'title_bout', 'weight_class', 'no_of_rounds', 'r_fighter_stance', 'b_fighter_stance', 'b_fighter_height', 'r_fighter_height', 'b_fighter_reach',
       'r_fighter_reach', 'b_fighter_weight', 'r_fighter_weight'])

target = ["winner"]

In [6]:
fnf_df = fnf_df.loc[:, columns].copy()

In [7]:
fnf_df

Unnamed: 0,winner,title_bout,weight_class,no_of_rounds,r_fighter_stance,b_fighter_stance,b_fighter_height,r_fighter_height,b_fighter_reach,r_fighter_reach,b_fighter_weight,r_fighter_weight
0,Red,TRUE,Catch Weight,1,Southpaw,Orthodox,195.58,185.42,,,216.0,175.0
1,Red,FALSE,Open Weight,1,Southpaw,Orthodox,185.42,185.42,182.88,,205.0,175.0
2,Red,FALSE,Open Weight,1,Southpaw,Orthodox,185.42,185.42,,,196.0,175.0
3,Red,FALSE,Open Weight,1,Southpaw,,187.96,180.34,,,185.0,190.0
4,Red,FALSE,Open Weight,1,Orthodox,Orthodox,193.04,195.58,,,275.0,216.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5137,Blue,FALSE,Women's Strawweight,3,Orthodox,Orthodox,165.10,160.02,167.64,162.56,115.0,115.0
5138,Red,TRUE,Women's Flyweight,5,Southpaw,Orthodox,167.64,165.10,167.64,167.64,125.0,125.0
5139,Red,FALSE,Women's Strawweight,3,,Orthodox,165.10,165.10,162.56,167.64,115.0,115.0
5140,Red,FALSE,Women's Flyweight,3,Orthodox,Orthodox,167.64,175.26,165.10,172.72,125.0,125.0


In [8]:
fnf_df.isnull().sum()

winner                0
title_bout            0
weight_class          0
no_of_rounds          0
r_fighter_stance    134
b_fighter_stance    159
b_fighter_height      8
r_fighter_height      4
b_fighter_reach     666
r_fighter_reach     316
b_fighter_weight      6
r_fighter_weight      3
dtype: int64

In [9]:
fnf_df['r_fighter_stance'].fillna("unlisted", inplace = True)
fnf_df['b_fighter_stance'].fillna("unlisted", inplace = True)
fnf_df['r_fighter_height'].fillna(value=fnf_df['r_fighter_height'].mean(), inplace = True)
fnf_df['b_fighter_height'].fillna(value=fnf_df['b_fighter_height'].mean(), inplace = True)
fnf_df['b_fighter_reach'].fillna(value=fnf_df['b_fighter_reach'].mean(), inplace = True)
fnf_df['r_fighter_reach'].fillna(value=fnf_df['r_fighter_reach'].mean(), inplace = True)
fnf_df['b_fighter_weight'].fillna(value=fnf_df['b_fighter_weight'].mean(), inplace = True)
fnf_df['r_fighter_weight'].fillna(value=fnf_df['r_fighter_weight'].mean(), inplace = True)

In [10]:
fnf_df.isnull().sum()

winner              0
title_bout          0
weight_class        0
no_of_rounds        0
r_fighter_stance    0
b_fighter_stance    0
b_fighter_height    0
r_fighter_height    0
b_fighter_reach     0
r_fighter_reach     0
b_fighter_weight    0
r_fighter_weight    0
dtype: int64

In [11]:
fnf_df

Unnamed: 0,winner,title_bout,weight_class,no_of_rounds,r_fighter_stance,b_fighter_stance,b_fighter_height,r_fighter_height,b_fighter_reach,r_fighter_reach,b_fighter_weight,r_fighter_weight
0,Red,TRUE,Catch Weight,1,Southpaw,Orthodox,195.58,185.42,183.289714,183.665789,216.0,175.0
1,Red,FALSE,Open Weight,1,Southpaw,Orthodox,185.42,185.42,182.880000,183.665789,205.0,175.0
2,Red,FALSE,Open Weight,1,Southpaw,Orthodox,185.42,185.42,183.289714,183.665789,196.0,175.0
3,Red,FALSE,Open Weight,1,Southpaw,unlisted,187.96,180.34,183.289714,183.665789,185.0,190.0
4,Red,FALSE,Open Weight,1,Orthodox,Orthodox,193.04,195.58,183.289714,183.665789,275.0,216.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5137,Blue,FALSE,Women's Strawweight,3,Orthodox,Orthodox,165.10,160.02,167.640000,162.560000,115.0,115.0
5138,Red,TRUE,Women's Flyweight,5,Southpaw,Orthodox,167.64,165.10,167.640000,167.640000,125.0,125.0
5139,Red,FALSE,Women's Strawweight,3,unlisted,Orthodox,165.10,165.10,162.560000,167.640000,115.0,115.0
5140,Red,FALSE,Women's Flyweight,3,Orthodox,Orthodox,167.64,175.26,165.100000,172.720000,125.0,125.0


In [12]:
fnf_df.dtypes

winner               object
title_bout           object
weight_class         object
no_of_rounds          int64
r_fighter_stance     object
b_fighter_stance     object
b_fighter_height    float64
r_fighter_height    float64
b_fighter_reach     float64
r_fighter_reach     float64
b_fighter_weight    float64
r_fighter_weight    float64
dtype: object

In [13]:
# Generate our categorical variable lists
app_cat = fnf_df.dtypes[fnf_df.dtypes == "object"].index.tolist()

In [57]:
app_cat = [
 'title_bout',
 'weight_class',
 'r_fighter_stance',
 'b_fighter_stance']

In [58]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(fnf_df[app_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(app_cat)
encode_df.head()

Unnamed: 0,title_bout_FALSE,title_bout_TRUE,weight_class_Bantamweight,weight_class_Catch Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Open Weight,weight_class_Welterweight,weight_class_Women's Bantamweight,weight_class_Women's Featherweight,weight_class_Women's Flyweight,weight_class_Women's Strawweight,r_fighter_stance_Open Stance,r_fighter_stance_Orthodox,r_fighter_stance_Sideways,r_fighter_stance_Southpaw,r_fighter_stance_Switch,r_fighter_stance_unlisted,b_fighter_stance_Open Stance,b_fighter_stance_Orthodox,b_fighter_stance_Sideways,b_fighter_stance_Southpaw,b_fighter_stance_Switch,b_fighter_stance_unlisted
0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [59]:
pd.options.display.max_columns = None
display(encode_df)

Unnamed: 0,title_bout_FALSE,title_bout_TRUE,weight_class_Bantamweight,weight_class_Catch Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Open Weight,weight_class_Welterweight,weight_class_Women's Bantamweight,weight_class_Women's Featherweight,weight_class_Women's Flyweight,weight_class_Women's Strawweight,r_fighter_stance_Open Stance,r_fighter_stance_Orthodox,r_fighter_stance_Sideways,r_fighter_stance_Southpaw,r_fighter_stance_Switch,r_fighter_stance_unlisted,b_fighter_stance_Open Stance,b_fighter_stance_Orthodox,b_fighter_stance_Sideways,b_fighter_stance_Southpaw,b_fighter_stance_Switch,b_fighter_stance_unlisted
0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5137,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5138,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5139,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
5140,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [60]:
fnf_df

Unnamed: 0,winner,title_bout,weight_class,no_of_rounds,r_fighter_stance,b_fighter_stance,b_fighter_height,r_fighter_height,b_fighter_reach,r_fighter_reach,b_fighter_weight,r_fighter_weight
0,Red,TRUE,Catch Weight,1,Southpaw,Orthodox,195.58,185.42,183.289714,183.665789,216.0,175.0
1,Red,FALSE,Open Weight,1,Southpaw,Orthodox,185.42,185.42,182.880000,183.665789,205.0,175.0
2,Red,FALSE,Open Weight,1,Southpaw,Orthodox,185.42,185.42,183.289714,183.665789,196.0,175.0
3,Red,FALSE,Open Weight,1,Southpaw,unlisted,187.96,180.34,183.289714,183.665789,185.0,190.0
4,Red,FALSE,Open Weight,1,Orthodox,Orthodox,193.04,195.58,183.289714,183.665789,275.0,216.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5137,Blue,FALSE,Women's Strawweight,3,Orthodox,Orthodox,165.10,160.02,167.640000,162.560000,115.0,115.0
5138,Red,TRUE,Women's Flyweight,5,Southpaw,Orthodox,167.64,165.10,167.640000,167.640000,125.0,125.0
5139,Red,FALSE,Women's Strawweight,3,unlisted,Orthodox,165.10,165.10,162.560000,167.640000,115.0,115.0
5140,Red,FALSE,Women's Flyweight,3,Orthodox,Orthodox,167.64,175.26,165.100000,172.720000,125.0,125.0


In [61]:
encode_df.columns

Index(['title_bout_FALSE', 'title_bout_TRUE ', 'weight_class_Bantamweight',
       'weight_class_Catch Weight', 'weight_class_Featherweight',
       'weight_class_Flyweight', 'weight_class_Heavyweight',
       'weight_class_Light Heavyweight', 'weight_class_Lightweight',
       'weight_class_Middleweight', 'weight_class_Open Weight',
       'weight_class_Welterweight', 'weight_class_Women's Bantamweight',
       'weight_class_Women's Featherweight', 'weight_class_Women's Flyweight',
       'weight_class_Women's Strawweight', 'r_fighter_stance_Open Stance',
       'r_fighter_stance_Orthodox', 'r_fighter_stance_Sideways',
       'r_fighter_stance_Southpaw', 'r_fighter_stance_Switch',
       'r_fighter_stance_unlisted', 'b_fighter_stance_Open Stance',
       'b_fighter_stance_Orthodox', 'b_fighter_stance_Sideways',
       'b_fighter_stance_Southpaw', 'b_fighter_stance_Switch',
       'b_fighter_stance_unlisted'],
      dtype='object')

In [62]:
fnf_df_num = pd.concat([fnf_df,encode_df],axis=1)

In [63]:
fnf_df_num

Unnamed: 0,winner,title_bout,weight_class,no_of_rounds,r_fighter_stance,b_fighter_stance,b_fighter_height,r_fighter_height,b_fighter_reach,r_fighter_reach,b_fighter_weight,r_fighter_weight,title_bout_FALSE,title_bout_TRUE,weight_class_Bantamweight,weight_class_Catch Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Open Weight,weight_class_Welterweight,weight_class_Women's Bantamweight,weight_class_Women's Featherweight,weight_class_Women's Flyweight,weight_class_Women's Strawweight,r_fighter_stance_Open Stance,r_fighter_stance_Orthodox,r_fighter_stance_Sideways,r_fighter_stance_Southpaw,r_fighter_stance_Switch,r_fighter_stance_unlisted,b_fighter_stance_Open Stance,b_fighter_stance_Orthodox,b_fighter_stance_Sideways,b_fighter_stance_Southpaw,b_fighter_stance_Switch,b_fighter_stance_unlisted
0,Red,TRUE,Catch Weight,1,Southpaw,Orthodox,195.58,185.42,183.289714,183.665789,216.0,175.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,Red,FALSE,Open Weight,1,Southpaw,Orthodox,185.42,185.42,182.880000,183.665789,205.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,Red,FALSE,Open Weight,1,Southpaw,Orthodox,185.42,185.42,183.289714,183.665789,196.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,Red,FALSE,Open Weight,1,Southpaw,unlisted,187.96,180.34,183.289714,183.665789,185.0,190.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,Red,FALSE,Open Weight,1,Orthodox,Orthodox,193.04,195.58,183.289714,183.665789,275.0,216.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5137,Blue,FALSE,Women's Strawweight,3,Orthodox,Orthodox,165.10,160.02,167.640000,162.560000,115.0,115.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5138,Red,TRUE,Women's Flyweight,5,Southpaw,Orthodox,167.64,165.10,167.640000,167.640000,125.0,125.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5139,Red,FALSE,Women's Strawweight,3,unlisted,Orthodox,165.10,165.10,162.560000,167.640000,115.0,115.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
5140,Red,FALSE,Women's Flyweight,3,Orthodox,Orthodox,167.64,175.26,165.100000,172.720000,125.0,125.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [64]:
fnf_df_num_2 = fnf_df_num.copy()

In [65]:
fnf_df_num_2 = fnf_df_num_2.replace(to_replace ="Red ", value = 1.0)
fnf_df_num_2 = fnf_df_num_2.replace(to_replace ="Blue", value = 2.0)
fnf_df_num_2 = fnf_df_num_2.replace(to_replace ="Draw", value = 3.0)

fnf_df_num_2.head(40)

Unnamed: 0,winner,title_bout,weight_class,no_of_rounds,r_fighter_stance,b_fighter_stance,b_fighter_height,r_fighter_height,b_fighter_reach,r_fighter_reach,b_fighter_weight,r_fighter_weight,title_bout_FALSE,title_bout_TRUE,weight_class_Bantamweight,weight_class_Catch Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Open Weight,weight_class_Welterweight,weight_class_Women's Bantamweight,weight_class_Women's Featherweight,weight_class_Women's Flyweight,weight_class_Women's Strawweight,r_fighter_stance_Open Stance,r_fighter_stance_Orthodox,r_fighter_stance_Sideways,r_fighter_stance_Southpaw,r_fighter_stance_Switch,r_fighter_stance_unlisted,b_fighter_stance_Open Stance,b_fighter_stance_Orthodox,b_fighter_stance_Sideways,b_fighter_stance_Southpaw,b_fighter_stance_Switch,b_fighter_stance_unlisted
0,1.0,True,Catch Weight,1,Southpaw,Orthodox,195.58,185.42,183.289714,183.665789,216.0,175.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1.0,False,Open Weight,1,Southpaw,Orthodox,185.42,185.42,182.88,183.665789,205.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,False,Open Weight,1,Southpaw,Orthodox,185.42,185.42,183.289714,183.665789,196.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1.0,False,Open Weight,1,Southpaw,unlisted,187.96,180.34,183.289714,183.665789,185.0,190.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1.0,False,Open Weight,1,Orthodox,Orthodox,193.04,195.58,183.289714,183.665789,275.0,216.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5,1.0,False,Open Weight,1,Orthodox,Orthodox,182.88,195.58,183.289714,183.665789,430.0,216.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
6,1.0,False,Open Weight,1,Orthodox,Orthodox,187.96,185.42,183.289714,182.88,225.0,205.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,1.0,False,Open Weight,1,Orthodox,Orthodox,195.58,193.04,183.289714,183.665789,250.0,275.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
8,1.0,False,Open Weight,1,Southpaw,Southpaw,190.5,185.42,183.289714,183.665789,260.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
9,1.0,False,Open Weight,1,Southpaw,Southpaw,180.34,185.42,183.289714,183.665789,190.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [69]:
fnf_df_num_2.drop(['title_bout', 'weight_class', 'r_fighter_stance', 'b_fighter_stance'], axis = 1 , inplace = True)
fnf_df_num_2

Unnamed: 0,winner,no_of_rounds,b_fighter_height,r_fighter_height,b_fighter_reach,r_fighter_reach,b_fighter_weight,r_fighter_weight,title_bout_FALSE,title_bout_TRUE,weight_class_Bantamweight,weight_class_Catch Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Open Weight,weight_class_Welterweight,weight_class_Women's Bantamweight,weight_class_Women's Featherweight,weight_class_Women's Flyweight,weight_class_Women's Strawweight,r_fighter_stance_Open Stance,r_fighter_stance_Orthodox,r_fighter_stance_Sideways,r_fighter_stance_Southpaw,r_fighter_stance_Switch,r_fighter_stance_unlisted,b_fighter_stance_Open Stance,b_fighter_stance_Orthodox,b_fighter_stance_Sideways,b_fighter_stance_Southpaw,b_fighter_stance_Switch,b_fighter_stance_unlisted
0,1.0,1,195.58,185.42,183.289714,183.665789,216.0,175.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1.0,1,185.42,185.42,182.880000,183.665789,205.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,1,185.42,185.42,183.289714,183.665789,196.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1.0,1,187.96,180.34,183.289714,183.665789,185.0,190.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1.0,1,193.04,195.58,183.289714,183.665789,275.0,216.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5137,2.0,3,165.10,160.02,167.640000,162.560000,115.0,115.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5138,1.0,5,167.64,165.10,167.640000,167.640000,125.0,125.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5139,1.0,3,165.10,165.10,162.560000,167.640000,115.0,115.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
5140,1.0,3,167.64,175.26,165.100000,172.720000,125.0,125.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [73]:
fnf_df_num_2.dtypes

winner                                float64
no_of_rounds                            int64
b_fighter_height                      float64
r_fighter_height                      float64
b_fighter_reach                       float64
r_fighter_reach                       float64
b_fighter_weight                      float64
r_fighter_weight                      float64
title_bout_FALSE                      float64
title_bout_TRUE                       float64
weight_class_Bantamweight             float64
weight_class_Catch Weight             float64
weight_class_Featherweight            float64
weight_class_Flyweight                float64
weight_class_Heavyweight              float64
weight_class_Light Heavyweight        float64
weight_class_Lightweight              float64
weight_class_Middleweight             float64
weight_class_Open Weight              float64
weight_class_Welterweight             float64
weight_class_Women's Bantamweight     float64
weight_class_Women's Featherweight

In [80]:
# Preprocess data, split the data into Training and Testing
    # Create feature set 
X = fnf_df_num_2.copy()
X = X.drop(columns='winner', axis=1)
X

Unnamed: 0,no_of_rounds,b_fighter_height,r_fighter_height,b_fighter_reach,r_fighter_reach,b_fighter_weight,r_fighter_weight,title_bout_FALSE,title_bout_TRUE,weight_class_Bantamweight,weight_class_Catch Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Open Weight,weight_class_Welterweight,weight_class_Women's Bantamweight,weight_class_Women's Featherweight,weight_class_Women's Flyweight,weight_class_Women's Strawweight,r_fighter_stance_Open Stance,r_fighter_stance_Orthodox,r_fighter_stance_Sideways,r_fighter_stance_Southpaw,r_fighter_stance_Switch,r_fighter_stance_unlisted,b_fighter_stance_Open Stance,b_fighter_stance_Orthodox,b_fighter_stance_Sideways,b_fighter_stance_Southpaw,b_fighter_stance_Switch,b_fighter_stance_unlisted
0,1,195.58,185.42,183.289714,183.665789,216.0,175.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1,185.42,185.42,182.880000,183.665789,205.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1,185.42,185.42,183.289714,183.665789,196.0,175.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1,187.96,180.34,183.289714,183.665789,185.0,190.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1,193.04,195.58,183.289714,183.665789,275.0,216.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5137,3,165.10,160.02,167.640000,162.560000,115.0,115.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5138,5,167.64,165.10,167.640000,167.640000,125.0,125.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5139,3,165.10,165.10,162.560000,167.640000,115.0,115.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
5140,3,167.64,175.26,165.100000,172.720000,125.0,125.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [81]:
# Create target
y = fnf_df_num_2["winner"]
y

0       1.0
1       1.0
2       1.0
3       1.0
4       1.0
       ... 
5137    2.0
5138    1.0
5139    1.0
5140    1.0
5141    2.0
Name: winner, Length: 5142, dtype: float64

In [90]:
# Check the balance of our target values
y.value_counts()

1.0    3469
2.0    1590
3.0      83
Name: winner, dtype: int64

In [94]:
# Split into Train and Test sets. 
x_Train, x_Test, y_Train, y_Test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [95]:
# Determine the shape of our training and testing sets.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(3856, 35)
(1286, 35)
(3856,)
(1286,)


In [86]:
# Create a StandardScaler instance. 
scaler = StandardScaler()

In [96]:
# Fit the Standard Scaler with the training data. 
X_scaler = scaler.fit(X_train)

In [99]:
# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [103]:
# Resample the training data with the BalancedRandomForestClassifier
# YOUR CODE HERE
from imblearn.ensemble import BalancedRandomForestClassifier
brf = BalancedRandomForestClassifier(n_estimators=100, random_state=1)
brf.fit(X_train, y_train)

BalancedRandomForestClassifier(random_state=1)

In [104]:
y_pred = brf.predict(X_test)

In [105]:
# Calculated the balanced accuracy score
from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred)

0.35505352502014526

In [106]:
# Display the confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

array([[335, 225, 326],
       [ 91, 133, 152],
       [ 10,   6,   8]])

In [107]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        1.0       0.77      0.38      0.75      0.51      0.53      0.27       886
        2.0       0.37      0.35      0.75      0.36      0.51      0.25       376
        3.0       0.02      0.33      0.62      0.03      0.46      0.20        24

avg / total       0.64      0.37      0.74      0.45      0.52      0.27      1286



In [108]:
X.columns

Index(['no_of_rounds', 'b_fighter_height', 'r_fighter_height',
       'b_fighter_reach', 'r_fighter_reach', 'b_fighter_weight',
       'r_fighter_weight', 'title_bout_FALSE', 'title_bout_TRUE ',
       'weight_class_Bantamweight', 'weight_class_Catch Weight',
       'weight_class_Featherweight', 'weight_class_Flyweight',
       'weight_class_Heavyweight', 'weight_class_Light Heavyweight',
       'weight_class_Lightweight', 'weight_class_Middleweight',
       'weight_class_Open Weight', 'weight_class_Welterweight',
       'weight_class_Women's Bantamweight',
       'weight_class_Women's Featherweight', 'weight_class_Women's Flyweight',
       'weight_class_Women's Strawweight', 'r_fighter_stance_Open Stance',
       'r_fighter_stance_Orthodox', 'r_fighter_stance_Sideways',
       'r_fighter_stance_Southpaw', 'r_fighter_stance_Switch',
       'r_fighter_stance_unlisted', 'b_fighter_stance_Open Stance',
       'b_fighter_stance_Orthodox', 'b_fighter_stance_Sideways',
       'b_fighter_sta

In [109]:
# List the features sorted in descending order by feature importance
sorted(zip(brf.feature_importances_, X_train.columns), reverse=True)

[(0.15744900108654195, 'b_fighter_reach'),
 (0.13460998815194375, 'r_fighter_reach'),
 (0.13025160996661275, 'r_fighter_height'),
 (0.12371293574550336, 'b_fighter_height'),
 (0.08046051232089836, 'b_fighter_weight'),
 (0.07599792543906805, 'r_fighter_weight'),
 (0.02760347732352587, 'b_fighter_stance_Orthodox'),
 (0.026279126196951302, 'no_of_rounds'),
 (0.02509714998898421, 'r_fighter_stance_Orthodox'),
 (0.02420025862437564, 'b_fighter_stance_Southpaw'),
 (0.022455278008122373, 'r_fighter_stance_Southpaw'),
 (0.01869697526919961, 'weight_class_Welterweight'),
 (0.01662835930502647, 'weight_class_Lightweight'),
 (0.015394061394993129, 'weight_class_Featherweight'),
 (0.015240363236778083, 'weight_class_Middleweight'),
 (0.011279862770840898, 'title_bout_FALSE'),
 (0.01091810179623833, 'weight_class_Heavyweight'),
 (0.01074797759165293, 'title_bout_TRUE '),
 (0.01042681189813502, 'weight_class_Bantamweight'),
 (0.010252492570081692, 'weight_class_Light Heavyweight'),
 (0.0090386824155