## Establish Dependencies and Load in Data

In [1]:
# Import all packages and dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import sys, warnings, os
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [2]:
#Ignore warnings when validating scores
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

In [3]:
#Setting columns and rows to display all the results
pd.set_option("display.max_columns", None, "display.max_rows", None)

In [4]:
# Read in the dataset
ufc_master_df = pd.read_csv("../Resources/ufc-master.csv")
ufc_master_df.head()

Unnamed: 0,R_fighter,B_fighter,R_odds,B_odds,R_ev,B_ev,date,location,country,Winner,title_bout,weight_class,gender,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_SIG_STR_landed,B_avg_SIG_STR_pct,B_avg_SUB_ATT,B_avg_TD_landed,B_avg_TD_pct,B_longest_win_streak,B_losses,B_total_rounds_fought,B_total_title_bouts,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_wins,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_current_lose_streak,R_current_win_streak,R_draw,R_avg_SIG_STR_landed,R_avg_SIG_STR_pct,R_avg_SUB_ATT,R_avg_TD_landed,R_avg_TD_pct,R_longest_win_streak,R_losses,R_total_rounds_fought,R_total_title_bouts,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_wins,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,R_age,B_age,lose_streak_dif,win_streak_dif,longest_win_streak_dif,win_dif,loss_dif,total_round_dif,total_title_bout_dif,ko_dif,sub_dif,height_dif,reach_dif,age_dif,sig_str_dif,avg_sub_att_dif,avg_td_dif,empty_arena,constant_1,B_match_weightclass_rank,R_match_weightclass_rank,R_Women's Flyweight_rank,R_Women's Featherweight_rank,R_Women's Strawweight_rank,R_Women's Bantamweight_rank,R_Heavyweight_rank,R_Light Heavyweight_rank,R_Middleweight_rank,R_Welterweight_rank,R_Lightweight_rank,R_Featherweight_rank,R_Bantamweight_rank,R_Flyweight_rank,R_Pound-for-Pound_rank,B_Women's Flyweight_rank,B_Women's Featherweight_rank,B_Women's Strawweight_rank,B_Women's Bantamweight_rank,B_Heavyweight_rank,B_Light Heavyweight_rank,B_Middleweight_rank,B_Welterweight_rank,B_Lightweight_rank,B_Featherweight_rank,B_Bantamweight_rank,B_Flyweight_rank,B_Pound-for-Pound_rank,better_rank,finish,finish_details,finish_round,finish_round_time,total_fight_time_secs,R_kd_bout,B_kd_bout,R_sig_str_landed_bout,B_sig_str_landed_bout,R_sig_str_attempted_bout,B_sig_str_attempted_bout,R_sig_str_pct_bout,B_sig_str_pct_bout,R_tot_str_landed_bout,B_tot_str_landed_bout,R_tot_str_attempted_bout,B_tot_str_attempted_bout,R_td_landed_bout,B_td_landed_bout,R_td_attempted_bout,B_td_attempted_bout,R_td_pct_bout,B_td_pct_bout,R_sub_attempts_bout,B_sub_attempts_bout,R_pass_bout,B_pass_bout,R_rev_bout,B_rev_bout
0,Alistair Overeem,Alexander Volkov,150,-182,150.0,54.945055,2/6/2021,"Las Vegas, Nevada, USA",USA,Blue,False,Heavyweight,MALE,5,0,1,0,4.76,0.58,0.2,0.69,0.7,4,2,26,0,0,1,2,3,0,0,6,Orthodox,200.66,203.2,250,0,2,0,3.71,0.64,0.8,1.44,0.56,11,15,83,1,1,0,4,20,8,0,33,Orthodox,193.04,203.2,265,40,32,0,-1,-7,-27,-13,-57,-1,-17,-8,7.62,0.0,-8,1.05,-0.6,-0.75,1,1,6.0,5.0,,,,,5.0,,,,,,,,,,,,,6.0,,,,,,,,,Red,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Cory Sandhagen,Frankie Edgar,-400,300,25.0,300.0,2/6/2021,"Las Vegas, Nevada, USA",USA,Red,False,Bantamweight,MALE,3,0,1,1,3.7,0.39,0.3,2.28,0.31,5,8,94,9,0,1,10,5,2,0,18,Orthodox,167.64,172.72,135,0,1,0,6.88,0.48,0.5,1.07,0.5,5,1,14,0,0,1,1,3,1,0,6,Switch,180.34,177.8,135,28,39,0,0,0,12,7,80,9,2,1,-12.7,-5.08,11,-3.18,-0.2,1.21,1,1,4.0,2.0,,,,,,,,,,,2.0,,,,,,,,,,,,,4.0,,,Red,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Alexandre Pantoja,Manel Kape,-125,100,80.0,100.0,2/6/2021,"Las Vegas, Nevada, USA",USA,Red,False,Flyweight,MALE,3,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,Southpaw,165.1,172.72,125,1,0,0,4.2,0.47,1.1,1.08,0.36,3,3,21,0,0,1,1,2,2,0,6,Orthodox,165.1,170.18,125,30,27,-1,0,-3,-6,-3,-21,0,-2,-2,0.0,2.54,-3,-4.2,-1.1,-1.08,1,1,,5.0,,,,,,,,,,,,5.0,,,,,,,,,,,,,,,Red,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Diego Ferreira,Beneil Dariush,-125,105,80.0,105.0,2/6/2021,"Las Vegas, Nevada, USA",USA,Blue,False,Lightweight,MALE,3,0,5,1,3.98,0.49,1.1,1.75,0.32,5,4,36,0,0,1,4,3,5,0,13,Southpaw,177.8,182.88,155,0,6,0,5.07,0.36,1.0,1.04,0.27,6,2,21,0,0,0,3,3,2,0,8,Orthodox,175.26,187.96,155,36,31,0,-1,-1,5,2,15,0,0,3,2.54,-5.08,-5,-1.09,0.1,0.71,1,1,13.0,10.0,,,,,,,,,10.0,,,,,,,,,,,,,13.0,,,,,Red,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Michael Johnson,Clay Guida,-225,175,44.444444,175.0,2/6/2021,"Las Vegas, Nevada, USA",USA,Blue,False,Lightweight,MALE,3,2,0,0,2.41,0.33,0.7,3.37,0.38,4,15,87,0,0,3,8,2,4,0,17,Orthodox,170.18,177.8,155,3,0,0,4.23,0.38,0.1,0.51,0.45,4,12,58,0,0,1,6,4,0,0,11,Southpaw,177.8,185.42,155,34,39,-1,0,0,6,3,29,0,-2,4,-7.62,-7.62,5,-1.82,0.6,2.86,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,neither,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [5]:
print(ufc_master_df.shape)
print(ufc_master_df.info())

(4566, 137)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4566 entries, 0 to 4565
Columns: 137 entries, R_fighter to B_rev_bout
dtypes: bool(1), float64(73), int64(47), object(16)
memory usage: 4.7+ MB
None


## Baseline Classification Model

In [6]:
# Splitting X (features) from y (labels)
#Classification task, the target value will be "Winner"
y = ufc_master_df.Winner
X = ufc_master_df.drop(['Winner'], axis=1)

In [7]:
# Begin encoding categorical variables by separating variabes by datatype
categorical_col = [col for col in X.columns if X[col].dtypes == 'object']
numeric_col = [col for col in X.columns if col not in categorical_col]

In [8]:
# Encode categorical columns first in order to avoid data leakage in split
enc = LabelEncoder()
for i in X[categorical_col]:
    #using astype(str) to avoid columns with 'float and str' to throw errors
    X[i] = enc.fit_transform(X[i].astype(str))
    

In [9]:
# Encode label (y) for Red fighter ID = 1 Blue fighter ID = 0
y = [1 if win == "Red" else 0 for win in y]
y

[0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [10]:
# Split dataset into train test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size = 0.25)

In [11]:
# Find and fill null values
# List column key/values in descending order
X_train.isnull().sum().sort_values(ascending=False)

B_Women's Featherweight_rank    3424
R_Women's Featherweight_rank    3417
B_Pound-for-Pound_rank          3403
B_Women's Flyweight_rank        3396
R_Women's Flyweight_rank        3387
B_Women's Strawweight_rank      3386
B_Featherweight_rank            3365
B_Women's Bantamweight_rank     3365
B_Lightweight_rank              3363
B_Light Heavyweight_rank        3362
B_Bantamweight_rank             3362
B_Flyweight_rank                3358
B_Welterweight_rank             3357
B_Middleweight_rank             3356
R_Women's Strawweight_rank      3356
B_Heavyweight_rank              3346
R_Women's Bantamweight_rank     3343
R_Featherweight_rank            3333
R_Lightweight_rank              3329
R_Middleweight_rank             3329
R_Flyweight_rank                3328
R_Light Heavyweight_rank        3326
R_Bantamweight_rank             3323
R_Welterweight_rank             3317
R_Heavyweight_rank              3317
R_Pound-for-Pound_rank          3312
R_rev_bout                      1330
B

In [12]:
# Creating a StandardScaler instance.

scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

##Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [13]:
# Saving impute scaler instances in case we decide to use instead of standar scaler
# Impute the numerical columns with mean.

#imp = SimpleImputer(strategy='most_frequent')
#scaler = imp.fit(X_train[numeric_col])
#X_train[numeric_col] = scaler.transform(X_train[numeric_col])
#X_test[numeric_col] = scaler.transform(X_test[numeric_col])

In [14]:
# Fill the categorical columns with 'most_frequent'
#imp1 = SimpleImputer(strategy='most_frequent')
#scaler1 = imp1.fit(X_train[categorical_col])
#X_train[categorical_col] = scaler1.transform(X_train[categorical_col])
#X_test[categorical_col] = scaler1.transform(X_test[categorical_col])

In [15]:
X_train.sample(10)

Unnamed: 0,R_fighter,B_fighter,R_odds,B_odds,R_ev,B_ev,date,location,country,title_bout,weight_class,gender,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_SIG_STR_landed,B_avg_SIG_STR_pct,B_avg_SUB_ATT,B_avg_TD_landed,B_avg_TD_pct,B_longest_win_streak,B_losses,B_total_rounds_fought,B_total_title_bouts,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_wins,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_current_lose_streak,R_current_win_streak,R_draw,R_avg_SIG_STR_landed,R_avg_SIG_STR_pct,R_avg_SUB_ATT,R_avg_TD_landed,R_avg_TD_pct,R_longest_win_streak,R_losses,R_total_rounds_fought,R_total_title_bouts,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_wins,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,R_age,B_age,lose_streak_dif,win_streak_dif,longest_win_streak_dif,win_dif,loss_dif,total_round_dif,total_title_bout_dif,ko_dif,sub_dif,height_dif,reach_dif,age_dif,sig_str_dif,avg_sub_att_dif,avg_td_dif,empty_arena,constant_1,B_match_weightclass_rank,R_match_weightclass_rank,R_Women's Flyweight_rank,R_Women's Featherweight_rank,R_Women's Strawweight_rank,R_Women's Bantamweight_rank,R_Heavyweight_rank,R_Light Heavyweight_rank,R_Middleweight_rank,R_Welterweight_rank,R_Lightweight_rank,R_Featherweight_rank,R_Bantamweight_rank,R_Flyweight_rank,R_Pound-for-Pound_rank,B_Women's Flyweight_rank,B_Women's Featherweight_rank,B_Women's Strawweight_rank,B_Women's Bantamweight_rank,B_Heavyweight_rank,B_Light Heavyweight_rank,B_Middleweight_rank,B_Welterweight_rank,B_Lightweight_rank,B_Featherweight_rank,B_Bantamweight_rank,B_Flyweight_rank,B_Pound-for-Pound_rank,better_rank,finish,finish_details,finish_round,finish_round_time,total_fight_time_secs,R_kd_bout,B_kd_bout,R_sig_str_landed_bout,B_sig_str_landed_bout,R_sig_str_attempted_bout,B_sig_str_attempted_bout,R_sig_str_pct_bout,B_sig_str_pct_bout,R_tot_str_landed_bout,B_tot_str_landed_bout,R_tot_str_attempted_bout,B_tot_str_attempted_bout,R_td_landed_bout,B_td_landed_bout,R_td_attempted_bout,B_td_attempted_bout,R_td_pct_bout,B_td_pct_bout,R_sub_attempts_bout,B_sub_attempts_bout,R_pass_bout,B_pass_bout,R_rev_bout,B_rev_bout
1763,1241,1054,375,-470,375.0,21.276596,245,131,20,False,5,1,3,0,4,0,27.0,0.555,0.75,2.25,0.56,4,0,7,0,0,0,0,1,3,0,4,1,190.5,195.58,205,0,1,0,82.0,0.45,0.0,0.0,0.0,1,0,3,0,0,1,0,0,0,0,1,1,187.96,190.5,205,27,30,0,3,3,3,0,4,0,1,3,2.54,5.08,-3,-55.0,0.75,2.25,0,1,12,11,,,,,,5.0,,,,,,,,,,,,,7.0,,,,,,,,1,1,24,1.0,21,28.0,1.0,0.0,5.0,9.0,7.0,19.0,0.71,0.47,5.0,9.0,7.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1759,500,185,-380,315,26.315789,315.0,282,109,2,False,0,1,3,0,0,0,,,,,,0,0,0,0,0,0,0,0,0,0,0,3,167.64,167.64,135,0,2,0,28.285714,0.475714,0.571429,1.071429,0.368571,3,5,33,0,0,1,4,2,2,0,9,2,175.26,180.34,135,36,30,0,-2,-3,-9,5,-33,0,-2,-2,-7.62,-12.7,6,-28.285714,-0.571429,-1.071429,0,1,25,5,,,,,,,,,,,13.0,,,,,,,,,,,,,,,,1,5,8,1.0,101,108.0,,,,,,,,,,,,,,,,,,,,,,,,
1506,369,782,150,-170,150.0,58.823529,117,39,21,False,6,1,3,0,1,0,104.0,0.59,0.0,0.0,0.0,1,0,2,0,0,0,0,1,0,0,1,1,180.34,177.8,155,2,0,0,36.5,0.4,0.0,1.833333,0.208333,3,3,14,2,0,2,0,1,0,0,3,1,175.26,175.26,155,33,29,2,1,-2,-2,3,-12,-2,0,0,5.08,2.54,4,67.5,0.0,-1.833333,0,1,10,10,,,,,,,,,4.0,,,,,,,,,,,,,5.0,,,,,1,1,15,3.0,231,839.0,,,,,,,,,,,,,,,,,,,,,,,,
2036,747,24,150,-170,150.0,58.823529,56,74,23,False,8,1,3,1,0,0,35.285714,0.414286,0.0,0.142857,0.142857,5,2,14,0,0,1,1,3,0,0,5,1,180.34,185.42,170,0,1,0,24.4,0.634,0.2,0.4,0.132,2,2,13,0,0,0,2,1,0,0,3,2,182.88,187.96,170,25,24,-1,-1,3,2,0,1,0,2,0,-2.54,-2.54,1,10.885714,-0.2,-0.257143,0,1,5,26,,,,,,,,,,,,,,,,,,,,,14.0,,,,,,0,5,26,3.0,174,781.0,,,,,,,,,,,,,,,,,,,,,,,,
2544,331,1274,-380,315,26.315789,315.0,364,85,21,False,7,1,3,0,3,0,25.0,0.61,0.0,0.0,0.0,3,1,6,0,0,0,0,3,0,0,3,2,187.96,190.5,205,0,2,0,20.6,0.444,0.6,2.2,0.246,2,1,11,0,0,0,2,1,1,0,4,2,185.42,195.58,185,31,29,0,1,1,-1,0,-5,0,2,-1,2.54,-5.08,2,4.4,-0.6,-2.2,0,1,25,7,,,,,,,15.0,,,,,,,,,,,,,,,,,,,,1,1,25,1.0,132,139.0,,,,,,,,,,,,,,,,,,,,,,,,
4394,962,961,-230,190,43.478261,190.0,63,93,9,False,8,1,3,0,0,0,,,,,,0,0,0,0,0,0,0,0,0,0,0,1,175.26,177.8,170,0,0,0,,,,,,0,0,0,0,0,0,0,0,0,0,0,1,185.42,182.88,170,23,27,0,0,0,0,0,0,0,0,0,-10.16,-5.08,-4,0.0,0.0,0.0,0,1,25,26,,,,,,,,,,,,,,,,,,,,,,,,,,,3,6,32,3.0,292,900.0,0.0,0.0,50.0,33.0,98.0,94.0,0.51,0.35,230.0,62.0,290.0,129.0,3.0,3.0,4.0,4.0,0.75,0.75,2.0,2.0,6.0,0.0,0.0,1.0
2954,1099,1477,195,-215,195.0,46.511628,41,108,2,False,3,1,3,0,1,0,26.0,0.343333,0.333333,5.333333,0.573333,1,1,9,0,0,0,2,0,0,0,2,2,162.56,165.1,125,0,1,0,37.777778,0.415556,0.222222,2.0,0.335556,2,5,21,0,0,0,2,1,1,0,4,1,165.1,167.64,135,32,29,0,0,-1,-2,4,-12,0,-1,-1,-2.54,-2.54,3,-11.777778,0.111111,3.333333,0,1,25,7,,,,,,,,,,,,15.0,,,,,,,,,,,,,,,1,5,2,1.0,201,208.0,1.0,0.0,6.0,13.0,19.0,35.0,0.31,0.37,7.0,13.0,20.0,35.0,0.0,1.0,0.0,2.0,0.0,0.5,0.0,1.0,0.0,2.0,0.0,0.0
715,779,227,-115,-105,86.956522,95.238095,74,88,21,False,8,1,3,0,2,0,,0.2,0.3,1.35,0.26,2,1,7,0,0,0,1,0,1,0,2,2,187.96,190.5,170,1,0,0,,0.49,0.0,0.38,1.0,1,2,7,0,0,0,0,2,0,0,2,1,182.88,187.96,170,34,32,1,2,1,0,1,0,0,-2,1,5.08,2.54,2,0.0,0.3,0.97,0,1,25,26,,,,,,,,,,,,,,,,,,,,,,,,,,,3,1,24,3.0,116,723.0,,,,,,,,,,,,,,,,,,,,,,,,
2681,228,1457,-420,335,23.809524,335.0,239,66,21,True,7,1,5,0,3,0,10.631579,0.508421,0.421053,0.315789,0.144737,3,6,29,5,0,0,0,10,2,1,13,2,182.88,187.96,185,0,8,0,30.25,0.45,0.875,2.5,0.74875,8,0,19,3,0,0,3,3,2,0,8,1,187.96,198.12,185,30,38,0,-5,-5,5,-6,10,2,8,0,-5.08,-10.16,-8,-19.618421,-0.453947,-2.184211,0,1,8,0,,,,,,,0.0,,,,,,3.0,,,,,,,3.0,,,,,,14.0,1,1,25,1.0,166,173.0,0.0,0.0,46.0,18.0,69.0,32.0,0.66,0.56,50.0,19.0,73.0,36.0,1.0,0.0,2.0,0.0,0.5,0.0,0.0,0.0,2.0,0.0,0.0,0.0
4330,352,1139,-275,235,36.363636,235.0,165,66,21,False,6,1,3,0,1,0,37.875,0.3675,1.0,1.125,0.5825,2,3,20,0,0,0,3,1,1,0,5,1,175.26,177.8,155,0,0,0,,,,,,0,0,0,0,0,0,0,0,0,0,0,1,185.42,185.42,155,27,26,0,1,2,5,-3,20,0,1,1,-10.16,-7.62,1,37.875,1.0,1.125,0,1,25,26,,,,,,,,,,,,,,,,,,,,,,,,,,,3,5,26,2.0,220,528.0,,,,,,,,,,,,,,,,,,,,,,,,


In [16]:
# Dummy classifier used for simple model for comparison after optimization-DummyClassifier will allow us to run model with null values
simple_model = DummyClassifier(random_state=42)
simple_model.fit(X_train, y_train)

DummyClassifier(random_state=42)

In [17]:
prediction = simple_model.predict(X_test)
accuracy_score(y_test, prediction)

0.5175131348511384

#### Our simple model can predict winners with ~ 51.75% accuracy