In [1]:
# import library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from collections import Counter
from imblearn.over_sampling import RandomOverSampler, SMOTE, BorderlineSMOTE, SVMSMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler, NearMiss, TomekLinks
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier

In [3]:
df = pd.read_csv('dataframe/UFC_kombinasi9_all_features.csv')
df.head()

Unnamed: 0,weight_class,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,...,gender,B_Stance_Open Stance,B_Stance_Orthodox,B_Stance_Southpaw,B_Stance_Switch,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Southpaw,R_Stance_Switch,Winner
0,5,0.785156,0.0,0.394141,0.352422,0.239219,0.011484,0.15625,0.132812,0.0,...,1,False,True,False,False,False,True,False,False,Blue
1,6,0.695312,0.0,0.600839,0.185547,0.088281,0.104375,0.09375,0.0625,0.0,...,1,False,False,True,False,False,True,False,False,Blue
2,3,0.5,0.266602,0.381462,0.456558,0.429614,0.46957,1.351562,0.000244,0.125,...,1,False,True,False,False,False,True,False,False,Red
3,5,0.0,0.0,0.47,0.25,0.75,0.0,0.0,0.0,0.0,...,1,False,True,False,False,False,True,False,False,Red
4,6,0.5,0.0,0.46,0.2925,0.125,0.1,0.0,0.0,0.0,...,1,False,True,False,False,False,True,False,False,Blue


In [4]:
class_mapping = {'Blue': 0, 'Draw': 1, 'Red': 2}
df_xgb = df.copy()

df_xgb["Winner"]= df_xgb['Winner'].map(class_mapping)

In [5]:
df = df_xgb

In [6]:
X = df.drop('Winner', axis=1).values
X

array([[5, 0.78515625, 0.0, ..., True, False, False],
       [6, 0.6953125, 0.0, ..., True, False, False],
       [3, 0.5, 0.2666015625, ..., True, False, False],
       ...,
       [6, 0.015625, 0.0, ..., False, True, False],
       [5, 0.78515625, 0.125, ..., True, False, False],
       [8, 0.5, 0.0, ..., True, False, False]], dtype=object)

In [7]:
y = df['Winner'].values
y

array([0, 0, 2, ..., 2, 0, 2], dtype=int64)

In [8]:
counter_y = Counter(y)
print(counter_y)

Counter({2: 2141, 0: 1277, 1: 62})


In [9]:
# Over Sampling
adasyn = ADASYN()
randomOver = RandomOverSampler()
smote = SMOTE()
borderSmote = BorderlineSMOTE()
svmSmote = SVMSMOTE()

# Melakukan resampling
X_adasyn, y_adasyn = adasyn.fit_resample(X, y)
X_randomOver, y_randomOver = randomOver.fit_resample(X, y)
X_smote, y_smote = smote.fit_resample(X, y)
X_borderSmote, y_borderSmote = borderSmote.fit_resample(X, y)
X_svmSmote, y_svmSmote = svmSmote.fit_resample(X, y)


# Under Sampling
rand_under = RandomUnderSampler(sampling_strategy='majority')
nearmiss = NearMiss()
nearmiss2 = NearMiss(version=2)
nearmiss3 = NearMiss(version=3)
tomek = TomekLinks()

# Melakukan resampling
X_rand_under, y_rand_under = rand_under.fit_resample(X, y)
X_nearmiss, y_nearmiss = nearmiss.fit_resample(X, y)
X_nearmiss2, y_nearmiss2 = nearmiss2.fit_resample(X, y)
X_nearmiss3, y_nearmiss3 = nearmiss3.fit_resample(X, y)
X_tomek, y_tomek = tomek.fit_resample(X, y)


In [12]:
def evaluate_classification(classifier):
  cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

  scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
  print("ADASYN OverSampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_adasyn, y_adasyn, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("Random OverSampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_randomOver, y_randomOver, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("SMOTE OverSampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_smote, y_smote, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("Borderline SMOTE OverSampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_borderSmote, y_borderSmote, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("SVM SMOTE OverSampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_svmSmote, y_svmSmote, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("Random Undersampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_rand_under, y_rand_under , scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("Near Miss 1 Undersampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_nearmiss, y_nearmiss, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("Near Miss 2 Undersampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_nearmiss2, y_nearmiss2, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("Near Miss 3 Undersampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_nearmiss3, y_nearmiss3, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

  print()

  print("Tomek Links Undersampling")
  for i in range(len(scoring)):
    score = cross_val_score(classifier, X_tomek, y_tomek, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

# Decision Tree

In [10]:
dt = DecisionTreeClassifier()
dt

In [11]:
evaluate_classification(dt)

ADASYN OverSampling


f1_micro score: 0.7212167857765682
f1_macro score: 0.7181945930159586
precision_micro score: 0.7251200020029112
precision_macro score: 0.7204271558579404
recall_micro score: 0.7241895548184688
recall_macro score: 0.7233793144939494

Random OverSampling
f1_micro score: 0.8456676831196877
f1_macro score: 0.8368110544621246
precision_micro score: 0.8395243381140421
precision_macro score: 0.8456674107078271
recall_micro score: 0.8425017309163761
recall_macro score: 0.839614938095572

SMOTE OverSampling
f1_micro score: 0.7112539893398081
f1_macro score: 0.711653286193266
precision_micro score: 0.7121846114385193
precision_macro score: 0.7110172070248624
recall_micro score: 0.7117188887820155
recall_macro score: 0.7177703497658618

Borderline SMOTE OverSampling
f1_micro score: 0.7469966109567224
f1_macro score: 0.7446836537064249
precision_micro score: 0.7469051908427375
precision_macro score: 0.7433592309912026
recall_micro score: 0.7496973604283488
recall_macro score: 0.7499745473717564

S

Karena Random Oversampling menghasilkan score terbaik maka akan digunakan Random Oversampling

In [12]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(dt, X_randomOver, y_randomOver, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.8417569819404115
f1_macro score: 0.8379619715001215
precision_micro score: 0.8418509583514224
precision_macro score: 0.8482557372772039
recall_micro score: 0.8413844991347584
recall_macro score: 0.8414766160471029


In [13]:
X_train, X_test, y_train, y_test = train_test_split(X_randomOver, y_randomOver, test_size=0.25, random_state=42)

## Hyperparameter Tuning

In [14]:
param_grid_c = {'criterion': ['gini', 'entropy'],
               'min_samples_split': [2, 5, 10],
               'max_depth' : [None, 10, 20, 50],
               'min_samples_leaf': [1, 2, 4]
               }

clf_dtc = GridSearchCV(estimator= DecisionTreeClassifier(), param_grid=param_grid_c, cv= 5)
clf_dtc.fit(X_train, y_train)

In [15]:
print("Hyperparameter terbaik untuk Decision Tree Classifier")
print(clf_dtc.best_params_)

Hyperparameter terbaik untuk Decision Tree Classifier
{'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2}


In [16]:
best_hypeparam_dtc = DecisionTreeClassifier(**clf_dtc.best_params_)
best_hypeparam_dtc.fit(X_train, y_train)

In [17]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(best_hypeparam_dtc, X_train, y_train, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.7986836775086911
f1_macro score: 0.7963262772994624
precision_micro score: 0.8001731273672164
precision_macro score: 0.805788214074504
recall_micro score: 0.7999246803535864
recall_macro score: 0.7989455667852969


In [13]:
def classification_metrics(prediction, y_test):
  accuracy = accuracy_score(y_test, prediction)
  f1 = f1_score(y_test, prediction, average="macro")
  recall = recall_score(y_test, prediction, average="macro")
  precision = precision_score(y_test, prediction, average="macro")

  print('Accuracy: ' + str(accuracy))
  print('F1 Score: ' + str(f1))
  print('Recall Score: ' + str(recall))
  print('Precision Score: ' + str(precision))

In [19]:
prediction = best_hypeparam_dtc.predict(X_test)
classification_metrics(prediction, y_test)

Accuracy: 0.8339538346984363
F1 Score: 0.8318367609671232
Recall Score: 0.8355769940688603
Precision Score: 0.8395026347867974


In [20]:
dt.fit(X_train, y_train)
prediction = dt.predict(X_test)
classification_metrics(prediction, y_test)

Accuracy: 0.8276247207743858
F1 Score: 0.825615369386176
Recall Score: 0.8293041780490406
Precision Score: 0.8338456107825182


# Random Forest

In [92]:
rf = RandomForestClassifier()

In [22]:
evaluate_classification(rf)

ADASYN OverSampling


f1_micro score: 0.8452748563860922
f1_macro score: 0.8414868804639493
precision_micro score: 0.8453671888624328
precision_macro score: 0.8398981422589724
recall_micro score: 0.8451820059154944
recall_macro score: 0.8478579136299829

Random OverSampling
f1_micro score: 0.8859728183304695
f1_macro score: 0.8834234582832602
precision_micro score: 0.8818773272051528
precision_macro score: 0.882246129361792
recall_micro score: 0.8858791452089362
recall_macro score: 0.8842962478150996

SMOTE OverSampling
f1_micro score: 0.8458540328401846
f1_macro score: 0.8462084404771002
precision_micro score: 0.8434334795188615
precision_macro score: 0.8454288895480181
recall_micro score: 0.8424093576068901
recall_macro score: 0.8441782307306722

Borderline SMOTE OverSampling
f1_micro score: 0.8477144538233106
f1_macro score: 0.8474011093872518
precision_micro score: 0.8473419276905891
precision_macro score: 0.8481578626019493
recall_micro score: 0.8459458862248519
recall_macro score: 0.8458510917359472



Karena score nya paling tinggi maka akan digunakan Random Oversampling

In [93]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(rf, X_randomOver, y_randomOver, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.8805742223007886
f1_macro score: 0.8805567588370946
precision_micro score: 0.879735496913813
precision_macro score: 0.8799383279868742
recall_micro score: 0.8787114183289096
recall_macro score: 0.8789893748263118


## Hyperparameter Tuning

In [95]:
param_grid_c = {
                'criterion': ['gini', 'entropy'],
               'min_samples_split': [2, 5, 10],
               'max_depth' : [None, 10, 20, 30],
               'min_samples_leaf': [1, 2, 4]
}

clf_rfc = GridSearchCV(estimator=RandomForestClassifier(), param_grid=param_grid_c, cv=cv)
clf_rfc.fit(X_train, y_train)

In [96]:
print("Hyperparameter terbaik untuk Decision Tree Classifier")
print(clf_rfc.best_params_)

Hyperparameter terbaik untuk Decision Tree Classifier
{'criterion': 'entropy', 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 5}


In [97]:
best_hypeparam_rfc = RandomForestClassifier(**clf_rfc.best_params_)
best_hypeparam_rfc.fit(X_train, y_train)

In [27]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(best_hypeparam_rfc, X_train, y_train, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.8469667284318574
f1_macro score: 0.8457015100170413
precision_micro score: 0.843492012882894
precision_macro score: 0.8449712407254802
recall_micro score: 0.8462221575305013
recall_macro score: 0.8465934080586337


In [28]:
prediction_rf = best_hypeparam_rfc.predict(X_test)
classification_metrics(prediction_rf, y_test)

Accuracy: 0.8715562174236783
F1 Score: 0.8723924833932717
Recall Score: 0.8724895200124694
Precision Score: 0.8724791911066422


# Logistic Regression

In [29]:
logistic = LogisticRegression(max_iter=2000)
logistic

In [30]:
evaluate_classification(logistic)

ADASYN OverSampling


f1_micro score: 0.5859117648582336
f1_macro score: 0.5830382177643059
precision_micro score: 0.5859117648582336
precision_macro score: 0.5833519393944753
recall_micro score: 0.5859117648582336
recall_macro score: 0.585442866709181

Random OverSampling
f1_micro score: 0.5721854519836432
f1_macro score: 0.5708080996787424
precision_micro score: 0.5721854519836432
precision_macro score: 0.5710988152215268
recall_micro score: 0.5721854519836432
recall_macro score: 0.5721818356539377

SMOTE OverSampling
f1_micro score: 0.5925708375902178
f1_macro score: 0.5902618807914692
precision_micro score: 0.5925708375902178
precision_macro score: 0.5901893240335436
recall_micro score: 0.5925708375902178
recall_macro score: 0.5925643263234718

Borderline SMOTE OverSampling
f1_micro score: 0.6610812876458064
f1_macro score: 0.6536985852376753
precision_micro score: 0.6610812876458064
precision_macro score: 0.6532892537670374
recall_micro score: 0.6610812876458064
recall_macro score: 0.6610825418345112



Karena Nearmiss 1 undersampling menghasilkan nilai terbaik maka akan digunakan

In [31]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(logistic, X_nearmiss, y_nearmiss, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.7203954802259886
f1_macro score: 0.7255289175801811
precision_micro score: 0.7203954802259886
precision_macro score: 0.742341030441717
recall_micro score: 0.7203954802259886
recall_macro score: 0.7205263157894737


In [32]:
X_train, X_test, y_train, y_test = train_test_split(X_nearmiss, y_nearmiss, test_size=0.25, random_state=42)

In [33]:
logistic.max_iter = 10000
logistic.fit(X_train, y_train)

In [34]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(logistic, X_train, y_train, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.6842424242424242
f1_macro score: 0.6885655632886215
precision_micro score: 0.6842424242424242
precision_macro score: 0.7189281954640684
recall_micro score: 0.6842424242424242
recall_macro score: 0.6818253968253968


In [35]:
prediction_logreg = logistic.predict(X_test)
classification_metrics(prediction_logreg, y_test)

Accuracy: 0.72
F1 Score: 0.7314034802406897
Recall Score: 0.727743271221532
Precision Score: 0.7408289241622574


# Softmax Regression

In [36]:
softmax = LogisticRegression(max_iter=2000, multi_class='multinomial')
softmax

In [37]:
evaluate_classification(softmax)

ADASYN OverSampling


f1_micro score: 0.5859117648582336
f1_macro score: 0.5830382177643059
precision_micro score: 0.5859117648582336
precision_macro score: 0.5833519393944753
recall_micro score: 0.5859117648582336
recall_macro score: 0.585442866709181

Random OverSampling
f1_micro score: 0.5721854519836432
f1_macro score: 0.5708080996787424
precision_micro score: 0.5721854519836432
precision_macro score: 0.5710988152215268
recall_micro score: 0.5721854519836432
recall_macro score: 0.5721818356539377

SMOTE OverSampling
f1_micro score: 0.5925708375902178
f1_macro score: 0.5902618807914692
precision_micro score: 0.5925708375902178
precision_macro score: 0.5901893240335436
recall_micro score: 0.5925708375902178
recall_macro score: 0.5925643263234718

Borderline SMOTE OverSampling
f1_micro score: 0.6610812876458064
f1_macro score: 0.6536985852376753
precision_micro score: 0.6610812876458064
precision_macro score: 0.6532892537670374
recall_micro score: 0.6610812876458064
recall_macro score: 0.6610825418345112



Karena Nearmiss 1 undersampling menghasilkan nilai terbaik maka akan digunakan

In [38]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(softmax, X_nearmiss, y_nearmiss, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.7203954802259886
f1_macro score: 0.7255289175801811
precision_micro score: 0.7203954802259886
precision_macro score: 0.742341030441717
recall_micro score: 0.7203954802259886
recall_macro score: 0.7205263157894737


In [39]:
softmax.max_iter = 10000
softmax.fit(X_train, y_train)

In [40]:
prediction_softmax = softmax.predict(X_test)
classification_metrics(prediction_softmax, y_test)

Accuracy: 0.72
F1 Score: 0.7314034802406897
Recall Score: 0.727743271221532
Precision Score: 0.7408289241622574


# KNN

In [41]:
knn = KNeighborsClassifier()
knn

In [42]:
evaluate_classification(knn)

ADASYN OverSampling


f1_micro score: 0.7214944306984117
f1_macro score: 0.7009685554498203
precision_micro score: 0.7214944306984117
precision_macro score: 0.721462240166198
recall_micro score: 0.7214944306984117
recall_macro score: 0.7206894923163788

Random OverSampling
f1_micro score: 0.7336882686763493
f1_macro score: 0.7273848956687254
precision_micro score: 0.7336882686763493
precision_macro score: 0.7257128921016075
recall_micro score: 0.7336882686763493
recall_macro score: 0.7336869430614318

SMOTE OverSampling
f1_micro score: 0.7289415717153968
f1_macro score: 0.7109074940344692
precision_micro score: 0.7289415717153965
precision_macro score: 0.7285688265715726
recall_micro score: 0.7289415717153965
recall_macro score: 0.7289398980336546

Borderline SMOTE OverSampling
f1_micro score: 0.7522117168524098
f1_macro score: 0.7413021420767496
precision_micro score: 0.7522117168524098
precision_macro score: 0.7558431723784282
recall_micro score: 0.7522117168524098
recall_macro score: 0.7522133397744066



Karena Borderline SMOTE oversampling menghasilkan nilai terbaik maka akan digunakan

In [43]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(knn, X_borderSmote, y_borderSmote, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.7522117168524098
f1_macro score: 0.7413021420767496
precision_micro score: 0.7522117168524098
precision_macro score: 0.7558431723784282
recall_micro score: 0.7522117168524098
recall_macro score: 0.7522133397744066


In [44]:
X_train, X_test, y_train, y_test = train_test_split(X_borderSmote, y_borderSmote, test_size=0.25, random_state=42)

## Hyperparameter Tuning

In [45]:
tuned_params = [{'n_neighbors': [2,3,4,5,6,7,8,9,10],'metric': ['euclidean', 'manhattan', 'jaccard']}]

clf_knn = GridSearchCV(knn, tuned_params, cv=cv, verbose = 2)
clf_knn.fit(X_train, y_train)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


[CV] END ....................metric=euclidean, n_neighbors=2; total time=   0.2s
[CV] END ....................metric=euclidean, n_neighbors=2; total time=   0.2s
[CV] END ....................metric=euclidean, n_neighbors=2; total time=   0.3s
[CV] END ....................metric=euclidean, n_neighbors=2; total time=   0.2s
[CV] END ....................metric=euclidean, n_neighbors=2; total time=   0.2s
[CV] END ....................metric=euclidean, n_neighbors=3; total time=   0.1s
[CV] END ....................metric=euclidean, n_neighbors=3; total time=   0.2s
[CV] END ....................metric=euclidean, n_neighbors=3; total time=   0.2s
[CV] END ....................metric=euclidean, n_neighbors=3; total time=   0.2s
[CV] END ....................metric=euclidean, n_neighbors=3; total time=   0.1s
[CV] END ....................metric=euclidean, n_neighbors=4; total time=   0.1s
[CV] END ....................metric=euclidean, n_neighbors=4; total time=   0.3s
[CV] END ...................

Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 810, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\metrics\_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\base.py", line 706, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
                             ^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\neighbors\_classification.py", line 254, in predict
    probabilities = self.predict_proba(X)
                    ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\neighbors\_classification.py", line 332, in predict_proba
    probabilities = ArgKminClassMode.compute(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python

[CV] END ....................metric=manhattan, n_neighbors=4; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=4; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=4; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=4; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=4; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=6; total time=   0.0s


Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 810, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\metrics\_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\base.py", line 706, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
                             ^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\neighbors\_classification.py", line 254, in predict
    probabilities = self.predict_proba(X)
                    ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\neighbors\_classification.py", line 332, in predict_proba
    probabilities = ArgKminClassMode.compute(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python

[CV] END ....................metric=manhattan, n_neighbors=6; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=6; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=6; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=6; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=7; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=7; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=7; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=7; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=7; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=8; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=8; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=8; total time=   0.0s
[CV] END ...................

Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 810, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\metrics\_scorer.py", line 527, in __call__
    return estimator.score(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\base.py", line 706, in score
    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
                             ^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\neighbors\_classification.py", line 254, in predict
    probabilities = self.predict_proba(X)
                    ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\sklearn\neighbors\_classification.py", line 332, in predict_proba
    probabilities = ArgKminClassMode.compute(
                    ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python

[CV] END ....................metric=manhattan, n_neighbors=9; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=9; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=9; total time=   0.0s
[CV] END ....................metric=manhattan, n_neighbors=9; total time=   0.0s
[CV] END ...................metric=manhattan, n_neighbors=10; total time=   0.0s
[CV] END ...................metric=manhattan, n_neighbors=10; total time=   0.0s
[CV] END ...................metric=manhattan, n_neighbors=10; total time=   0.0s
[CV] END ...................metric=manhattan, n_neighbors=10; total time=   0.0s
[CV] END ...................metric=manhattan, n_neighbors=10; total time=   0.0s
[CV] END ......................metric=jaccard, n_neighbors=2; total time=   6.7s




[CV] END ......................metric=jaccard, n_neighbors=2; total time=   8.6s




[CV] END ......................metric=jaccard, n_neighbors=2; total time=   7.1s




[CV] END ......................metric=jaccard, n_neighbors=2; total time=   7.2s




[CV] END ......................metric=jaccard, n_neighbors=2; total time=   7.2s




[CV] END ......................metric=jaccard, n_neighbors=3; total time=   6.8s




[CV] END ......................metric=jaccard, n_neighbors=3; total time=   7.2s




[CV] END ......................metric=jaccard, n_neighbors=3; total time=   7.1s




[CV] END ......................metric=jaccard, n_neighbors=3; total time=   7.0s




[CV] END ......................metric=jaccard, n_neighbors=3; total time=   7.6s




[CV] END ......................metric=jaccard, n_neighbors=4; total time=   7.1s




[CV] END ......................metric=jaccard, n_neighbors=4; total time=   6.7s




[CV] END ......................metric=jaccard, n_neighbors=4; total time=   7.8s




[CV] END ......................metric=jaccard, n_neighbors=4; total time=   8.4s




[CV] END ......................metric=jaccard, n_neighbors=4; total time=   6.2s




[CV] END ......................metric=jaccard, n_neighbors=5; total time=   5.5s




[CV] END ......................metric=jaccard, n_neighbors=5; total time=   6.0s




[CV] END ......................metric=jaccard, n_neighbors=5; total time=   5.7s




[CV] END ......................metric=jaccard, n_neighbors=5; total time=   5.4s




[CV] END ......................metric=jaccard, n_neighbors=5; total time=   5.7s




[CV] END ......................metric=jaccard, n_neighbors=6; total time=   3.7s




[CV] END ......................metric=jaccard, n_neighbors=6; total time=   4.1s




[CV] END ......................metric=jaccard, n_neighbors=6; total time=   4.3s




[CV] END ......................metric=jaccard, n_neighbors=6; total time=   4.3s




[CV] END ......................metric=jaccard, n_neighbors=6; total time=   4.3s




[CV] END ......................metric=jaccard, n_neighbors=7; total time=   4.3s




[CV] END ......................metric=jaccard, n_neighbors=7; total time=   4.3s




[CV] END ......................metric=jaccard, n_neighbors=7; total time=   4.5s




[CV] END ......................metric=jaccard, n_neighbors=7; total time=   4.5s




[CV] END ......................metric=jaccard, n_neighbors=7; total time=   4.5s




[CV] END ......................metric=jaccard, n_neighbors=8; total time=   4.2s




[CV] END ......................metric=jaccard, n_neighbors=8; total time=   4.0s




[CV] END ......................metric=jaccard, n_neighbors=8; total time=   4.2s




[CV] END ......................metric=jaccard, n_neighbors=8; total time=   4.0s




[CV] END ......................metric=jaccard, n_neighbors=8; total time=   4.0s




[CV] END ......................metric=jaccard, n_neighbors=9; total time=   4.1s




[CV] END ......................metric=jaccard, n_neighbors=9; total time=   3.7s




[CV] END ......................metric=jaccard, n_neighbors=9; total time=   4.2s




[CV] END ......................metric=jaccard, n_neighbors=9; total time=   4.1s




[CV] END ......................metric=jaccard, n_neighbors=9; total time=   4.2s




[CV] END .....................metric=jaccard, n_neighbors=10; total time=   4.0s




[CV] END .....................metric=jaccard, n_neighbors=10; total time=   4.4s




[CV] END .....................metric=jaccard, n_neighbors=10; total time=   4.4s




[CV] END .....................metric=jaccard, n_neighbors=10; total time=   4.0s




[CV] END .....................metric=jaccard, n_neighbors=10; total time=   3.6s


 0.70609381 0.69889577 0.6951723         nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.70795577 0.73625493 0.72036703 0.73240632 0.72185671 0.72570395
 0.71689016 0.72433887 0.71875374]


In [46]:
print("Hyperparameter terbaik untuk Decision Tree Classifier")
print(clf_knn.best_params_)

Hyperparameter terbaik untuk Decision Tree Classifier
{'metric': 'euclidean', 'n_neighbors': 3}


In [47]:
best_hypeparam_knn = KNeighborsClassifier(**clf_knn.best_params_)
best_hypeparam_knn.fit(X_train, y_train)

In [48]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(best_hypeparam_knn, X_train, y_train, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.7546222234544454
f1_macro score: 0.7440798354728558
precision_micro score: 0.7546222234544454
precision_macro score: 0.758287500775975
recall_micro score: 0.7546222234544454
recall_macro score: 0.753836383020513


In [49]:
prediction_knn = best_hypeparam_knn.predict(X_test)
classification_metrics(prediction_knn, y_test)

Accuracy: 0.7784810126582279
F1 Score: 0.7715069909432618
Recall Score: 0.7806938294171223
Precision Score: 0.7892214454322285


# Naive Bayes

# Neural Network (MLP)

In [50]:
mlp = MLPClassifier()
mlp

In [51]:
evaluate_classification(mlp)

ADASYN OverSampling


f1_micro score: 0.6337643764989458
f1_macro score: 0.4986538312531386
precision_micro score: 0.5793258132078176
precision_macro score: 0.6801726079650029
recall_micro score: 0.6006899251843628
recall_macro score: 0.6194356673483867

Random OverSampling
f1_micro score: 0.6869557362005454
f1_macro score: 0.6989007456147197
precision_micro score: 0.6838757449006208
precision_macro score: 0.7247158189242031
recall_micro score: 0.6961755630135882
recall_macro score: 0.695979004178906

SMOTE OverSampling
f1_micro score: 0.6703882885201466
f1_macro score: 0.64915182093164
precision_micro score: 0.6315743843440238
precision_macro score: 0.6979141470595417
recall_micro score: 0.6568099752775749
recall_macro score: 0.6756594957782401

Borderline SMOTE OverSampling
f1_micro score: 0.6984072103173881
f1_macro score: 0.5970926245309673
precision_micro score: 0.7215860653216211
precision_macro score: 0.7231679492526064
recall_micro score: 0.6893797556873277
recall_macro score: 0.679159751603126

SVM

Karena Borderline SMOTE Oversampling menghasilkan nilai terbaik maka akan digunakan

In [52]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(mlp, X_borderSmote, y_borderSmote, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.7025959846206239
f1_macro score: 0.6691094688466002
precision_micro score: 0.7052933112538594
precision_macro score: 0.7259910963800189
recall_micro score: 0.6956187668596454
recall_macro score: 0.6829646598048459


In [53]:
X_train, X_test, y_train, y_test = train_test_split(X_borderSmote, y_borderSmote, test_size=0.25, random_state=42)

## Hyperparameter Tuning

In [54]:
param_grid = {
    'hidden_layer_sizes': [(100,), (50, 50), (30, 20, 10)],
    'activation': ['relu', 'tanh', 'logistic'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive'],
    'max_iter': [10000],
    'random_state': [42]
}

tuned_mlp = GridSearchCV(mlp, param_grid, cv=cv)
tuned_mlp.fit(X_train, y_train)



In [None]:
print("Hyperparameter terbaik untuk Decision Tree Classifier")
print(tuned_mlp.best_params_)

Hyperparameter terbaik untuk Decision Tree Classifier
{'activation': 'logistic', 'alpha': 1e-05, 'hidden_layer_sizes': 10, 'max_iter': 1000, 'random_state': 42, 'solver': 'adam'}


In [None]:
best_hypeparam_mlp = MLPClassifier(**tuned_mlp.best_params_)
best_hypeparam_mlp.fit(X_train, y_train)

In [None]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(best_hypeparam_mlp, X_train, y_train, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.676993063883709
f1_macro score: 0.7118221371248534
precision_micro score: 0.676993063883709
precision_macro score: 0.7119523723308918
recall_micro score: 0.676993063883709
recall_macro score: 0.7161506430083246


In [None]:
prediction_mlp = best_hypeparam_mlp.predict(X_test)
classification_metrics(prediction_mlp, y_test)

Accuracy: 0.6658428077113199
F1 Score: 0.7016426666936328
Recall Score: 0.7031492494782547
Precision Score: 0.7056713141602785


# Kesimpulan

Dari semua model classification, untuk kobinasi pre processing ini yang menghasilkan hasil paling bagus adalah Random Forest dengan data yang di Random Oversampling

XGBOOST

In [14]:
xgb = XGBClassifier()
xgb

In [15]:
evaluate_classification(xgb)

ADASYN OverSampling
f1_micro score: 0.8130441038578586
f1_macro score: 0.8131676935326292
precision_micro score: 0.8130441038578585
precision_macro score: 0.8156295211528445
recall_micro score: 0.8130441038578585
recall_macro score: 0.8137086447835671

Random OverSampling
f1_micro score: 0.8369924966968496
f1_macro score: 0.8368855163907423
precision_micro score: 0.8369924966968496
precision_macro score: 0.837371745693148
recall_micro score: 0.8369924966968496
recall_macro score: 0.8369954033505437

SMOTE OverSampling
f1_micro score: 0.8148850261221622
f1_macro score: 0.8142022450821775
precision_micro score: 0.8148850261221622
precision_macro score: 0.81758266449704
recall_micro score: 0.8148850261221622
recall_macro score: 0.8148962667654256

Borderline SMOTE OverSampling
f1_micro score: 0.809279125301526
f1_macro score: 0.8095146445106772
precision_micro score: 0.809279125301526
precision_macro score: 0.8137795158854024
recall_micro score: 0.809279125301526
recall_macro score: 0.809

In [16]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(xgb, X_smote, y_smote, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.8148850261221622
f1_macro score: 0.8142022450821775
precision_micro score: 0.8148850261221622
precision_macro score: 0.81758266449704
recall_micro score: 0.8148850261221622
recall_macro score: 0.8148962667654256


In [17]:
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size=0.25, random_state=42)

## Hyperparameter Tuning

In [18]:
param_grid = {
    'learning_rate': [0.01, 0.1, 0.2],
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 4, 5],
    'objective':['multiclass:softmax'],
}

xgb_grid = GridSearchCV(estimator= xgb, param_grid=param_grid, cv=cv, scoring='accuracy', verbose=1, n_jobs=-1)
xgb_grid.fit(X_train, y_train)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [19]:
print("Hyperparameter terbaik untuk Decision Tree Classifier")
print(xgb_grid.best_params_)

Hyperparameter terbaik untuk Decision Tree Classifier
{'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 300, 'objective': 'multiclass:softmax'}


In [20]:
best_xgb = XGBClassifier(**xgb_grid.best_params_)
best_xgb.fit(X_train, y_train)

In [21]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring = ['f1_micro', 'f1_macro', 'precision_micro', 'precision_macro', 'recall_micro', 'recall_macro']
for i in range(len(scoring)):
    score = cross_val_score(best_xgb, X_train, y_train, scoring=scoring[i], cv=cv, n_jobs=-1)
    print(f"{scoring[i]} score: {np.mean(score)}")

f1_micro score: 0.8083851466932088
f1_macro score: 0.8076922797657542
precision_micro score: 0.8083851466932088
precision_macro score: 0.809893549067076
recall_micro score: 0.8083851466932088
recall_macro score: 0.8087637531137226


In [22]:
prediction = best_xgb.predict(X_test)
classification_metrics(prediction, y_test)

Accuracy: 0.8075965130759651
F1 Score: 0.8074524851291326
Recall Score: 0.8075233139651775
Precision Score: 0.8074394862592809


# Kaggle

In [122]:
df_kaggle = pd.read_csv('dataframe/UFC_Pre_Kombinasi7.csv')
df_kaggle.head()

Unnamed: 0,weight_class,id,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,...,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,gender,B_Stance_Open Stance,B_Stance_Orthodox,B_Stance_Southpaw,B_Stance_Switch,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Southpaw,R_Stance_Switch
0,1,0.0,0.0,0.0,0.49375,0.44875,0.475,0.1775,0.0,0.125,0.5,0.0,88.9375,42.0625,68.8125,28.4375,131.625,80.3125,145.4375,79.640625,5.25,2.380859,2.0625,1.25,60.3125,19.375,49.8125,13.5,15.125,13.0,17.125,12.0,13.5,9.6875,0.625,0.3125,58.125,17.6875,44.1875,9.6875,...,32.625,7.4375,5.375,6.8125,2.875,3.375,1.9375,3.4375,2.125,215.0,94.9375,900.0,15.0,0.0,3.0,0.0,3.0,4.0,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,154.94,152.4,115.0,35.0,27.0,0,False,True,False,False,False,True,False,False
1,6,1.0,0.0,0.25,0.473125,0.371875,0.0,0.20375,0.0,0.5,0.1875,0.0,54.3125,23.75,53.875,20.125,63.6875,32.25,109.875,70.375,0.0,0.0,6.0,2.3125,37.125,10.9375,37.75,11.375,10.4375,7.6875,8.0,3.625,6.75,5.125,8.125,5.125,42.6875,16.0625,47.25,14.5625,...,29.001953,16.0,11.433594,15.714844,11.5625,1.930664,1.556641,7.413086,6.749023,222.675781,273.668945,895.705078,31.0,0.0,3.0,0.0,4.0,6.0,5.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,170.18,182.88,170.0,28.0,31.0,1,False,True,False,False,False,True,False,False
2,4,2.0,0.0,0.5,0.5,0.48,0.615,0.0,0.0,0.0,0.0,0.0,72.5,36.5,48.0,23.0,118.5,74.0,117.0,79.640625,5.835938,2.380859,1.5,0.0,61.0,30.0,27.5,10.5,8.5,4.5,16.5,9.5,3.0,2.0,4.0,3.0,40.0,14.0,31.5,12.0,...,46.0,7.0,3.5,8.5,6.5,2.0,1.0,0.0,0.0,35.5,40.5,900.0,6.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,170.18,177.8,145.0,24.0,30.0,1,False,True,False,False,False,True,False,False
3,3,3.0,0.0,0.0,0.595,0.57,0.62,0.0,0.0,1.0,0.0,0.5,12.0,8.0,36.0,25.0,104.0,84.0,109.0,79.640625,5.835938,2.380859,0.0,0.0,11.0,7.0,23.0,15.0,0.0,0.0,9.0,6.0,1.0,1.0,4.0,4.0,2.0,0.0,17.0,8.0,...,17.96875,5.4375,3.5625,5.632812,3.786182,5.0,3.484375,3.46875,2.5,112.085938,85.261719,674.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,165.1,162.56,125.0,27.0,23.0,1,False,True,False,False,False,True,False,False
4,6,4.0,0.0,0.0,0.505312,0.439375,0.62,0.067187,1.002136,0.640625,0.015625,0.046875,29.890625,14.53125,13.90625,5.171875,54.515625,39.078125,22.6875,13.53125,2.46875,2.21875,2.625,0.171875,24.25,11.265625,8.453125,0.875,2.328125,1.4375,2.40625,2.15625,3.3125,1.828125,3.046875,2.140625,18.984375,5.71875,9.984375,2.546875,...,2.5,2.0,0.5,7.0,5.0,0.5,0.5,9.5,6.5,0.0,0.0,476.0,3.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,175.26,182.88,170.0,33.0,24.0,1,False,True,False,False,False,True,False,False


In [123]:
print(set(df.columns))
print(set(df_kaggle.columns))

print(set(df.columns) - set(df_kaggle.columns))
print(set(df_kaggle.columns) - set(df.columns))

{'B_avg_opp_TOTAL_STR_landed', 'B_avg_REV', 'R_avg_opp_GROUND_landed', 'R_Stance_Orthodox', 'R_avg_opp_CTRL_time(seconds)', 'B_avg_TD_att', 'R_avg_opp_BODY_landed', 'R_avg_DISTANCE_att', 'R_total_rounds_fought', 'R_avg_KD', 'B_avg_opp_BODY_att', 'B_avg_opp_DISTANCE_landed', 'B_avg_HEAD_att', 'B_avg_opp_LEG_att', 'gender', 'B_avg_opp_TD_att', 'R_avg_opp_GROUND_att', 'R_current_win_streak', 'R_win_by_Decision_Majority', 'R_avg_LEG_att', 'B_avg_TOTAL_STR_landed', 'B_win_by_TKO_Doctor_Stoppage', 'R_avg_DISTANCE_landed', 'R_avg_opp_SIG_STR_pct', 'B_win_by_Decision_Split', 'B_avg_opp_TD_pct', 'R_avg_SIG_STR_pct', 'R_avg_opp_HEAD_att', 'R_Stance_Switch', 'B_avg_LEG_landed', 'R_avg_opp_SIG_STR_landed', 'B_avg_DISTANCE_landed', 'R_avg_HEAD_landed', 'R_avg_HEAD_att', 'R_longest_win_streak', 'B_avg_CLINCH_att', 'R_avg_GROUND_att', 'R_total_title_bouts', 'R_avg_opp_TOTAL_STR_landed', 'B_avg_opp_GROUND_att', 'B_avg_SIG_STR_landed', 'B_avg_opp_REV', 'B_Stance_Switch', 'B_Height_cms', 'R_avg_GROUND_l

In [124]:
df_kaggle = df_kaggle.drop(columns="id")

In [125]:
df_kaggle

Unnamed: 0,weight_class,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,...,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,gender,B_Stance_Open Stance,B_Stance_Orthodox,B_Stance_Southpaw,B_Stance_Switch,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Southpaw,R_Stance_Switch
0,1,0.000000,0.00,0.493750,0.448750,0.475000,0.177500,0.000000,0.125000,0.500000,0.000000,88.937500,42.062500,68.812500,28.437500,131.625000,80.312500,145.437500,79.640625,5.250000,2.380859,2.062500,1.250000,60.312500,19.375000,49.812500,13.500000,15.125000,13.000000,17.12500,12.000000,13.500000,9.687500,0.625000,0.312500,58.125000,17.68750,44.187500,9.687500,16.000000,...,32.625000,7.437500,5.375000,6.812500,2.875000,3.375000,1.937500,3.437500,2.125000,215.000000,94.937500,900.000000,15.0,0.0,3.0,0.0,3.0,4.0,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,154.94,152.40,115.0,35.0,27.0,0,False,True,False,False,False,True,False,False
1,6,0.000000,0.25,0.473125,0.371875,0.000000,0.203750,0.000000,0.500000,0.187500,0.000000,54.312500,23.750000,53.875000,20.125000,63.687500,32.250000,109.875000,70.375000,0.000000,0.000000,6.000000,2.312500,37.125000,10.937500,37.750000,11.375000,10.437500,7.687500,8.00000,3.625000,6.750000,5.125000,8.125000,5.125000,42.687500,16.06250,47.250000,14.562500,11.500000,...,29.001953,16.000000,11.433594,15.714844,11.562500,1.930664,1.556641,7.413086,6.749023,222.675781,273.668945,895.705078,31.0,0.0,3.0,0.0,4.0,6.0,5.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,170.18,182.88,170.0,28.0,31.0,1,False,True,False,False,False,True,False,False
2,4,0.000000,0.50,0.500000,0.480000,0.615000,0.000000,0.000000,0.000000,0.000000,0.000000,72.500000,36.500000,48.000000,23.000000,118.500000,74.000000,117.000000,79.640625,5.835938,2.380859,1.500000,0.000000,61.000000,30.000000,27.500000,10.500000,8.500000,4.500000,16.50000,9.500000,3.000000,2.000000,4.000000,3.000000,40.000000,14.00000,31.500000,12.000000,13.500000,...,46.000000,7.000000,3.500000,8.500000,6.500000,2.000000,1.000000,0.000000,0.000000,35.500000,40.500000,900.000000,6.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,170.18,177.80,145.0,24.0,30.0,1,False,True,False,False,False,True,False,False
3,3,0.000000,0.00,0.595000,0.570000,0.620000,0.000000,0.000000,1.000000,0.000000,0.500000,12.000000,8.000000,36.000000,25.000000,104.000000,84.000000,109.000000,79.640625,5.835938,2.380859,0.000000,0.000000,11.000000,7.000000,23.000000,15.000000,0.000000,0.000000,9.00000,6.000000,1.000000,1.000000,4.000000,4.000000,2.000000,0.00000,17.000000,8.000000,1.000000,...,17.968750,5.437500,3.562500,5.632812,3.786182,5.000000,3.484375,3.468750,2.500000,112.085938,85.261719,674.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,165.10,162.56,125.0,27.0,23.0,1,False,True,False,False,False,True,False,False
4,6,0.000000,0.00,0.505312,0.439375,0.620000,0.067187,1.002136,0.640625,0.015625,0.046875,29.890625,14.531250,13.906250,5.171875,54.515625,39.078125,22.687500,13.531250,2.468750,2.218750,2.625000,0.171875,24.250000,11.265625,8.453125,0.875000,2.328125,1.437500,2.40625,2.156250,3.312500,1.828125,3.046875,2.140625,18.984375,5.71875,9.984375,2.546875,3.203125,...,2.500000,2.000000,0.500000,7.000000,5.000000,0.500000,0.500000,9.500000,6.500000,0.000000,0.000000,476.000000,3.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,175.26,182.88,170.0,33.0,24.0,1,False,True,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597,3,0.015625,0.00,0.461132,0.430000,0.217562,0.192031,0.125000,0.068893,0.000000,0.000000,67.547211,30.941406,66.855469,27.823242,90.187500,47.000000,88.656250,43.437500,1.656250,0.733398,2.093750,0.750000,51.546875,18.554688,51.519531,15.902344,7.500000,5.000000,8.00000,5.257812,4.593750,3.578125,4.429688,3.500000,52.110474,18.06250,50.195312,17.000000,5.500000,...,17.968750,5.437500,3.562500,5.632812,3.786182,5.000000,3.484375,3.468750,2.500000,112.085938,85.261719,674.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,167.64,175.26,135.0,35.0,24.0,0,False,True,False,False,False,True,False,False
598,5,0.000000,0.50,0.585000,0.570000,0.000000,0.500000,1.000000,0.000000,0.500000,0.000000,21.000000,11.000000,35.500000,17.500000,51.000000,37.500000,64.000000,41.000000,1.500000,0.000000,3.500000,2.312500,16.000000,7.000000,32.000000,15.000000,3.500000,2.500000,3.50000,2.500000,1.500000,1.500000,0.000000,0.000000,7.500000,1.50000,14.000000,4.500000,7.500000,...,9.968750,4.093750,2.187500,3.437500,1.812500,12.375000,8.406250,16.500000,11.312500,87.843750,143.281250,475.312500,12.0,0.0,2.0,0.0,2.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,180.34,185.42,170.0,30.0,22.0,1,False,False,True,False,False,True,False,False
599,5,0.750000,0.50,0.360254,0.501284,0.250000,0.119775,0.070312,0.008789,0.281250,0.000000,102.378906,39.505371,90.445801,47.699219,104.798828,41.519043,92.248047,49.399902,0.296875,0.250000,2.027344,0.570801,87.966797,28.396973,66.450684,28.889160,11.666504,8.790527,17.12500,12.000000,2.745605,2.317871,3.665527,3.273926,90.642090,30.81543,77.714355,38.344238,7.118164,...,6.437500,3.812500,1.562500,5.187500,4.312500,16.437500,12.062500,0.312500,0.187500,302.000000,0.187500,455.937500,9.0,0.0,3.0,0.0,5.0,5.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,0.0,170.18,180.34,155.0,31.0,32.0,1,False,True,False,False,False,True,False,False
600,9,0.000000,0.00,0.520000,0.557500,0.500000,0.080000,0.000000,0.250000,0.000000,0.000000,74.500000,40.750000,52.000000,29.250000,85.750000,50.750000,63.000000,38.500000,2.250000,1.000000,3.750000,0.500000,68.250000,35.250000,42.250000,19.500000,4.750000,4.000000,6.75000,6.750000,1.500000,1.500000,3.000000,3.000000,46.250000,20.00000,35.000000,17.500000,14.250000,...,5.597656,1.789062,1.273438,15.714844,11.562500,2.105469,0.980469,0.343750,0.218750,52.361328,126.683594,319.214844,17.0,0.0,1.0,0.0,1.0,5.0,5.0,0.0,0.0,0.0,1.0,4.0,0.0,0.0,180.34,187.96,205.0,28.0,33.0,1,False,False,True,False,False,True,False,False


In [126]:
prediction_kaggle = best_hypeparam_rfc.predict(df_kaggle)
prediction_kaggle



array([0, 0, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0,
       0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 0, 0,
       2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2,
       2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 2, 2, 2, 2, 2,
       2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2,
       2, 2, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 2,
       0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 2, 0,
       0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2,
       2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0,
       2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 0,
       0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2,
       0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2,
       0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0,

In [127]:
class_mapping = {0:'Blue', 1:'Draw', 2:'Red'}
def map_class(value):
    return class_mapping.get(value, value)

vectorized_mapping  = np.vectorize(map_class)

prediction_kaggle = vectorized_mapping(prediction_kaggle)

In [128]:
prediction_kaggle

array(['Blue', 'Blue', 'Blue', 'Red', 'Red', 'Red', 'Blue', 'Red', 'Red',
       'Blue', 'Blue', 'Red', 'Red', 'Red', 'Red', 'Red', 'Blue', 'Red',
       'Red', 'Red', 'Blue', 'Blue', 'Blue', 'Blue', 'Blue', 'Red',
       'Blue', 'Blue', 'Blue', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red',
       'Blue', 'Red', 'Red', 'Blue', 'Red', 'Red', 'Red', 'Blue', 'Blue',
       'Red', 'Red', 'Red', 'Blue', 'Blue', 'Blue', 'Blue', 'Red', 'Red',
       'Red', 'Red', 'Blue', 'Red', 'Blue', 'Blue', 'Red', 'Red', 'Blue',
       'Red', 'Red', 'Red', 'Red', 'Red', 'Blue', 'Red', 'Red', 'Red',
       'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Blue', 'Blue',
       'Blue', 'Red', 'Blue', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red',
       'Red', 'Red', 'Blue', 'Red', 'Red', 'Blue', 'Red', 'Red', 'Red',
       'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Blue', 'Red', 'Red',
       'Red', 'Blue', 'Red', 'Red', 'Red', 'Blue', 'Red', 'Red', 'Blue',
       'Blue', 'Red', 'Red', 'Blue', 'Blue', 'Blue', 'Blue'

In [129]:
df_kaggle = pd.read_csv('dataframe/UFC_Test_Classif_X.csv')
df_kaggle.head()

Unnamed: 0,id,R_fighter,B_fighter,Referee,date,location,title_bout,weight_class,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,...,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,0,Tecia Torres,Juliana Lima,Chris Tognoni,2017-07-07,"Las Vegas, Nevada, USA",False,WomenStrawweight,0.0,0.0,0.49375,0.44875,0.475,0.1775,0.0,0.125,0.5625,0.0,88.9375,42.0625,68.8125,28.4375,131.625,80.3125,145.4375,98.75,5.25,2.5,2.0625,1.25,60.3125,19.375,49.8125,13.5,15.125,13.0,18.375,14.625,13.5,9.6875,...,13.1875,22.5,19.5,4.9375,3.875,147.8125,66.0625,130.8125,32.625,7.4375,5.375,6.8125,2.875,3.375,1.9375,3.4375,2.125,215.0,94.9375,900.0,15,0,3,0,3,4,1,0,0,0,4,0,0,0,Orthodox,154.94,152.4,115.0,35.0,27.0
1,1,John Howard,Lorenz Larkin,Herb Dean,2015-01-18,"Boston, Massachusetts, USA",False,Welterweight,0.0,0.25,0.473125,0.371875,0.0,0.20375,0.0,0.5,0.1875,0.0,54.3125,23.75,53.875,20.125,63.6875,32.25,109.875,70.375,0.0,0.0,7.8125,2.375,37.125,10.9375,37.75,11.375,10.4375,7.6875,8.0,3.625,6.75,5.125,...,14.579102,13.258789,12.456055,18.15625,14.165039,39.666992,23.71875,67.904297,29.001953,23.037109,13.837891,21.053711,15.53418,1.930664,1.556641,7.413086,6.749023,222.675781,273.668945,895.705078,31,0,4,0,4,6,5,0,0,3,1,2,0,0,Orthodox,170.18,182.88,170.0,28.0,31.0
2,2,Kyle Bochniak,Jeremy Kennedy,Todd Ronald Anderson,2017-07-22,"Uniondale, New York, USA",False,Featherweight,0.0,0.5,0.5,0.48,0.615,0.0,0.0,0.0,0.0,0.0,72.5,36.5,48.0,23.0,118.5,74.0,117.0,89.0,11.5,6.5,1.5,0.0,61.0,30.0,27.5,10.5,8.5,4.5,16.5,9.5,3.0,2.0,...,15.0,17.5,15.5,21.0,16.0,126.0,42.0,190.0,67.0,7.0,3.5,8.5,6.5,2.0,1.0,0.0,0.0,35.5,40.5,900.0,6,0,0,1,1,1,1,0,0,1,0,0,0,0,Orthodox,170.18,177.8,145.0,24.0,30.0
3,3,Yao Zhikui,Royston Wee,Steve Perceval,2014-08-23,"Macau, China",False,Bantamweight,0.0,0.0,0.66,0.69,0.85,0.0,0.0,2.0,0.0,1.0,12.0,8.0,36.0,25.0,104.0,84.0,109.0,89.0,7.0,6.0,0.0,0.0,11.0,7.0,23.0,15.0,0.0,0.0,9.0,6.0,1.0,1.0,...,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Orthodox,165.1,162.56,125.0,27.0,23.0
4,4,Carlos Newton,Pat Miletich,John McCarthy,2001-05-04,"Atlantic City, New Jersey, USA",True,Welterweight,0.0,0.0,0.505312,0.439375,0.815937,0.067187,1.28125,0.640625,0.015625,0.046875,29.890625,14.53125,13.90625,5.171875,54.515625,39.078125,22.6875,13.53125,2.46875,2.21875,2.625,0.171875,24.25,11.265625,8.453125,0.875,2.328125,1.4375,2.40625,2.15625,3.3125,1.828125,...,1.0,4.5,4.0,0.0,0.0,14.0,9.5,5.0,2.5,2.0,0.5,7.0,5.0,0.5,0.5,9.5,6.5,0.0,0.0,476.0,3,1,1,0,1,1,1,0,0,0,0,0,1,0,Orthodox,175.26,,170.0,33.0,24.0


In [130]:
df_kaggle = df_kaggle[['id']]
df_kaggle.head()

Unnamed: 0,id
0,0
1,1
2,2
3,3
4,4


In [131]:
df_kaggle['Winner'] = pd.DataFrame({'Winner': prediction_kaggle})

In [132]:
df_kaggle.head()

Unnamed: 0,id,Winner
0,0,Blue
1,1,Blue
2,2,Blue
3,3,Red
4,4,Red


In [82]:
df_kaggle.to_csv('dataframe/Kaggle_prediction7smote.csv', index=False)
