# UFC Winner Prediction

*Course: Machine Learning Projects with TensorFlow 2.0 by Vlad Sebastian Ionescu*

*Data: https://www.kaggle.com/rajeevw/ufcdata*

In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
import os, datetime
%load_ext tensorboard

print(tf.__version__)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
2.0.0


## 1. Load Data

In [2]:
data = pd.read_csv('data/preprocessed_data.csv').sample(frac=1)
data.head()

Unnamed: 0,Winner,title_bout,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,...,weight_class_Women's Strawweight,B_Stance_Open Stance,B_Stance_Orthodox,B_Stance_Sideways,B_Stance_Southpaw,B_Stance_Switch,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Southpaw,R_Stance_Switch
1565,Blue,True,5,0.0,2.0,0.0,29.0,19.5,19.0,13.5,...,1,0,1,0,0,0,0,1,0,0
126,Blue,False,3,0.0,3.0,0.0,16.0,13.333333,12.666667,9.666667,...,0,0,0,0,0,1,0,1,0,0
63,Blue,False,3,0.0,1.0,0.0,5.5,4.5,4.5,3.5,...,0,0,0,0,0,1,0,0,1,0
1356,Red,False,3,1.0,0.0,0.0,13.583333,8.5,10.25,5.916667,...,0,0,1,0,0,0,0,1,0,0
465,Blue,False,3,1.0,0.0,0.0,12.25,7.0,1.5,1.0,...,0,0,1,0,0,0,0,1,0,0


## 2. Data Processing

In [3]:
data['Winner'] = data['Winner'].map(lambda x: 1 if x=='Red' else 0)
data['title_bout'] = data['title_bout'].map(lambda x: 1 if x=='Red' else 0)

In [4]:
data.head()

Unnamed: 0,Winner,title_bout,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,...,weight_class_Women's Strawweight,B_Stance_Open Stance,B_Stance_Orthodox,B_Stance_Sideways,B_Stance_Southpaw,B_Stance_Switch,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Southpaw,R_Stance_Switch
1565,0,0,5,0.0,2.0,0.0,29.0,19.5,19.0,13.5,...,1,0,1,0,0,0,0,1,0,0
126,0,0,3,0.0,3.0,0.0,16.0,13.333333,12.666667,9.666667,...,0,0,0,0,0,1,0,1,0,0
63,0,0,3,0.0,1.0,0.0,5.5,4.5,4.5,3.5,...,0,0,0,0,0,1,0,0,1,0
1356,1,0,3,1.0,0.0,0.0,13.583333,8.5,10.25,5.916667,...,0,0,1,0,0,0,0,1,0,0
465,0,0,3,1.0,0.0,0.0,12.25,7.0,1.5,1.0,...,0,0,1,0,0,0,0,1,0,0


In [5]:
data.describe()

Unnamed: 0,Winner,title_bout,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,...,weight_class_Women's Strawweight,B_Stance_Open Stance,B_Stance_Orthodox,B_Stance_Sideways,B_Stance_Southpaw,B_Stance_Switch,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Southpaw,R_Stance_Switch
count,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,...,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0,3592.0
mean,0.662584,0.0,3.196548,0.560412,1.126114,0.0,8.800322,6.15801,8.308673,5.615597,...,0.027283,0.001392,0.768931,0.000557,0.197383,0.031737,0.003341,0.758352,0.210746,0.027561
std,0.472894,0.0,0.664834,0.79066,1.385555,0.0,7.099821,5.077738,7.51837,5.41542,...,0.162929,0.037288,0.421575,0.023593,0.398079,0.175324,0.057711,0.428142,0.407895,0.163735
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,3.0,0.0,0.0,0.0,3.666667,2.5,3.0,2.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
50%,1.0,0.0,3.0,0.0,1.0,0.0,7.0,5.0,6.467611,4.275253,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
75%,1.0,0.0,3.0,1.0,2.0,0.0,12.275974,8.5,11.5,7.857143,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
max,1.0,0.0,5.0,6.0,12.0,0.0,49.0,39.0,87.0,68.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [6]:
train_size = int(0.7*len(data))
features = data.drop(columns=['Winner'])
target = data['Winner']
X_train, X_test = features.values[:train_size], features.values[train_size:]
y_train, y_test = target.values[:train_size], target.values[train_size:]

In [7]:
corr = data.corr()
cmap = sns.diverging_palette(250, 10, as_cmap=True)
#plt.figure(figsize=(40, 80))
#sns.heatmap(corr[['Winner']], square=True, cmap=cmap, 
#            annot=True, vmax=3, cbar_kws={'shrink':.5}, linewidths=.5)

## 3. TF ANN

In [19]:
%reload_ext tensorboard

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.leaky_relu), # x if x>0, else alpha*x
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(128, activation=tf.nn.leaky_relu), 
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# We give more weight to the class that is under represented (blue)
red = len(y_train[y_train>0])
blue = len(y_train) - red
total = len(y_train)
weight_red = total / (2*red)
weight_blue = total / (2*blue)
class_weight = {0: weight_blue, 1: weight_red}
print(class_weight)

adam_optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MAE

model.compile(
    optimizer=adam_optimizer,
    loss = 'binary_crossentropy', 
    metrics=[tf.keras.metrics.TruePositives(name='tp'),
             tf.keras.metrics.FalsePositives(name='fp'),
             tf.keras.metrics.TrueNegatives(name='tn'),
             tf.keras.metrics.FalseNegatives(name='fn'),      
             tf.keras.metrics.BinaryAccuracy(name='accuracy'), 
             tf.keras.metrics.Precision(name='precision'),
             tf.keras.metrics.Recall(name='recall'), 
             tf.keras.metrics.AUC(name='auc')
            ])

save_best_callback = tf.keras.callbacks.ModelCheckpoint('models/model-{epoch:02d}-{val_accuracy:.2f}.hdf5', 
                                                        monitor='val_accuracy', 
                                                        verbose=1, 
                                                        save_best_only=True, 
                                                        save_weights_only=False, 
                                                        save_frequency=1)

logdir = os.path.join('tflogs', datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1, profile_batch=0)

%tensorboard --logdir tflogs

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(X_train)

model.fit(x_train_scaled, y_train, 
          epochs=30, 
          class_weight=class_weight,
          batch_size=64,
          validation_split=0.1,
          callbacks=[tensorboard_callback]
         )

{0: 1.4684579439252337, 1: 0.7581423401688782}


Reusing TensorBoard on port 6006 (pid 8880), started 0:07:51 ago. (Use '!kill 8880' to kill it.)

Train on 2262 samples, validate on 252 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f8ee2d69190>

In [9]:
#loaded_model = tf.keras.models.load_model('models/model-50-0.61.hdf5')
#print(loaded_model.summary())
x_test_scaled = scaler.fit_transform(X_test)
model.evaluate(x_test_scaled, y_test, verbose=2)
#np.round(model.predict(X_test))

1078/1 - 1s - loss: 1.1980 - accuracy: 0.6466 - precision: 0.7418


[1.241041070676248, 0.6465677, 0.741844]