In [1]:
import time
import random
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import matplotlib as matplot
import matplotlib.pyplot as plt

from collections import Counter

from sklearn import preprocessing
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import MaxAbsScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, classification_report, make_scorer, accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score, cross_val_predict

# Collect Data

## Fighter Stats

In [2]:
# Read
fighter_stats = pd.read_csv("UFC_Fighters_Database.csv")

In [3]:
fighter_stats.set_index("NAME", inplace = True)

In [4]:
fighter_stats = fighter_stats[["REACH" ,"SLPM" ,"SAPM" ,"STRA" ,"STRD" ,"TD" ,"TDA" ,"TDD" ,"SUBA"]]

In [5]:
transformer_fighter_stats = MaxAbsScaler().fit(fighter_stats.values)
transformed_fighter_stats_values = transformer_fighter_stats.transform(fighter_stats.values)
fighter_stats.loc[:,:] = transformed_fighter_stats_values

In [6]:
fighter_stats.index = [element.lower() for element in list(fighter_stats.index)]

## Fight Stats

In [7]:
# Fight Statistics
%store -r ufc_1_raw

In [8]:
ufc_1_raw[((ufc_1_raw["Name"] == "Dong Hyun Kim") | (ufc_1_raw["Name"] == "Michael McDonald"))].index

Int64Index([4670, 4886, 5856, 5865, 6069, 6144, 6373, 6399], dtype='int64', name='Fight_ID')

In [9]:
ufc_1_raw.drop([4670, 4886, 5856, 5865, 6069, 6144, 6373, 6399], inplace= True)

In [10]:
# Clean for numerical data
round_data = ufc_1_raw.set_index("Name")

In [11]:
round_data.index = [element.lower() for element in list(round_data.index)]

In [12]:
round_data.drop(["Date", "Event_ID", "winby", "HomeTown", "Location", "ID", "winner", "corner"], inplace = True, axis = 1)

In [13]:
transformer_round_data = MaxAbsScaler().fit(round_data.values)
transformed_round_data_values = transformer_round_data.transform(round_data.values)
round_data.loc[:,:] = transformed_round_data_values

In [14]:
fighters = np.unique(list(round_data.index))
cols = list(round_data.columns)

In [15]:
def average_fights(df): 
    
    list_dfs = []
    for fighter in fighters:
        print(fighter)
        if df.loc[fighter].shape[0] !=442:
            df_new = pd.DataFrame(df.loc[fighter].mean(axis = 0, skipna = True))
            df_new.columns = [fighter]
            list_dfs.append(df_new)
            
        elif df.loc[fighter].shape[0] == 442:
            list_dfs.append(pd.DataFrame(df.loc[fighter]))
        
    df_1 = pd.concat(list_dfs, axis = 1)
    
#     df_1.index = fighters
#     df_1.columns = cols
        
    return df_1

In [16]:
averaged_fighter_performance = average_fights(round_data)

aaron phillips
abdul razak alhassan
abel trujillo
abner lloveras
adam milstead
adriano martins
aiemann zahabi
aisling daly
akbarh arreola
akira corassani
al iaquinta
alan jouban
alan omer
alan patrick
albert cheng
albert morales
albert tumenov
alberto mina
alberto uda
alejandro perez
aleksei oleinik
alessandro ricci
alessio di chirico
alex caceres
alex chambers
alex garcia
alex morono
alex nicholson
alex oliveira
alex torres
alex white
alexa grasso
alexander gustafsson
alexander volkanovski
alexander volkov
alexander yakovlev
alexandra albu
alexandre pantoja
alexis davis
alexis dufresne
ali bagautinov
alistair overeem
aljamain sterling
alptekin ozkilic
alvaro herrera
amanda cooper
amanda nunes
an ying wang
anderson silva
andre fili
andre soukhamthath
andreas stahl
andrei arlovski
andrew craig
andrew holbrook
andrew sanchez
andy enz
andy ogle
angela hill
angela magana
anna elmose
anthony birchak
anthony christodoulou
anthony hamilton
anthony johnson
anthony lapsley
anthony njokuani
anth

mizuto hirota
myles jury
nam phan
naoyuki kotani
nate diaz
nate marquardt
nathan coy
nazareno malegarie
neil magny
neil seery
nick catone
nick diaz
nick hein
nico musoke
nicolas dalby
nik lentz
nikita krylov
niklas backstrom
niko price
nina ansaroff
ning guangyou
noad lahat
nolan ticman
nordine taleb
norifumi yamamoto
norman parke
olivier aubin-mercier
oluwale bamgbose
omari akhmedov
ovince saint preux
paddy holohan
paige vanzant
pat healy
patrick cote
patrick cummins
patrick holohan
patrick walsh
patrick williams
paul craig
paul felder
paul redmond
paulo borrachinha
paulo thiago
pawel pawlak
pearl gonzalez
pedro munhoz
peter sobotta
phil davis
phil harris
phillipe nover
piotr hallmann
polo reyes
quinn mulhern
rafael dos anjos
rafael feijao
rafael natal
rafaello oliveira
rampage jackson
ramsey nijem
randa markos
randy brown
rani yahya
raphael assuncao
raquel pennington
rashad evans
rashid magomedov
ray borg
reginaldo vieira
renan barao
renato moicano
renee forte
reza madadi
ricardo abr

In [17]:
averaged_fighter_performance = averaged_fighter_performance.T

## Combine Performance and Profile Stats

In [18]:
del fighter_stats.index.name

In [19]:
intersect_perf_prof_index = fighter_stats.index.intersection(averaged_fighter_performance.index)

In [20]:
smaller_performance = averaged_fighter_performance.drop([element for element in list(averaged_fighter_performance.index) 
                                                         if element not in list(intersect_perf_prof_index)], axis = 0)

smaller_profile = fighter_stats.drop([element for element in list(fighter_stats.index) 
                                      if element not in list(intersect_perf_prof_index)], axis = 0)

In [21]:
smaller_performance.sort_index(inplace = True)

In [22]:
smaller_profile.sort_index(inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
count_names = Counter(smaller_performance.index)
count_names_2 = Counter(smaller_profile.index)

In [24]:
for name, count in count_names_2.items():
    if count != 1:
        print(name)

## Merge databases and label winner

In [25]:
# Merge two datasets
prof_perf = smaller_profile.join(smaller_performance, how='inner')

In [26]:
%store -r ufc_2_raw

In [27]:
ufc_1_raw_1 = ufc_1_raw[["winner", "corner", "Name"]]

In [28]:
fights = np.unique(list(ufc_1_raw_1.index))

In [29]:
fights_name = []

for fight in fights:
    
    fighters = [element.lower() for element in list(ufc_1_raw_1.loc[fight]["Name"].values)]
    
    if (list(ufc_1_raw_1.loc[fight].values)[0][0] == list(ufc_1_raw_1.loc[fight].values)[0][1]):
        fighters.append(1)
    
    else:
        fighters.append(0)
    
    fights_name.append(fighters)
        

## Create training data dataframe

In [30]:
def make_training_set(df, labels):
    
    matrix = []
    for label in labels:
        
        try:
            row = [label[2]] + list(df.loc[label[0]].values) + list(df.loc[label[1]].values)
            matrix.append(np.array(row))
        except KeyError:
            continue
    
    return np.array(matrix)

In [31]:
data_for_training = make_training_set(prof_perf, fights_name)

In [32]:
y = data_for_training[:,0]

In [33]:
X = data_for_training[:,1:]

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

## Train

### Random neural network

In [103]:
clf = MLPClassifier()

clf.fit(X_train, y_train) 

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [104]:
y_pred = clf.predict(X_test)

In [105]:
accuracy_score(y_test, y_pred)

0.7208237986270023

In [106]:
def predict_winner(name_1, name_2):
    
    row = list(prof_perf.loc[name_1.lower()].values) + list(prof_perf.loc[name_2.lower()].values)
    
    score = clf.predict([row])
    
    if score == [1]:
        
        statement = "The winner is " + name_1
        
    else:
        
        statement = "The winner is " + name_2

    
    return statement

In [108]:
predict_winner("Jon Jones","Daniel Cormier")

'The winner is Jon Jones'

### GridSearch NN

In [52]:
hidden_layer_tuples = []

for n in np.arange(100) + 1:
    hidden_layer_tuples.append((random.choice(np.arange(100) + 1), random.choice(np.arange(100) + 1)))

hidden_layer_tuples = hidden_layer_tuples + [(5,2)]

In [None]:
mlp = MLPClassifier()

parameter_space = {
    'max_iter': np.arange(200, 5000),
    'hidden_layer_sizes': hidden_layer_tuples,
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': [0.0001, 0.05, 0.001],
    'learning_rate': ['constant','adaptive'],
}

grd = GridSearchCV(mlp, parameter_space, n_jobs =- 1, cv = 5)
grd.fit(X_train, y_train)

# Best paramete set
print('Best parameters found:\n', clf.best_params_)

# All results
means = grd.cv_results_['mean_test_score']
stds = grd.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    
y_true, y_pred = y_test , clf.predict(X_test)

print('Results on the test set:')
print(classification_report(y_true, y_pred))

