# 0. Import libraries and data

In [1]:
import pandas as pd
import numpy as np
import pycaret
import seaborn as sns
import matplotlib.pyplot as plt
import re
import requests
from bs4 import BeautifulSoup

%matplotlib inline
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)


___

In [2]:
ufc_df = pd.read_csv('../data/ufc_df.csv')

# 3. Modelling

In [3]:
# Set up modelling using PyCaret
from pycaret.classification import *
experiment = setup(ufc_df, target = 'winner', normalize=True, session_id=42)
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.7104,0.7857,0.7075,0.7118,0.7095,0.4207,0.4209,0.23
lightgbm,Light Gradient Boosting Machine,0.7051,0.7773,0.6995,0.7078,0.7033,0.4102,0.4106,1.049
ada,Ada Boost Classifier,0.6979,0.7739,0.7013,0.6974,0.6991,0.3958,0.3961,0.053
lda,Linear Discriminant Analysis,0.696,0.7647,0.6975,0.6964,0.6962,0.3921,0.3929,0.012
ridge,Ridge Classifier,0.6957,0.0,0.6975,0.6961,0.696,0.3915,0.3923,0.005
lr,Logistic Regression,0.6951,0.7664,0.6982,0.6948,0.6958,0.3902,0.391,0.263
rf,Random Forest Classifier,0.6926,0.7587,0.6808,0.6977,0.6889,0.3853,0.3857,0.244
xgboost,Extreme Gradient Boosting,0.6923,0.7683,0.687,0.6953,0.6909,0.3846,0.3849,0.716
et,Extra Trees Classifier,0.6789,0.7438,0.669,0.6828,0.6756,0.3578,0.3581,0.213
svm,SVM - Linear Kernel,0.64,0.0,0.6484,0.6427,0.6404,0.2799,0.2842,0.01


### 3a. Select best model

In [4]:
gbc = create_model('gbc')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.7267,0.8109,0.7267,0.7267,0.7267,0.4534,0.4534
1,0.6885,0.7754,0.6812,0.6899,0.6855,0.3769,0.3769
2,0.6916,0.7653,0.7062,0.6848,0.6954,0.3832,0.3834
3,0.7009,0.783,0.6812,0.7078,0.6943,0.4018,0.4021
4,0.704,0.7907,0.7019,0.7062,0.704,0.4081,0.4081
5,0.6885,0.7788,0.677,0.6943,0.6855,0.377,0.3771
6,0.7009,0.7614,0.6832,0.7097,0.6962,0.4019,0.4022
7,0.7726,0.8469,0.7826,0.7683,0.7754,0.5451,0.5452
8,0.7072,0.7656,0.6894,0.7161,0.7025,0.4144,0.4147
9,0.7227,0.7786,0.7453,0.7143,0.7295,0.4454,0.4458


### 3b. Test model on Holdout Set
- as seen below, results on the holdout test set is fairly good and close to the train set metrics, suggesting that the model is not overfitting to the train set.

In [5]:
predict_model(gbc)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.7081,0.8022,0.738,0.6955,0.7161,0.4162,0.417


Unnamed: 0,height_fighter1,reach_fighter1,SLpM_fighter1,Str_Acc_fighter1,SApM_fighter1,Str_Def_fighter1,TD_Avg_fighter1,TD_Acc_fighter1,TD_Def_fighter1,Sub_Avg_fighter1,...,stance_fighter1_not_available,stance_fighter2_Open Stance,stance_fighter2_Orthodox,stance_fighter2_Sideways,stance_fighter2_Southpaw,stance_fighter2_Switch,stance_fighter2_not_available,winner,Label,Score
0,-0.156715,-0.079388,-0.130647,0.028726,-0.196242,-0.561340,-0.879153,0.752700,0.246980,-0.585203,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,1,0.6439
1,-0.748381,-0.331822,-0.309506,-0.205979,0.152577,-0.326889,1.040470,0.030196,1.526391,-0.803613,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,1,0.5998
2,1.322451,1.687646,-0.241369,0.850192,-0.644722,-1.381920,-0.910115,-0.788641,-0.255646,-0.803613,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,0.7984
3,-0.748381,-0.331822,-0.726843,-1.144797,0.252239,0.376466,0.637968,-0.162471,-2.174763,0.943664,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,0,0.8374
4,-1.635880,-1.341555,-1.450795,-0.558036,-0.752690,1.079820,0.150322,-0.114305,1.206538,-0.039179,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,0,0.5212
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1372,0.139118,-1.089121,0.610339,0.263431,-0.038443,0.259240,-0.747566,0.752700,-0.164260,1.817303,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,1,0.6635
1373,-1.340048,-0.584254,1.257637,-0.675388,2.884986,0.376466,-1.150068,-1.029476,0.064207,-0.585203,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,1,0.5676
1374,0.730784,0.930345,0.601822,0.967544,-0.362345,-0.092437,0.382535,0.415531,-0.621192,0.179231,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1,1,0.7069
1375,-0.452549,0.677912,-1.663722,-0.205979,-0.287599,-2.436952,1.427491,0.993534,-2.677389,0.506845,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0,0,0.6518


### 3c. Hyperparameter tuning of model
- hyperparameter tuning did not improve model score,

In [6]:
tuned_model = tune_model(gbc, choose_better=True, optimize='F1')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.7143,0.785,0.7391,0.7041,0.7212,0.4286,0.4291
1,0.6978,0.7724,0.7688,0.6721,0.7172,0.3959,0.4
2,0.6698,0.7679,0.7375,0.6484,0.6901,0.3398,0.3431
3,0.7103,0.7843,0.7812,0.6831,0.7289,0.4208,0.4252
4,0.6947,0.7864,0.7329,0.6821,0.7066,0.3893,0.3904
5,0.7134,0.7802,0.7516,0.6994,0.7246,0.4267,0.4279
6,0.6729,0.7534,0.6832,0.6707,0.6769,0.3457,0.3458
7,0.7445,0.8312,0.7888,0.7257,0.756,0.489,0.4908
8,0.6978,0.7498,0.7019,0.6975,0.6997,0.3956,0.3956
9,0.6916,0.7854,0.7453,0.6742,0.708,0.383,0.3851


### 3d. Finalise model

In [7]:
final_model = finalize_model(gbc)

### 3e. Evaluate model using various metrics/plots

In [8]:
evaluate_model(final_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

- from the `evaluate_model` function above, we can see that the most important features are:
    - win% ; this is expected and self-explanatory
    - SLpM ; interestingly, strikes are the next 2 important features in determining the winner. 
    - 7 out of the top 10 features (exclude win%) are Strike-related attributes, as opposed to Takedown or Submission.

Note:
* SLpM - Significant Strikes Landed per Minute
* Str_Acc - Significant Striking Accuracy
* SApM - Significant Strikes Absorbed per Minute
* Str_Def - Significant Strike Defence (the % of opponents strikes that did not land)
* TD_Avg - Average Takedowns Landed per 15 minutes
* TD_Acc - Takedown Accuracy
* TD_Def - Takedown Defense (the % of opponents TD attempts that did not land)
* Sub_Avg - Average Submissions Attempted per 15 minutes

___

# 4. Predict model on unseen data
- as there are many features required to predict the model, 3 ways to scraping these data from online soruces are presented below.

### 4a. Prediction using data scraped from official UFC website (UFC.com)

In [9]:
# Function to get attributes of fighters from UFC.com
def extract_attribute(athlete_name):
    
    url = 'https://www.ufc.com/athlete/' + athlete_name
    response = requests.get(url)
    soup = BeautifulSoup(response.text,'lxml')
    
    ### Get attribute 1 ###
    attr1 = soup.find_all('text', class_='e-chart-circle__percent')
    attr_dict1 = {
        'Str_Acc': 0,
        'TD_Acc': 0,
    }
    attr_keys1 = list(attr_dict1.keys())

    for i in range(len(attr_dict1)):
        val = attr1[i].text
        val = float(re.sub('[\%]','',val))
        attr_dict1[attr_keys1[i]] = val


    ### Get attribute 2 ###
    attr2 = soup.find_all('div', class_='c-stat-compare__number')
    attr_dict2 = {
        'SLpM': 0,
        'SApM': 0,
        'TD_Avg': 0,
        'Sub_Avg': 0,
        'Str_Def': 0,
        'TD_Def': 0,
    }
    attr_keys2 = list(attr_dict2.keys())

    for i in range(len(attr_dict2)):
        val = attr2[i].text
        val = float(re.sub('[^\.\d]+','',val))
        attr_dict2[attr_keys2[i]] = val

    # combine attributes 
    def Merge(dict1, dict2):
        res = {**dict1, **dict2}
        return res
    attr_total_f2 = Merge(attr_dict1, attr_dict2)


    # get windrawloss :
    texts = soup.find_all('div', class_='c-hero__headline-suffix tz-change-inner')[0].text
    texts = re.findall('[\d+][-][\d+][-][\d+]', texts)[0]
    texts = re.split(r'[-]',texts)
#     attr_total_f2['win'] = int(texts[0])
#     attr_total_f2['lose'] = int(texts[1])
#     attr_total_f2['draw'] = int(texts[2])
    win = int(texts[0])
    lose = int(texts[1])
    draw = int(texts[2])

    attr_total_f2['win%'] = (win/(win+lose+draw))*100

    # get biography (height, reach, etc)
    bio = soup.find_all('div', class_='c-bio__info')
    bio = bio[0].text
    bio_height = re.findall(r'Height[\n]+[\d]+[.][\d]+', bio)
    bio_height = float(re.findall(r'[\d]+[.][\d]+', bio_height[0])[0])
    attr_total_f2['height'] = bio_height * 2.54
    bio_reach = re.findall(r'Reach[\n]+[\d]+[.][\d]+', bio)
    bio_reach = float(re.findall(r'[\d]+[.][\d]+', bio_reach[0])[0])
    attr_total_f2['reach'] = bio_reach * 2.54
    bio_age = re.findall(r'Age[\n]+[\d]+', bio)
    bio_age = int(re.sub(r'[\D]+','',bio_age[0]))
    attr_total_f2['age'] = bio_age 

    # combine further
    return attr_total_f2


##### Select the 2 fighters to make predictions on

In [10]:
# input name (double check from ufc.com URL for the correct format to input the names)
f1_name = 'Raulian-Paiva'
f2_name = 'Sean-OMalley'
f1 = extract_attribute(f1_name)
f2 = extract_attribute(f2_name)

##### Create dataframe for new prediction 
take note: need to fill up the following manually:
- weight_class
- title_fight
- stance_fighter1
- stance_fighter2

In [11]:
new_prediction = pd.DataFrame(columns=ufc_df.columns)
new_prediction.drop(columns='winner', inplace=True)
i = 0
new_prediction.loc[i,:] = 0

# for fighter1:
f1_key = list(f1.keys())
# new_prediction.loc[i,'win_fighter1':'draw_fighter1'] = [f1[f1_key[i]] for i in range(8,11)]
new_prediction.loc[i,'win%_fighter1'] = f1[f1_key[8]]
new_prediction.loc[i,'height_fighter1'] = f1[f1_key[9]]
new_prediction.loc[i,'reach_fighter1'] = f1[f1_key[10]]
new_prediction.loc[i,'SLpM_fighter1'] = f1[f1_key[2]]
new_prediction.loc[i,'Str_Acc_fighter1'] = f1[f1_key[0]]
new_prediction.loc[i,'SApM_fighter1'] = f1[f1_key[3]]
new_prediction.loc[i,'Str_Def_fighter1'] = f1[f1_key[6]]
new_prediction.loc[i,'TD_Avg_fighter1'] = f1[f1_key[4]]
new_prediction.loc[i,'TD_Acc_fighter1'] = f1[f1_key[1]]
new_prediction.loc[i,'TD_Def_fighter1'] = f1[f1_key[7]]
new_prediction.loc[i,'Sub_Avg_fighter1'] = f1[f1_key[5]]
new_prediction.loc[i,'age_fighter1'] = f1[f1_key[11]]


# for fighter2:
f2_key = list(f2.keys())
# new_prediction.loc[i,'win_fighter2':'draw_fighter2'] = [f2[f2_key[i]] for i in range(8,11)]
new_prediction.loc[i,'win%_fighter2'] = f2[f2_key[8]]
new_prediction.loc[i,'height_fighter2'] = f2[f2_key[9]]
new_prediction.loc[i,'reach_fighter2'] = f2[f2_key[10]]
new_prediction.loc[i,'SLpM_fighter2'] = f2[f2_key[2]]
new_prediction.loc[i,'Str_Acc_fighter2'] = f2[f2_key[0]]
new_prediction.loc[i,'SApM_fighter2'] = f2[f2_key[3]]
new_prediction.loc[i,'Str_Def_fighter2'] = f2[f2_key[6]]
new_prediction.loc[i,'TD_Avg_fighter2'] = f2[f2_key[4]]
new_prediction.loc[i,'TD_Acc_fighter2'] = f2[f2_key[1]]
new_prediction.loc[i,'TD_Def_fighter2'] = f2[f2_key[7]]
new_prediction.loc[i,'Sub_Avg_fighter2'] = f2[f2_key[5]]
new_prediction.loc[i,'age_fighter2'] = f2[f2_key[11]]


# reach_diff
new_prediction.loc[i,'reach_diff'] = new_prediction.loc[i,'reach_fighter1'] - new_prediction.loc[i,'reach_fighter2']

#### FILL UP THE BELOW DATA MANUALLY

In [12]:
new_prediction.loc[i,'weight_class'] = "Bantamweight"
new_prediction.loc[i,'title_fight'] = 'f'
new_prediction.loc[i,'stance_fighter1'] = 'Orthodox'
new_prediction.loc[i,'stance_fighter2'] = 'Switch'

### Make new prediction

In [13]:
%matplotlib inline
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

new_prediction

Unnamed: 0,weight_class,title_fight,height_fighter1,reach_fighter1,stance_fighter1,SLpM_fighter1,Str_Acc_fighter1,SApM_fighter1,Str_Def_fighter1,TD_Avg_fighter1,TD_Acc_fighter1,TD_Def_fighter1,Sub_Avg_fighter1,win%_fighter1,height_fighter2,reach_fighter2,stance_fighter2,SLpM_fighter2,Str_Acc_fighter2,SApM_fighter2,Str_Def_fighter2,TD_Avg_fighter2,TD_Acc_fighter2,TD_Def_fighter2,Sub_Avg_fighter2,win%_fighter2,reach_diff,age_fighter1,age_fighter2
0,Bantamweight,f,172.72,176.53,Orthodox,4.13,42,5.11,49,0.58,23,75,0.19,25,180.34,182.88,Switch,8.26,63,3.52,66,0.59,50,65,0.59,80,-6.35,26,27


In [14]:
predict_model(final_model, data = new_prediction)

Unnamed: 0,weight_class,title_fight,height_fighter1,reach_fighter1,stance_fighter1,SLpM_fighter1,Str_Acc_fighter1,SApM_fighter1,Str_Def_fighter1,TD_Avg_fighter1,TD_Acc_fighter1,TD_Def_fighter1,Sub_Avg_fighter1,win%_fighter1,height_fighter2,reach_fighter2,stance_fighter2,SLpM_fighter2,Str_Acc_fighter2,SApM_fighter2,Str_Def_fighter2,TD_Avg_fighter2,TD_Acc_fighter2,TD_Def_fighter2,Sub_Avg_fighter2,win%_fighter2,reach_diff,age_fighter1,age_fighter2,Label,Score
0,Bantamweight,f,172.72,176.53,Orthodox,4.13,42,5.11,49,0.58,23,75,0.19,25,180.34,182.88,Switch,8.26,63,3.52,66,0.59,50,65,0.59,80,-6.35,26,27,0,0.6484


In [15]:
res  = predict_model(final_model, data = new_prediction)
label_dict = {1:'fighter1', 0:'fighter2'}
print(f"winner predicted to be {label_dict[res['Label'][0]]}, with accuracy of {res['Score'][0]}")

winner predicted to be fighter2, with accuracy of 0.6484


### Predictions on UFC269 (just happened on 12 Dec 2021)
#### 1. Fight between Charles Oliveira vs Dustin Poirier: Prediction - Charles Oliveira to win with score of 83% 
- eventual winner was Charles Oliveira

#### 2. Fight between Amanda Nunes vs Julianna Pena: Prediction - Amanda Nunes to win with score of 79% 
- eventual winner was Julianna Pena (this was considered a massive upset)

#### 3. Fight between Geoff Neal vs Santiago Ponzinibbio: Prediction - Geoff Neal to win with score of 80% 
- eventual winner was Geoff Neal

#### 4. Fight between Kai Kara-France vs Cody Garbrandt: Prediction - Cody Garbrandt to win with score of 60% 
- eventual winner was Kai Kara-France

#### 5. Fight between Raulian Paiva vs Sean O'Malley: Prediction - Sean O'Malley to win with score of 65% 
- eventual winner was Sean O'Malley


___

### 4b. Prediction using data scraped from ufcstats.com ("View Matchup")
- TAKE NOTE: this method can ONLY be used BEFORE THE FIGHT ENDS


### e.g.1) UFC Fight Night: Lewis vs. Daukaus

In [16]:
# Indicate base url
# CHANGE THE WEBSITE BELOW FROM THE UFCSTATS
url = 'http://www.ufcstats.com/fight-details/4b7ec02b39fc6f70'
response = requests.get(url)
soup = BeautifulSoup(response.text,'lxml')
# quote = soup.find_all("tbody", class_='b-fight-details__table-body')
quote = soup.find_all("tbody")
tag = quote[0].text
string = re.sub(r'[\n]+',r'SPLIT',tag)
string = re.sub(r'[\s]+',r'',string)
string_list = string.split('SPLIT')
string_list = [string for string in string_list if string != '']
string_list

['Taleofthetape',
 'Wins/Losses/Draws',
 '25-8-0(1NC)',
 '12-3-0',
 'AverageFightTime',
 '9:45',
 '3:23',
 'Height',
 '6\'3"',
 '6\'3"',
 'Weight',
 '260lbs.',
 '250lbs.',
 'Reach',
 '79"',
 '76"',
 'Stance',
 'Orthodox',
 'Orthodox',
 'DOB',
 'Feb07,1985',
 'Sep25,1989',
 'Striking(SignificantStrikes)',
 'StrikesLandedperMin.(SLpM)',
 '2.49',
 '9.03',
 'StrikingAccuracy',
 '50%',
 '53%',
 'StrikesAbsorbedperMin.(SApM)',
 '2.48',
 '3.40',
 'Defense',
 '41%',
 '64%',
 'Grappling',
 'TakedownsAverage/15min.',
 '0.49',
 '0.00',
 'TakedownAccuracy',
 '25%',
 '0%',
 'TakedownDefense',
 '55%',
 '100%',
 'SubmissionAverage/15min.',
 '0.0',
 '0.0',
 'Mostrecentfights(NewestFirst)',
 'Loss-Gane',
 'Win-Abdurakhimov',
 'Win-Blaydes',
 'Win-Oleinik',
 'Win-Oleinik',
 'Win-Nascimento',
 'Win-Latifi',
 'Win-Porter',
 'Win-Ivanov']

### e.g.2) UFC 269: Oliveira vs. Poirier

In [17]:
# # Indicate base url
# # CHANGE THE WEBSITE BELOW FROM THE UFCSTATS
# url = 'http://www.ufcstats.com/fight-details/b22eab3aa1522f40'
# response = requests.get(url)
# soup = BeautifulSoup(response.text,'lxml')
# # quote = soup.find_all("tbody", class_='b-fight-details__table-body')
# quote = soup.find_all("tbody")
# tag = quote[0].text
# string = re.sub(r'[\n]+',r'SPLIT',tag)
# string = re.sub(r'[\s]+',r'',string)
# string_list = string.split('SPLIT')
# string_list = [string for string in string_list if string != '']
# string_list

#### Scrape data online and put into dataframe for prediction

In [18]:
def height(data):
    z = re.sub(r'[\"]','', data)
    z = re.split(r"[']", z)
    return (float(z[0])*30.48 + float(z[1])*2.54)

def reach(data):
    z = re.sub(r'[\"]','', data)
    return (float(z)*2.54)

new_prediction = pd.DataFrame(columns=ufc_df.columns)
new_prediction.drop(columns='winner', inplace=True)
i = 0
new_prediction.loc[i,:] = 0

# fighter 1
# new_prediction.loc[i,2] = int(re.split('[-]',string_list[2])[0]) # win f1
# new_prediction.loc[i,3] = int(re.split('[-]',string_list[2])[1]) # lose f1
# new_prediction.loc[i,4] = int(re.split(r'\(',re.split('[-]',string_list[2])[2])[0]) # draw f1
win = int(re.split('[-]',string_list[2])[0]) # win f1
lose = int(re.split('[-]',string_list[2])[1]) # lose f1
draw = int(re.split(r'\(',re.split('[-]',string_list[2])[2])[0]) # draw f1

new_prediction.loc[i,'height_fighter1'] = height(string_list[8]) # height f1
new_prediction.loc[i,'reach_fighter1'] = reach(string_list[14]) # reach f1
new_prediction.loc[i,'stance_fighter1'] = (string_list[17]) # stance f1
# fight stats fighter 1
new_prediction.loc[i,'SLpM_fighter1'] = float(string_list[24]) # SLpM f1
new_prediction.loc[i,'Str_Acc_fighter1'] = float(re.sub('%','', string_list[27])) # SA f1
new_prediction.loc[i,'SApM_fighter1'] = float(string_list[30]) # SApM f1
new_prediction.loc[i,'Str_Def_fighter1'] = float(re.sub('%','', string_list[33])) # SD f1
new_prediction.loc[i,'TD_Avg_fighter1'] = float(string_list[37]) # TD_avg f1
new_prediction.loc[i,'TD_Acc_fighter1'] = float(re.sub('%','', string_list[40])) # TD_acc f1
new_prediction.loc[i,'TD_Def_fighter1'] = float(re.sub('%','', string_list[43])) # TD_def f1
new_prediction.loc[i,'Sub_Avg_fighter1'] = float(string_list[46]) # Sub_avg f1
new_prediction.loc[i,'win%_fighter1'] = (win / (win+lose+draw))*100


# fighter 2
# new_prediction.loc[i,17] = int(re.split('[-]',string_list[3])[0]) # win f2
# new_prediction.loc[i,18] = int(re.split('[-]',string_list[3])[1]) # lose f2
# new_prediction.loc[i,19] = int(re.split(r'\(',re.split('[-]',string_list[3])[2])[0]) # draw f2
win = int(re.split('[-]',string_list[3])[0]) # win f2
lose = int(re.split('[-]',string_list[3])[1]) # lose f2
draw = int(re.split(r'\(',re.split('[-]',string_list[3])[2])[0]) # draw f2
new_prediction.loc[i,'height_fighter2']  = height(string_list[9]) # height f2
new_prediction.loc[i,'reach_fighter2'] = reach(string_list[15]) # reach f2
new_prediction.loc[i,'stance_fighter2']  = (string_list[18]) # stance f2
# fight stats fighter 2
new_prediction.loc[i,'SLpM_fighter2'] = float(string_list[25]) # SLpM f2
new_prediction.loc[i,'Str_Acc_fighter2'] = float(re.sub('%','', string_list[28])) # SA f2
new_prediction.loc[i,'SApM_fighter2'] = float(string_list[31]) # SApM f2
new_prediction.loc[i,'Str_Def_fighter2'] = float(re.sub('%','', string_list[34])) # SD f2
new_prediction.loc[i,'TD_Avg_fighter2']  = float(string_list[38]) # TD_avg f2
new_prediction.loc[i,'TD_Acc_fighter2']  = float(re.sub('%','', string_list[41])) # TD_acc f2
new_prediction.loc[i,'TD_Def_fighter2']  = float(re.sub('%','', string_list[44])) # TD_def f2
new_prediction.loc[i,'Sub_Avg_fighter2'] = float(string_list[47]) # Sub_avg f2
new_prediction.loc[i,'win%_fighter2'] = (win / (win+lose+draw))*100

# reach_diff
new_prediction.loc[i,'reach_diff'] = new_prediction.loc[i,'reach_fighter1'] - new_prediction.loc[i,'reach_fighter2']

#### The following values need to be manually input

In [19]:
new_prediction['weight_class'] = "Heavyweight"
new_prediction['title_fight'] = "f"

new_prediction['age_fighter1'] = 36
new_prediction['age_fighter2'] = 32
new_prediction

Unnamed: 0,weight_class,title_fight,height_fighter1,reach_fighter1,stance_fighter1,SLpM_fighter1,Str_Acc_fighter1,SApM_fighter1,Str_Def_fighter1,TD_Avg_fighter1,TD_Acc_fighter1,TD_Def_fighter1,Sub_Avg_fighter1,win%_fighter1,height_fighter2,reach_fighter2,stance_fighter2,SLpM_fighter2,Str_Acc_fighter2,SApM_fighter2,Str_Def_fighter2,TD_Avg_fighter2,TD_Acc_fighter2,TD_Def_fighter2,Sub_Avg_fighter2,win%_fighter2,reach_diff,age_fighter1,age_fighter2
0,Heavyweight,f,190.5,200.66,Orthodox,2.49,50,2.48,41,0.49,25,55,0,75.7576,190.5,193.04,Orthodox,9.03,53,3.4,64,0,0,100,0,80,7.62,36,32


#### Prediction

In [20]:
res  = predict_model(final_model, data = new_prediction)
res

label_dict = {1:'fighter1', 0:'fighter2'}
print(f"winner predicted to be {label_dict[res['Label'][0]]}, with accuracy of {res['Score'][0]}")

winner predicted to be fighter1, with accuracy of 0.502


#### 1. Fight between Lewis vs. Daukaus predicted to be for Lewis to win with score of 50% (this was predicted before the fight on 18 Dec 2021)
- fight to commence on 18 Dec 2021

#### 2. Fight between Oliveira vs. Poirier predicted to be for Oliveira to win with score of 85% (this was predicted before the fight on 12 Dec 2021)
- eventual winner was Oliveira

#### 3. Fight between Amanda Nunes vs Julianna Pena predicted to be for Amanda Nunes to win with score of 93% (this was predicted before the fight on 12 Dec 2021)
- eventual winner was Julianna Pena (this was considered a massive upset)

____

### 4c. Prediction using manual inputs of data
- uncomment and manually input the data to use this method

In [21]:
# # Bronx vs Porier
# new_fight = {
# 'weight_class': 'Lightweight',
# 'title_fight':'t',
# 'age_fighter1':32,
# 'win_fighter1':31,
# 'lose_fighter1':8,
# 'draw_fighter1':0,
# 'height_fighter1':[5,10],
# 'reach_fighter1':74,
# 'stance_fighter1':'Orthodox',
# 'SLpM_fighter1':3.26,
# 'Str_Acc_fighter1':52,
# 'SApM_fighter1':3.01,
# 'Str_Def_fighter1':53,
# 'TD_Avg_fighter1':2.64,
# 'TD_Acc_fighter1':44,
# 'TD_Def_fighter1':57,
# 'Sub_Avg_fighter1':2.7,
# 'win%_fighter1':0,
# 'age_fighter2':32,
# 'win_fighter2':28,
# 'lose_fighter2':6,
# 'draw_fighter2':0,
# 'height_fighter2':[5,9],
# 'reach_fighter2':72,
# 'stance_fighter2':'Southpaw',
# 'SLpM_fighter2':5.62,
# 'Str_Acc_fighter2':50,
# 'SApM_fighter2':4.19,
# 'Str_Def_fighter2':54,
# 'TD_Avg_fighter2':1.5,
# 'TD_Acc_fighter2':36,
# 'TD_Def_fighter2':61,
# 'Sub_Avg_fighter2':1.2,
# 'win%_fighter2':0,
# 'reach_diff':0
# }
# new_data = pd.DataFrame(columns=ufc_df.columns)
# new_data.drop(columns='winner', inplace=True)
# new_data = new_data.append(new_fight, ignore_index=True)

# new_data['height_fighter1'] = new_data['height_fighter1'].map(lambda x: (float(x[0])*30.48 + (float(x[1])*2.54)))
# new_data['height_fighter2'] = new_data['height_fighter2'].map(lambda x: (float(x[0])*30.48 + (float(x[1])*2.54)))

# new_data['reach_fighter1'] = new_data['reach_fighter1'].map(lambda x: x*2.54)
# new_data['reach_fighter2'] = new_data['reach_fighter2'].map(lambda x: x*2.54)

# new_data['win%_fighter1'] = (new_data['win_fighter1'] / (new_data['win_fighter1'] + 
#                                                         new_data['lose_fighter1'] + new_data['draw_fighter1']))*100
# new_data['win%_fighter2'] = (new_data['win_fighter2'] / (new_data['win_fighter2'] + 
#                                                         new_data['lose_fighter2'] + new_data['draw_fighter2']))*100

In [22]:
# res  = predict_model(final_model_lgbm, data = new_data)
# res