In [3]:
#get data on ways fighter won/lost through dec, won/lost through sub, won/lost through ko/tko  
#tabulate outcomes in a table such as: Outcome, % chance, winner 
#FRONT END: give the user the ability to input two fighters (drop down menu)
# ---- FINISHED --- 
#extra: prevent users from picking two fighters that are from different weight classes
#--- LONG RUN --- 
# Improve model where it takes # of strikes (successful, attempted) , ground game/submissions duration, takedowns (successful, attempted)
# into consideration 

In [4]:
#get accuracy of model by splitting data into train/test
import pandas as pd 

In [5]:
# Raw per-fight records, one row per (fighter, bout), read from the local CSV export.
df = pd.read_csv("UFC Matches April 2023.csv")

In [6]:
df.head()

Unnamed: 0,Full Name,Weight,Matchup,Outcome,Method,Round,Time
0,Tom Aaron,WEIGHT: 155 lbs.,,,,,
1,Tom Aaron,WEIGHT: 155 lbs.,Tom Aaron\n \n Matt Ricehouse,LOSS,U-DEC,3.0,5:00
2,Tom Aaron,WEIGHT: 155 lbs.,Tom Aaron\n \n Eric Steenberg,WIN,SUB,1.0,0:56
3,Danny Abbadi,WEIGHT: 155 lbs.,,,,,
4,Danny Abbadi,WEIGHT: 155 lbs.,Danny Abbadi\n \n Jorge Gurgel,LOSS,S-DEC,3.0,5:00


## Data Cleaning

We first clean the dataset, then group the fights by fighter and compute each fighter's outcome distribution to estimate their likelihood of winning or losing via decision, KO/TKO, or submission.

In [7]:
# Drop the first row of every fighter: it carries only the name and weight,
# with every fight-level column empty.
fight_columns = ["Matchup", "Outcome", 'Method', 'Round', 'Time']
nulls = df[fight_columns].isnull().all(axis=1)
df = df.loc[~nulls, :]

# Drop upcoming bouts (Outcome == 'NEXT'); they have no result yet.
# .copy() gives an independent frame so the column assignments that follow
# write to real data instead of a filtered slice (avoids SettingWithCopyWarning).
df = df[df["Outcome"] != 'NEXT'].copy()

# Strip the literal "WEIGHT: " prefix from the weight column.
# regex=False: the pattern is plain text and must never be read as a regex.
df['Weight'] = df['Weight'].str.replace('WEIGHT: ', '', regex=False)

# keep only the opponent's name in the matchup column
def remove_name(orig_matchup):
    """Return whatever follows the last newline in `orig_matchup`, trimmed.

    When the string contains no newline the whole string is returned
    (still trimmed of leading/trailing whitespace).
    """
    _, _, opponent = orig_matchup.rpartition('\n')
    return opponent.strip()

# Reduce every matchup string to the opponent's name.
df['Matchup'] = df['Matchup'].apply(remove_name)

In [8]:
unique_methods = df['Method'].unique()

In [9]:
unique_methods

array(['U-DEC', 'SUB', 'S-DEC', 'KO/TKO', 'Overturned', 'Other', 'M-DEC',
       'CNC', 'DQ', 'SUB ', 'KO/TKO ', 'S-DEC ', 'Decision', 'U-DEC ',
       'M-DEC ', 'Overturned ', 'CNC '], dtype=object)

In [10]:
unique_outcomes = df['Outcome'].unique()

In [11]:
unique_outcomes

array(['LOSS', 'WIN', 'NC', 'DRAW'], dtype=object)

In [12]:
# Outcome and Method labels to exclude from the analysis.
# The trailing-space spellings ('Overturned ', 'CNC ') are listed separately
# because the Method column contains them verbatim.
removed_outcomes = ['NC', 'DRAW']
removed_methods = ['Overturned', 'Other', 'CNC', 'DQ', 'Overturned ', 'CNC ']

In [13]:
# Filter in two passes: first by method, then by outcome.
# Each mask is built from the frame it is applied to, so the boolean indexer
# always lines up with the rows being filtered (a mask taken from `df` only
# works on `filtered_df1` while df's index is a superset of it).
filtered_df1 = df.loc[~df['Method'].isin(removed_methods)]
filtered_df2 = filtered_df1.loc[~filtered_df1['Outcome'].isin(removed_outcomes)]

In [14]:
filtered_df2

Unnamed: 0,Full Name,Weight,Matchup,Outcome,Method,Round,Time
1,Tom Aaron,155 lbs.,Matt Ricehouse,LOSS,U-DEC,3.0,5:00
2,Tom Aaron,155 lbs.,Eric Steenberg,WIN,SUB,1.0,0:56
4,Danny Abbadi,155 lbs.,Jorge Gurgel,LOSS,S-DEC,3.0,5:00
5,Danny Abbadi,155 lbs.,Kalib Starnes,LOSS,SUB,1.0,2:56
7,Nariman Abbasov,155 lbs.,Ismael Bonfim,LOSS,U-DEC,3.0,5:00
...,...,...,...,...,...,...,...
22976,Alex Zuniga,145 lbs.,Bryan Caraway,LOSS,U-DEC,3.0,5:00
22979,Allan Zuniga,155 lbs.,John Gunther,LOSS,M-DEC,3.0,5:00
22981,Virgil Zwicker,205 lbs.,Guto Inocente,LOSS,U-DEC,3.0,5:00
22982,Virgil Zwicker,205 lbs.,Brett Albee,WIN,KO/TKO,1.0,1:46


In [15]:
df = filtered_df2

## Exploratory Data Analysis (EDA)

In [16]:
#questions that could be of importance:
#what is the most common method of winning?
#for fighters that win through ko/submission what round does it typically occur in?
#is there a discrepancy in the way fighters win across different weight classes?

In [17]:
#Jan vs. Izzy:

#Outcome	% chance	Winner
#KO	44.9%	Jan
#DEC	29.1%	Israel
#DEC	13.2%	Jan
#KO	9.48%	Israel
#SUB	2.9%	Israel
#SUB	.4%	Jan

In [18]:
#name, total matches, wins, losses, wins by dec, wins by sub, wins by ko/tko, loss by dec, loss by sub, loss by ko/tko

In [19]:
# Raw Method labels grouped by finish type. Trailing-space spellings are listed
# because the column contains them verbatim. The generic 'Decision' label is
# included so those bouts are not counted in the totals while missing from
# every per-method column.
decisions = ['U-DEC', 'S-DEC', 'M-DEC', 'S-DEC ', 'U-DEC ', 'M-DEC ', 'Decision']
kos = ['KO/TKO', 'KO/TKO ']
submissions = ['SUB', 'SUB ']


def _bouts_per_fighter(fights, mask):
    """Count the rows of `fights` selected by `mask`, grouped by fighter name."""
    return fights.loc[mask].groupby(['Full Name'])['Outcome'].count()


# Row masks, combined below into one count per (result, finish type).
won = df['Outcome'] == 'WIN'
lost = df['Outcome'] == 'LOSS'
by_decision = df['Method'].isin(decisions)
by_ko = df['Method'].isin(kos)
by_submission = df['Method'].isin(submissions)

# One Series per statistic, each indexed by fighter name. A fighter with no
# bouts in a category is simply absent from that Series.
data = {
    'Total Fights': df.groupby(['Full Name'])['Matchup'].count(),
    'Total Wins': _bouts_per_fighter(df, won),
    'Total Losses': _bouts_per_fighter(df, lost),
    'Wins By Decision': _bouts_per_fighter(df, won & by_decision),
    'Wins By KO': _bouts_per_fighter(df, won & by_ko),
    'Wins By Submission': _bouts_per_fighter(df, won & by_submission),
    'Loss By Decision': _bouts_per_fighter(df, lost & by_decision),
    'Loss By KO': _bouts_per_fighter(df, lost & by_ko),
    'Loss By Submission': _bouts_per_fighter(df, lost & by_submission),
}

In [20]:
fighter_stats = pd.DataFrame(data)

In [21]:
#df[df['Full Name'] == 'AJ Fonseca']

In [22]:
# Fighters missing from a category have no bouts there, so the gap becomes a count of 0.
fighter_stats = fighter_stats.fillna(0)

In [23]:
fighter_stats

Unnamed: 0,Total Fights,Total Wins,Total Losses,Wins By Decision,Wins By KO,Wins By Submission,Loss By Decision,Loss By KO,Loss By Submission
AJ Dobson,3,1.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0
AJ Fletcher,4,2.0,2.0,0.0,1.0,1.0,2.0,0.0,0.0
AJ Fonseca,1,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
AJ Matthews,1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
Aalon Cruz,3,1.0,2.0,0.0,1.0,0.0,0.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...
Zhang Tiequan,6,2.0,4.0,0.0,0.0,2.0,3.0,1.0,0.0
Zhang Weili,9,7.0,2.0,3.0,2.0,2.0,1.0,1.0,0.0
Zoila Frausto,2,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
Zubaira Tukhugov,8,5.0,3.0,3.0,2.0,0.0,3.0,0.0,0.0


In [32]:
# NOTE: `df` is rebound here from the per-fight rows to the per-fighter
# summary table (indexed by fighter name). The functions below read fighter
# statistics from this global via df.loc[<name>, <column>].
df = fighter_stats

## Monte Carlo Simulation

In [48]:
import math
import random as rnd

In [29]:
def calculate_mean(statistic, total_fights):
    """Return the share of a fighter's bouts that ended in a given outcome.

    Args:
        statistic: Number of bouts with the outcome of interest.
        total_fights: Total number of bouts for the fighter (must be non-zero).

    Returns:
        statistic / total_fights.
    """
    return statistic / total_fights


def calculate_sd(statistic, total_fights):
    """Return the standard error of the outcome proportion.

    Each bout is treated as a Bernoulli trial with success rate
    p = statistic / total_fights, so the sampling spread of p over n bouts is
    sqrt(p * (1 - p) / n). The bout count belongs inside the square root:
    dividing sqrt(p * (1 - p)) by n shrinks the spread by an extra factor of
    sqrt(n) and understates the uncertainty.

    Args:
        statistic: Number of bouts with the outcome of interest.
        total_fights: Total number of bouts for the fighter (must be non-zero).

    Returns:
        The standard error as a float; 0.0 when the proportion is 0 or 1.
    """
    mean = calculate_mean(statistic, total_fights)
    return math.sqrt(mean * (1 - mean) / total_fights)

In [39]:
def get_fighter_parameters(fighter1, fighter2):
    """Build a two-row table of outcome means and SDs for a pair of fighters.

    For each of the six outcome columns in the notebook-level per-fighter
    table `df`, the result holds a "<label> Mean" and a "<label> SD" column,
    computed with calculate_mean / calculate_sd from the fighter's count for
    that outcome and their "Total Fights".

    Args:
        fighter1: Index label of the first fighter in `df`.
        fighter2: Index label of the second fighter in `df`.

    Returns:
        pandas.DataFrame indexed by [fighter1, fighter2] with twelve columns.
    """
    # (output column prefix, source column in the per-fighter table)
    outcome_columns = (
        ("Decision Wins", "Wins By Decision"),
        ("KO Wins", "Wins By KO"),
        ("Sub Wins", "Wins By Submission"),
        ("Decision Loss", "Loss By Decision"),
        ("KO Loss", "Loss By KO"),
        ("Sub Loss", "Loss By Submission"),
    )
    fighters = [fighter1, fighter2]

    parameters = {}
    for label, column in outcome_columns:
        parameters[f"{label} Mean"] = [
            calculate_mean(df.loc[name, column], df.loc[name, "Total Fights"])
            for name in fighters
        ]
        parameters[f"{label} SD"] = [
            calculate_sd(df.loc[name, column], df.loc[name, "Total Fights"])
            for name in fighters
        ]

    return pd.DataFrame(data=parameters, index=fighters)

In [46]:
matchup_df = get_fighter_parameters("Israel Adesanya", "Jon Jones")

In [61]:
def gameSim(matchup=None):
    """Simulate one bout and return a score for every (winner, method) outcome.

    For each fighter and each finish type, one value is drawn from a normal
    distribution around that fighter's win rate by that method and another
    around the opponent's loss rate by the same method. The outcome score is
    the average of the two draws.

    Args:
        matchup: Two-row parameter table with the columns produced by
            get_fighter_parameters (row 0 = fighter 1, row 1 = fighter 2).
            Defaults to the notebook-level `matchup_df`.

    Returns:
        list of six floats ordered as fighter 1 decision, KO, submission,
        then fighter 2 decision, KO, submission. The draws are unbounded, so
        a score may fall below zero.
    """
    if matchup is None:
        matchup = matchup_df

    results = []
    # Fighter 1 as the winner first, then fighter 2. The draw order matches
    # the listed output order, so a seeded run is reproducible.
    for winner, loser in ((0, 1), (1, 0)):
        winner_row = matchup.iloc[winner]
        loser_row = matchup.iloc[loser]
        for method in ("Decision", "KO", "Sub"):
            win_draw = rnd.gauss(winner_row[f"{method} Wins Mean"],
                                 winner_row[f"{method} Wins SD"])
            loss_draw = rnd.gauss(loser_row[f"{method} Loss Mean"],
                                  loser_row[f"{method} Loss SD"])
            results.append((win_draw + loss_draw) / 2)

    # Return the scores instead of only printing them, so repeated
    # simulations can be collected and aggregated by the caller.
    return results

In [62]:
gameSim()

[0.2526319189080937, 0.1429785280949982, 0.0, 0.2586104541045792, 0.14197600679029518, 0.13699103743644706]
