In [162]:
#get data on ways fighter won/lost through dec, won/lost through sub, won/lost through ko/tko  
#tabulate outcomes in a table such as: Outcome, % chance, winner 
#FRONT END: give the user the ability to input two fighters (drop down menu)
#extra: prevent users from picking two fighters that are from different weight classes
# ---- FINISHED --- 
#--- LONG RUN --- 
# Improve model where it takes # of strikes (successful, attempted) , ground game/submissions duration, takedowns (successful, attempted)
# into consideration 

In [163]:
import pandas as pd 

In [164]:
df = pd.read_csv("UFC Matches April 2023.csv")

In [165]:
df.head()

Unnamed: 0,Full Name,Weight,Matchup,Outcome,Method,Round,Time
0,Tom Aaron,WEIGHT: 155 lbs.,,,,,
1,Tom Aaron,WEIGHT: 155 lbs.,Tom Aaron\n \n Matt Ricehouse,LOSS,U-DEC,3.0,5:00
2,Tom Aaron,WEIGHT: 155 lbs.,Tom Aaron\n \n Eric Steenberg,WIN,SUB,1.0,0:56
3,Danny Abbadi,WEIGHT: 155 lbs.,,,,,
4,Danny Abbadi,WEIGHT: 155 lbs.,Danny Abbadi\n \n Jorge Gurgel,LOSS,S-DEC,3.0,5:00


## Data Cleaning

We have to clean our dataset then group each fighter and get their distributions to determine their likelihood of winning/losing via decision, tko/ko or submission.

In [166]:
# delete first row of every fighter
nulls = df[["Matchup", "Outcome", 'Method', 'Round', 'Time']].isnull().all(axis=1) 
df = df.loc[~nulls, :]

#find matches that are upcoming and delete them
df = df[df["Outcome"] != 'NEXT']

# delete "weight text" in weight column
df['Weight'] = df['Weight'].str.replace('WEIGHT: ', '')

# keep only opponents name in matchup column
def remove_name(orig_matchup):
    last_index = orig_matchup.rfind('\n')
    new_string = orig_matchup[last_index +1:]
    return new_string.strip()

df['Matchup'] = df['Matchup'].apply(lambda x: remove_name(x))

In [167]:
unique_methods = df['Method'].unique()

In [168]:
unique_methods

array(['U-DEC', 'SUB', 'S-DEC', 'KO/TKO', 'Overturned', 'Other', 'M-DEC',
       'CNC', 'DQ', 'SUB ', 'KO/TKO ', 'S-DEC ', 'Decision', 'U-DEC ',
       'M-DEC ', 'Overturned ', 'CNC '], dtype=object)

In [169]:
unique_outcomes = df['Outcome'].unique()

In [170]:
unique_outcomes

array(['LOSS', 'WIN', 'NC', 'DRAW'], dtype=object)

In [171]:
removed_outcomes = ['NC', 'DRAW']
removed_methods = ['Overturned', 'Other', 'CNC', 'DQ', 'Overturned ', 'CNC ']

In [172]:
filtered_df1 = df.loc[~df['Method'].isin(removed_methods)]
filtered_df2 = filtered_df1.loc[~df['Outcome'].isin(removed_outcomes)]

In [173]:
df = filtered_df2

In [174]:
df_dup = filtered_df2 #this is our final dataset which includes all cleaning filters

In [139]:
#clean up "SUB " and "SUB" etc

### Train - Test Split

In [142]:
df_dup

Unnamed: 0,Full Name,Weight,Matchup,Outcome,Method,Round,Time
1,Tom Aaron,155 lbs.,Matt Ricehouse,LOSS,U-DEC,3.0,5:00
2,Tom Aaron,155 lbs.,Eric Steenberg,WIN,SUB,1.0,0:56
4,Danny Abbadi,155 lbs.,Jorge Gurgel,LOSS,S-DEC,3.0,5:00
5,Danny Abbadi,155 lbs.,Kalib Starnes,LOSS,SUB,1.0,2:56
7,Nariman Abbasov,155 lbs.,Ismael Bonfim,LOSS,U-DEC,3.0,5:00
...,...,...,...,...,...,...,...
22976,Alex Zuniga,145 lbs.,Bryan Caraway,LOSS,U-DEC,3.0,5:00
22979,Allan Zuniga,155 lbs.,John Gunther,LOSS,M-DEC,3.0,5:00
22981,Virgil Zwicker,205 lbs.,Guto Inocente,LOSS,U-DEC,3.0,5:00
22982,Virgil Zwicker,205 lbs.,Brett Albee,WIN,KO/TKO,1.0,1:46


In [175]:
fighter_counts = df.groupby('Full Name').count()
fighter_counts = fighter_counts[fighter_counts['Outcome'] >= 5]
#fighter_counts = fighter_counts[fighter_counts['Outcome'] >= 10]
df_filtered = df[df['Full Name'].isin(fighter_counts.index)] #creates a dataset of all the matches of fighters with >= 5 matches 

In [176]:
df_sampled = df_filtered.groupby('Full Name').apply(lambda x: x.sample(n=1, random_state=1))

In [177]:
df_sampled = df_sampled.reset_index(drop=True)

In [178]:
df_sampled #one randomly chosen match from each fighter that has >= 5 matches in their history

Unnamed: 0,Full Name,Weight,Matchup,Outcome,Method,Round,Time
0,Aaron Riley,155 lbs.,Joe Brammer,WIN,U-DEC,3.0,5:00
1,Aaron Rosa,205 lbs.,Joey Beltran,LOSS,KO/TKO,3.0,1:26
2,Aaron Simpson,170 lbs.,Ronny Markes,LOSS,S-DEC,3.0,5:00
3,Abdul Razak Alhassan,185 lbs.,Alessio Di Chirico,WIN,KO/TKO,1.0,0:17
4,Abel Trujillo,155 lbs.,Marcus LeVesseur,WIN,KO/TKO,2.0,3:56
...,...,...,...,...,...,...,...
1256,Zelg Galesic,185 lbs.,Taiei Kin,WIN,KO/TKO,1.0,1:05
1257,Zhalgas Zhumagulov,125 lbs.,Manel Kape,LOSS,KO/TKO,1.0,4:02
1258,Zhang Tiequan,155 lbs.,Darren Elkins,LOSS,U-DEC,3.0,5:00
1259,Zhang Weili,115 lbs.,Danielle Taylor,WIN,U-DEC,3.0,5:00


In [188]:
new_df = pd.DataFrame(columns=['Full Name', 'Weight', 'Matchup', 'Outcome', 'Method', 'Round', 'Time'])

i = 0
for index, row in df_sampled.iterrows():
    if row['Full Name'] == df_dup["Matchup"].iloc[i] and row['Matchup'] == df_dup['Full Name'].iloc[i] and row["Method"] == df_dup["Method"].iloc[i] and row["Round"] == df_dup["Round"].iloc[i] and row["Time"] == df_dup["Time"].iloc[i]:
        new_df = new_df.append(row, ignore_index=True)
        i += 1

In [195]:
df_dup= df_dup.reset_index(drop = True)
df_sampled = df_sampled.reset_index(drop = True)

In [None]:
#add the mirrored matches to df_sampled

In [179]:
#based on the randomly sampled matches, get the two fighters and store them in a tuple
test_list = []
for index, row in df_sampled.iterrows():
    test_list.append((row['Full Name'] , row['Matchup']))

In [151]:
#len(test_list)

In [152]:
for index, row in df_sampled.iterrows():
    df_dup = df_dup.drop(df_dup[(df_dup['Full Name'] == row['Full Name']) & (df_dup['Weight'] == row['Weight']) & (df_dup['Matchup'] == row['Matchup']) & (df_dup['Outcome'] == row['Outcome']) & (df_dup['Method'] == row['Method']) & (df_dup['Round'] == row['Round']) & (df_dup['Time'] == row['Time'])].index)

In [155]:
df_dup.shape

(17226, 7)

In [159]:
for index, row in df_sampled.iterrows():
    df_dup = df_dup.drop(df_dup[(df_dup['Full Name'] == row['Matchup']) & (df_dup['Matchup'] == row['Full Name']) & (df_dup['Method'] == row['Method']) & (df_dup['Round'] == row['Round']) & (df_dup['Time'] == row['Time'])].index)

In [160]:
df_dup

Unnamed: 0,Full Name,Weight,Matchup,Outcome,Method,Round,Time
1,Tom Aaron,155 lbs.,Matt Ricehouse,LOSS,U-DEC,3.0,5:00
2,Tom Aaron,155 lbs.,Eric Steenberg,WIN,SUB,1.0,0:56
4,Danny Abbadi,155 lbs.,Jorge Gurgel,LOSS,S-DEC,3.0,5:00
5,Danny Abbadi,155 lbs.,Kalib Starnes,LOSS,SUB,1.0,2:56
7,Nariman Abbasov,155 lbs.,Ismael Bonfim,LOSS,U-DEC,3.0,5:00
...,...,...,...,...,...,...,...
22976,Alex Zuniga,145 lbs.,Bryan Caraway,LOSS,U-DEC,3.0,5:00
22979,Allan Zuniga,155 lbs.,John Gunther,LOSS,M-DEC,3.0,5:00
22981,Virgil Zwicker,205 lbs.,Guto Inocente,LOSS,U-DEC,3.0,5:00
22982,Virgil Zwicker,205 lbs.,Brett Albee,WIN,KO/TKO,1.0,1:46


In [121]:
test_list

[('Aaron Riley', 'Joe Brammer'),
 ('Aaron Rosa', 'Joey Beltran'),
 ('Aaron Simpson', 'Ronny Markes'),
 ('Abdul Razak Alhassan', 'Alessio Di Chirico'),
 ('Abel Trujillo', 'Marcus LeVesseur'),
 ('Adlan Amagov', 'Keith Berry'),
 ('Adrian Yanez', 'Brady Huang'),
 ('Adriano Martins', 'Keita Nakamura'),
 ('Aiemann Zahabi', 'Vince Morales'),
 ('Akihiro Gono', 'Dan Hardy'),
 ('Akira Corassani', 'Dustin Poirier'),
 ('Akira Shoji', 'Ebenezer Fontes Braga'),
 ('Al Iaquinta', 'Kevin Lee'),
 ('Alan Belcher', 'Jason MacDonald'),
 ('Alan Jouban', 'Ben Saunders'),
 ('Alan Patrick', 'Garett Whiteley'),
 ('Alatengheili', 'Casey Kenney'),
 ('Albert Morales', 'Brett Johns'),
 ('Albert Tumenov', 'Ildemar Alcantara'),
 ('Alejandro Perez', 'Song Yadong'),
 ('Aleksandar Rakic', 'Francimar Barroso'),
 ('Aleksander Emelianenko', 'Assuerio Silva'),
 ('Aleksei Oleinik', 'Chris Daukaus'),
 ('Alessio Di Chirico', 'Joaquin Buckley'),
 ('Alessio Sakara', 'Chris Weidman'),
 ('Alex Caceres', 'Sergio Pettis'),
 ('Alex C

In [81]:
training_df = df_dup

In [None]:
#remove the reverse matchup as well

## Exploratory Data Analysis (EDA)

In [82]:
#questions that could be of importance:
#what is the most common method of winning?
#for fighters that win through ko/submission what round does it typically occur in?
#is there a discrepancy in the way fighters win across different weight classes?

In [83]:
decisions = ['U-DEC', 'S-DEC', 'M-DEC', 'S-DEC ', 'U-DEC ', 'M-DEC ']
kos = ['KO/TKO', 'KO/TKO ']
submissions = ['SUB', 'SUB ']

In [84]:
#calculating proportions of outcomes
decision_df = df[df['Method'].isin(decisions)]
decision_count = decision_df['Method'].count()

ko_df = df[df['Method'].isin(kos)]
ko_count = ko_df['Method'].count()

submission_df = df[df['Method'].isin(submissions)]
submission_count = submission_df['Method'].count()

print(decision_count/len(df))
print(ko_count/len(df))
print(submission_count/len(df))

0.4286023366508005
0.34714409346603203
0.223929035049762


In [85]:
#make multipliers?
#0.4286023366508005 --> 43% end by decision
#0.34714409346603203 --> 35% end by ko
#0.223929035049762 --> 22% end by submission

In [86]:
def transform_dataset(df):
    data = {
    'Total Fights': df.groupby(['Full Name'])['Matchup'].count(),
    'Total Wins': df.loc[df['Outcome'] == 'WIN'].groupby(['Full Name'])['Outcome'].count(),
    'Total Losses': df.loc[df['Outcome'] == 'LOSS'].groupby(['Full Name'])['Outcome'].count(),
    'Wins By Decision': df.loc[(df['Outcome'] == 'WIN') & (df['Method'].isin(decisions))].groupby(['Full Name'])['Outcome'].count(),
    'Wins By KO': df.loc[(df['Outcome'] == 'WIN') & (df['Method'].isin(kos))].groupby(['Full Name'])['Outcome'].count(),
    'Wins By Submission': df.loc[(df['Outcome'] == 'WIN') & (df['Method'].isin(submissions))].groupby(['Full Name'])['Outcome'].count(),
    'Loss By Decision': df.loc[(df['Outcome'] == 'LOSS') & (df['Method'].isin(decisions))].groupby(['Full Name'])['Outcome'].count(),
    'Loss By KO': df.loc[(df['Outcome'] == 'LOSS') & (df['Method'].isin(kos))].groupby(['Full Name'])['Outcome'].count(),
    'Loss By Submission': df.loc[(df['Outcome'] == 'LOSS') & (df['Method'].isin(submissions))].groupby(['Full Name'])['Outcome'].count()
}
    fighter_stats = pd.DataFrame(data)
    fighter_stats.fillna(0, inplace=True)
    return fighter_stats

In [87]:
df = transform_dataset(training_df)

## Monte Carlo Simulation

In [88]:
import math
import random as rnd

In [89]:
def calculate_mean(statistic, total_fights):
    return statistic/total_fights

def calculate_sd(statistic, total_fights):
    mean = calculate_mean(statistic, total_fights)
    return math.sqrt((mean*(1-mean))/total_fights)

In [90]:
def get_fighter_parameters(fighter1, fighter2):
    data = {"Decision Wins Mean" : [calculate_mean(df.loc[fighter1, "Wins By Decision"], df.loc[fighter1, "Total Fights"]),
                                   calculate_mean(df.loc[fighter2, "Wins By Decision"], df.loc[fighter2, "Total Fights"])], 
            
           "Decision Wins SD" : [calculate_sd(df.loc[fighter1, "Wins By Decision"], df.loc[fighter1, "Total Fights"]),
                                   calculate_sd(df.loc[fighter2, "Wins By Decision"], df.loc[fighter2, "Total Fights"])],
            
           "KO Wins Mean" : [calculate_mean(df.loc[fighter1, "Wins By KO"], df.loc[fighter1, "Total Fights"]),
                                   calculate_mean(df.loc[fighter2, "Wins By KO"], df.loc[fighter2, "Total Fights"])], 
            
            'KO Wins SD' : [calculate_sd(df.loc[fighter1, "Wins By KO"], df.loc[fighter1, "Total Fights"]),
                                   calculate_sd(df.loc[fighter2, "Wins By KO"], df.loc[fighter2, "Total Fights"])], 
           
            'Sub Wins Mean' : [calculate_mean(df.loc[fighter1, "Wins By Submission"], df.loc[fighter1, "Total Fights"]),
                                   calculate_mean(df.loc[fighter2, "Wins By Submission"], df.loc[fighter2, "Total Fights"])],
           
            'Sub Wins SD' : [calculate_sd(df.loc[fighter1, "Wins By Submission"], df.loc[fighter1, "Total Fights"]),
                                   calculate_sd(df.loc[fighter2, "Wins By Submission"], df.loc[fighter2, "Total Fights"])], 
            
            'Decision Loss Mean' : [calculate_mean(df.loc[fighter1, "Loss By Decision"], df.loc[fighter1, "Total Fights"]),
                                   calculate_mean(df.loc[fighter2, "Loss By Decision"], df.loc[fighter2, "Total Fights"])], 
            
            'Decision Loss SD' : [calculate_sd(df.loc[fighter1, "Loss By Decision"], df.loc[fighter1, "Total Fights"]),
                                   calculate_sd(df.loc[fighter2, "Loss By Decision"], df.loc[fighter2, "Total Fights"])], 
            
            'KO Loss Mean' : [calculate_mean(df.loc[fighter1, "Loss By KO"], df.loc[fighter1, "Total Fights"]),
                                   calculate_mean(df.loc[fighter2, "Loss By KO"], df.loc[fighter2, "Total Fights"])], 
            
            'KO Loss SD' : [calculate_sd(df.loc[fighter1, "Loss By KO"], df.loc[fighter1, "Total Fights"]),
                                   calculate_sd(df.loc[fighter2, "Loss By KO"], df.loc[fighter2, "Total Fights"])], 
           
            'Sub Loss Mean' : [calculate_mean(df.loc[fighter1, "Loss By Submission"], df.loc[fighter1, "Total Fights"]),
                                   calculate_mean(df.loc[fighter2, "Loss By Submission"], df.loc[fighter2, "Total Fights"])], 
            
            'Sub Loss SD' : [calculate_sd(df.loc[fighter1, "Loss By Submission"], df.loc[fighter1, "Total Fights"]),
                                   calculate_sd(df.loc[fighter2, "Loss By Submission"], df.loc[fighter2, "Total Fights"])]}
    
    fighter_parameter_df = pd.DataFrame(data=data, index = [fighter1, fighter2])
    return fighter_parameter_df

In [91]:
def gameSim():
    results = []
    fighter1_dec_score = (rnd.gauss(matchup_df.iloc[0]['Decision Wins Mean'],matchup_df.iloc[0]['Decision Wins SD'])+ 
                          rnd.gauss(matchup_df.iloc[1]['Decision Loss Mean'],matchup_df.iloc[1]['Decision Loss SD']))/2
    fighter1_ko_score = (rnd.gauss(matchup_df.iloc[0]['KO Wins Mean'],matchup_df.iloc[0]['KO Wins SD'])+ 
                          rnd.gauss(matchup_df.iloc[1]['KO Loss Mean'],matchup_df.iloc[1]['KO Loss SD']))/2
    fighter1_sub_score = (rnd.gauss(matchup_df.iloc[0]['Sub Wins Mean'],matchup_df.iloc[0]['Sub Wins SD'])+ 
                          rnd.gauss(matchup_df.iloc[1]['Sub Loss Mean'],matchup_df.iloc[1]['Sub Loss SD']))/2
    fighter2_dec_score = (rnd.gauss(matchup_df.iloc[1]['Decision Wins Mean'],matchup_df.iloc[1]['Decision Wins SD'])+ 
                          rnd.gauss(matchup_df.iloc[0]['Decision Loss Mean'],matchup_df.iloc[0]['Decision Loss SD']))/2
    fighter2_ko_score = (rnd.gauss(matchup_df.iloc[1]['KO Wins Mean'],matchup_df.iloc[1]['KO Wins SD'])+ 
                          rnd.gauss(matchup_df.iloc[0]['KO Loss Mean'],matchup_df.iloc[0]['KO Loss SD']))/2
    fighter2_sub_score = (rnd.gauss(matchup_df.iloc[1]['Sub Wins Mean'],matchup_df.iloc[1]['Sub Wins SD'])+ 
                          rnd.gauss(matchup_df.iloc[0]['Sub Loss Mean'],matchup_df.iloc[0]['Sub Loss SD']))/2
    
    #results.append(fighter1_dec_score*0.5713976633491995)
    #results.append(fighter1_ko_score*0.652855906533968)
    #results.append(fighter1_sub_score*0.776070964950238)    
    #results.append(fighter2_dec_score*0.5713976633491995)    
    #results.append(fighter2_ko_score*0.652855906533968)    
    #results.append(fighter2_sub_score*0.776070964950238)     
    
    results.append(fighter1_dec_score)
    results.append(fighter1_ko_score)
    results.append(fighter1_sub_score)    
    results.append(fighter2_dec_score)    
    results.append(fighter2_ko_score)    
    results.append(fighter2_sub_score)
    
    #print(results)
    if max(results) == results[0]:
        return "f1_dec"
    elif max(results) == results[1]:
        return "f1_ko"
    elif max(results) == results[2]:
        return "f1_sub"
    elif max(results) == results[3]:
        return "f2_dec"
    elif max(results) == results[4]:
        return "f2_ko"
    elif max(results) == results[5]:
        return "f2_sub"
    else: return "tie"

In [92]:
def gamesSim(ns):
    matchesout = []
    result_dict = {}
    fighter1_decwin = 0
    fighter1_kowin = 0
    fighter1_subwin = 0
    fighter2_decwin = 0
    fighter2_kowin = 0
    fighter2_subwin = 0
    tie = 0
    for i in range(ns):
        gm = gameSim()
        matchesout.append(gm)
        if gm == "f1_dec":
            fighter1_decwin +=1 
        elif gm == "f1_ko":
            fighter1_kowin +=1 
        elif gm == "f1_sub":
            fighter1_subwin +=1 
        elif gm == "f2_dec":
            fighter2_decwin +=1 
        elif gm == "f2_ko":
            fighter2_kowin +=1 
        elif gm == "f2_sub":
            fighter2_subwin +=1 
        else: tie +=1 
            
    result_dict[matchup_df.index[0] +' Decision Win '] = round((fighter1_decwin/ns)*100,2)
    result_dict[matchup_df.index[0] +' KO Win '] = round((fighter1_kowin/ns)*100,2)
    result_dict[matchup_df.index[0] +' Submission Win '] = round((fighter1_subwin/ns)*100,2)
    result_dict[matchup_df.index[1] +' Decision Win '] = round((fighter2_decwin/ns)*100,2)
    result_dict[matchup_df.index[1] +' KO Win '] = round((fighter2_kowin/ns)*100,2)
    result_dict[matchup_df.index[1] +' Submission Win '] = round((fighter2_subwin/ns)*100,2)
    result_dict["Tie"] = (tie/ns)*100
    
    #print(matchup_df.index[0] +' Decision Win ', round((fighter1_decwin/ns)*100,2),'%')
    #print(matchup_df.index[0] +' KO Win ', round((fighter1_kowin/ns)*100,2),'%')
    #print(matchup_df.index[0] +' Submission Win ', round((fighter1_subwin/ns)*100,2),'%')
    #print(matchup_df.index[1] +' Decision Win ', round((fighter2_decwin/ns)*100,2),'%')
    #print(matchup_df.index[1] +' KO Win ', round((fighter2_kowin/ns)*100,2),'%')
    #print(matchup_df.index[1] +' Submission Win ', round((fighter2_subwin/ns)*100,2),'%')
    #print('Tie ', (tie/ns)*100, '%')
    
    return max(result_dict, key=lambda k: result_dict[k])

## Model Evaluation

In [93]:
results_list = []
for i in range(len(test_list)):
    matchup_df = get_fighter_parameters(test_list[i][0], test_list[i][1])
    results_list.append(gamesSim(1000))

In [94]:
results = []
for outcome in results_list:
    outcome = outcome[:-1]
    results.append(outcome)

In [95]:
def method_simplify(df):
    if df["Method"] in decisions:
        return "Decision"
    elif df["Method"] in kos:
        return "KO"
    else: 
        return "Submission"

In [96]:
df_sampled['Method Summary'] = df_sampled.apply(method_simplify, axis = 1)

In [97]:
df_sampled.head()

Unnamed: 0,Full Name,Weight,Matchup,Outcome,Method,Round,Time,Method Summary
0,Aaron Riley,155 lbs.,Joe Brammer,WIN,U-DEC,3.0,5:00,Decision
1,Aaron Rosa,205 lbs.,Joey Beltran,LOSS,KO/TKO,3.0,1:26,KO
2,Aaron Simpson,170 lbs.,Ronny Markes,LOSS,S-DEC,3.0,5:00,Decision
3,Abdul Razak Alhassan,185 lbs.,Alessio Di Chirico,WIN,KO/TKO,1.0,0:17,KO
4,Abel Trujillo,155 lbs.,Marcus LeVesseur,WIN,KO/TKO,2.0,3:56,KO


In [98]:
def construct_outcome_str(df):
    if df["Outcome"] == 'WIN':
        return df['Full Name'] + " " + df["Method Summary"] + " " + df["Outcome"].title()
    else:
        return df['Matchup'] + " " + df["Method Summary"] + " " + "Win"

In [None]:
#remove win -- no need

In [99]:
df_sampled['Match Summary'] = df_sampled.apply(construct_outcome_str, axis = 1)

In [100]:
df_sampled["Predicted Match Summary"] = results

In [101]:
df_sampled

Unnamed: 0,Full Name,Weight,Matchup,Outcome,Method,Round,Time,Method Summary,Match Summary,Predicted Match Summary
0,Aaron Riley,155 lbs.,Joe Brammer,WIN,U-DEC,3.0,5:00,Decision,Aaron Riley Decision Win,Aaron Riley Decision Win
1,Aaron Rosa,205 lbs.,Joey Beltran,LOSS,KO/TKO,3.0,1:26,KO,Joey Beltran KO Win,Aaron Rosa Decision Win
2,Aaron Simpson,170 lbs.,Ronny Markes,LOSS,S-DEC,3.0,5:00,Decision,Ronny Markes Decision Win,Aaron Simpson KO Win
3,Abdul Razak Alhassan,185 lbs.,Alessio Di Chirico,WIN,KO/TKO,1.0,0:17,KO,Abdul Razak Alhassan KO Win,Abdul Razak Alhassan KO Win
4,Abel Trujillo,155 lbs.,Marcus LeVesseur,WIN,KO/TKO,2.0,3:56,KO,Abel Trujillo KO Win,Abel Trujillo KO Win
...,...,...,...,...,...,...,...,...,...,...
1256,Zelg Galesic,185 lbs.,Taiei Kin,WIN,KO/TKO,1.0,1:05,KO,Zelg Galesic KO Win,Taiei Kin Submission Win
1257,Zhalgas Zhumagulov,125 lbs.,Manel Kape,LOSS,KO/TKO,1.0,4:02,KO,Manel Kape KO Win,Manel Kape Decision Win
1258,Zhang Tiequan,155 lbs.,Darren Elkins,LOSS,U-DEC,3.0,5:00,Decision,Darren Elkins Decision Win,Darren Elkins Decision Win
1259,Zhang Weili,115 lbs.,Danielle Taylor,WIN,U-DEC,3.0,5:00,Decision,Zhang Weili Decision Win,Zhang Weili Decision Win


In [102]:
accuracy = ((df_sampled['Match Summary'] == df_sampled['Predicted Match Summary']).sum()/ len(df_sampled))*100 

In [103]:
accuracy

48.45360824742268

In [104]:
#accuracy of predicting winner and method for fighters with 5 or more fights = 48.53291038858049%
#accuracy of predicting winner and method for fighters with 10 or more fights = 42.97658862876254%

In [105]:
actual_winners = []
for index, row in df_sampled.iterrows():
    temp_list = row["Match Summary"].split()[:-2]
    full_name = " ".join(temp_list)
    actual_winners.append(full_name)

In [106]:
predicted_winners = []
for index, row in df_sampled.iterrows():
    temp_list = row["Predicted Match Summary"].split()[:-2]
    full_name = " ".join(temp_list)
    predicted_winners.append(full_name)

In [108]:
count = 0
for winner1, winner2 in zip(actual_winners, predicted_winners):
    if winner1 == winner2:
        count += 1

In [109]:
count

863

In [112]:
(863/1261)*100

68.43774781919112

In [111]:
#accuracy of predicting winner for fighters with 5 or more fights = 68.43774781919112%
#accuracy of predicting winner for fighters with 10 or more fights = 64.71571906354515%

In [None]:
#simplify code throughout especially evaluation