In [1]:
import pandas as pd
import numpy as np
import re
# Show every column when a DataFrame is displayed (None disables the column limit)
pd.set_option('display.max_columns', None)

In [2]:
# Load the merged UFC dataset; the path is relative to the notebook's working directory
data = pd.read_csv("ufc_data_merged.csv")

In [3]:
# Changes suffix to prefix 
def fix_suffix_to_prefix(column_name):
    """Normalise a column name and move its fighter tag to the front.

    The name is lower-cased, '.' and ' ' become '_', and ':' is removed.
    A fighter tag ('f1' or 'f2') found as a trailing '_f1'/'_f2' suffix, or
    embedded as '_f1_'/'_f2_', is then turned into an 'f1_'/'f2_' prefix.

    Args:
        column_name: Raw column label (str).

    Returns:
        The normalised label; prefixed with 'f1_' or 'f2_' when a fighter tag
        was present, otherwise just the normalised text.
    """
    normalized = column_name.lower().replace('.', '_').replace(':', '').replace(' ', '_')

    # Trailing tag: "<stat>_f1" -> "f1_<stat>"
    for tag in ('f1', 'f2'):
        if normalized.endswith('_' + tag):
            return f'{tag}_{normalized[:-3]}'

    # Embedded tag: "<a>_f1_<b>" -> "f1_<a>_<b>". The tag must be kept as a
    # prefix; simply deleting it would give both fighters the same column name.
    for tag in ('f1', 'f2'):
        infix = f'_{tag}_'
        if infix in normalized:
            return f'{tag}_' + normalized.replace(infix, '_')

    return normalized

# Sorts columns
def custom_sort(column_name):
    """Sort key that groups columns by fighter.

    Returns a (group, name) tuple: group 0 for columns without a fighter
    prefix, 1 for 'f1_' columns and 2 for everything else (the 'f2_' columns),
    so sorting yields shared columns first, then f1, then f2, each group in
    alphabetical order.
    """
    if column_name.startswith('f1_'):
        group = 1
    elif column_name.startswith('f2_'):
        group = 2
    else:
        group = 0
    return (group, column_name)

In [4]:
# Normalise every column name so per-fighter stats carry an f1_/f2_ prefix
data.columns = list(map(fix_suffix_to_prefix, data.columns))
# Reorder the frame: shared columns first, then f1_*, then f2_*
sorted_columns = sorted(data.columns, key=custom_sort)
data = data.loc[:, sorted_columns]

In [5]:
# Accumulator for redundant and unnecessary columns; names are appended to it
# in later cells and removed from `data` in a single drop
drop_list = []
# Preview the first two rows of the renamed, reordered frame
data.head(2)

Unnamed: 0,card_name,end_time,f1,f2,fight_date,fights_location,method,referee,round,time_format,weightclass,winner,f1_body,f1_clinch,f1_control,f1_distance,f1_dob,f1_fighter,f1_ground,f1_head,f1_height,f1_kd,f1_leg,f1_reach,f1_record,f1_rev,f1_sapm,f1_sig_str,f1_sig_str_%,f1_slpm,f1_stance,f1_str__acc_,f1_str__def,f1_sub__avg_,f1_sub_att,f1_td,f1_td_%,f1_td_acc_,f1_td_avg_,f1_td_def_,f1_total_str,f1_unnamed_11,f1_url,f1_weight,f2_body,f2_clinch,f2_control,f2_distance,f2_dob,f2_fighter,f2_ground,f2_head,f2_height,f2_kd,f2_leg,f2_reach,f2_record,f2_rev,f2_sapm,f2_sig_str,f2_sig_str_%,f2_slpm,f2_stance,f2_str__acc_,f2_str__def,f2_sub__avg_,f2_sub_att,f2_td,f2_td_%,f2_td_acc_,f2_td_avg_,f2_td_def_,f2_total_str,f2_unnamed_11,f2_url,f2_weight
0,UFC Fight Night: Barboza vs. Murphy,5:00,Lerone Murphy,Edson Barboza,"May 18, 2024","Las Vegas, Nevada, USA",Decision - Unanimous,Herb Dean,5,5,Featherweight Bout,Lerone Murphy,27 of 34,13 of 17,5:19,205 of 344,"Jul 22, 1991",Lerone Murphy,2 of 3,172 of 308,"5' 9""",0,21 of 22,"73""",Record: 14-0-1,0,2.6,220 of 364,60%,5.01,Orthodox,54%,60%,0.6,0,4 of 6,66%,52%,1.59,46%,259 of 410,,http://www.ufcstats.com/fighter-details/396fe8...,145 lbs.,15 of 21,2 of 3,0:17,77 of 239,"Jan 21, 1986",Edson Barboza,0 of 0,38 of 189,"5' 11""",0,26 of 32,"75""",Record: 24-12-0,0,4.65,79 of 242,32%,4.11,Orthodox,44%,56%,0.1,0,0 of 0,---,50%,0.45,72%,79 of 242,,http://www.ufcstats.com/fighter-details/64a50d...,145 lbs.
1,UFC Fight Night: Barboza vs. Murphy,1:30,Khaos Williams,Carlston Harris,"May 18, 2024","Las Vegas, Nevada, USA",KO/TKO,Dan Miragliotta,1,3,Welterweight Bout,Khaos Williams,2 of 4,0 of 0,0:01,11 of 19,"Mar 30, 1994",Khaos Williams,1 of 1,5 of 9,"6' 0""",1,5 of 7,"77""",Record: 15-3-0,0,5.42,12 of 20,60%,5.81,Orthodox,39%,42%,0.0,0,0 of 0,---,0%,0.0,80%,12 of 20,,http://www.ufcstats.com/fighter-details/2558ae...,170 lbs.,1 of 2,0 of 0,0:00,14 of 30,"Jul 09, 1987",Carlston Harris,0 of 0,10 of 25,"6' 0""",0,3 of 3,"76""",Record: 19-6-0,0,2.66,14 of 30,46%,3.18,Orthodox,49%,52%,0.8,0,0 of 0,---,29%,1.97,55%,14 of 30,,http://www.ufcstats.com/fighter-details/a53d30...,170 lbs.


In [6]:
# Recode the winner column: the literal '0' is kept, every other value becomes 'f1'.
# NOTE(review): the value is never compared against the f1/f2 names, so this
# presumably relies on the winner always being listed as f1 — confirm.
data['winner'] = data['winner'].mask(data['winner'].ne('0'), 'f1')

In [7]:
# Sanity check: how many fights ended up with each winner label
data['winner'].value_counts()

winner
f1    7506
0      143
Name: count, dtype: int64

In [8]:
# Queue the fighter-name, location, URL, placeholder ('unnamed') and card-name
# columns for removal
drop_list.extend([
    'f1', 'f2',
    'fights_location',
    'f1_fighter', 'f1_unnamed_11', 'f1_url',
    'f2_fighter', 'f2_unnamed_11', 'f2_url',
    'card_name',
])

In [9]:
# Remove every column queued in drop_list. The result is rebound rather than
# mutated in place (inplace=True gives no benefit and hides state changes).
data = data.drop(columns=drop_list)

#### Filtering the data before looking at the actual values: the records for old fights have some issues, and since those fights are less relevant I will remove them

In [10]:
# TODO: remove all open-weight bouts; this cell only lists the distinct weight-class labels
data['weightclass'].unique()

array(['Featherweight Bout', 'Welterweight Bout', 'Bantamweight Bout',
       "Women's Strawweight Bout", 'Lightweight Bout',
       'Light Heavyweight Bout', "Women's Bantamweight Bout",
       'Middleweight Bout', 'Heavyweight Bout', 'Flyweight Bout',
       "Women's Flyweight Bout", 'UFC Flyweight Title Bout',
       'UFC Light Heavyweight Title Bout',
       "UFC Women's Strawweight Title Bout",
       'UFC Bantamweight Title Bout', 'UFC Featherweight Title Bout',
       'UFC Middleweight Title Bout',
       "UFC Women's Bantamweight Title Bout",
       'UFC Welterweight Title Bout',
       'UFC Interim Heavyweight Title Bout', 'Catch Weight Bout',
       'UFC Lightweight Title Bout', "UFC Women's Flyweight Title Bout",
       "Women's Featherweight Bout", 'UFC Heavyweight Title Bout',
       'UFC Interim Featherweight Title Bout',
       'UFC Interim Flyweight Title Bout',
       'UFC Interim Bantamweight Title Bout',
       "UFC Women's Featherweight Title Bout",
       'UFC Inte

In [11]:
# Parse the textual fight dates into datetime values and store them back
fight_dates = pd.to_datetime(data['fight_date'])
data['fight_date'] = fight_dates

# Keep the calendar year of each fight as its own column
data['year'] = fight_dates.dt.year

# Count fights per (year, weight class); combinations without fights are filled with 0
weightclass_counts = data.pivot_table(
    index='year',
    columns='weightclass',
    aggfunc='size',
    fill_value=0,
)

# Show the table followed by its (rows, columns) shape, then the column labels
display(weightclass_counts, weightclass_counts.shape)
weightclass_counts.columns

weightclass,Bantamweight Bout,Catch Weight Bout,Featherweight Bout,Flyweight Bout,Heavyweight Bout,Light Heavyweight Bout,Lightweight Bout,Middleweight Bout,Open Weight Bout,Super Heavyweight Bout,TUF Nations Canada vs. Australia Middleweight Tournament Title Bout,TUF Nations Canada vs. Australia Welterweight Tournament Title Bout,UFC 10 Tournament Title Bout,UFC 13 Heavyweight Tournament Title Bout,UFC 13 Lightweight Tournament Title Bout,UFC 14 Heavyweight Tournament Title Bout,UFC 14 Middleweight Tournament Title Bout,UFC 15 Heavyweight Tournament Title Bout,UFC 17 Middleweight Tournament Title Bout,UFC 2 Tournament Title Bout,UFC 3 Tournament Title Bout,UFC 4 Tournament Title Bout,UFC 5 Tournament Title Bout,UFC 6 Tournament Title Bout,UFC 7 Tournament Title Bout,UFC 8 Tournament Title Bout,UFC Bantamweight Title Bout,UFC Featherweight Title Bout,UFC Flyweight Title Bout,UFC Heavyweight Title Bout,UFC Interim Bantamweight Title Bout,UFC Interim Featherweight Title Bout,UFC Interim Flyweight Title Bout,UFC Interim Heavyweight Title Bout,UFC Interim Light Heavyweight Title Bout,UFC Interim Lightweight Title Bout,UFC Interim Middleweight Title Bout,UFC Interim Welterweight Title Bout,UFC Light Heavyweight Title Bout,UFC Lightweight Title Bout,UFC Middleweight Title Bout,UFC Superfight Championship Bout,UFC Welterweight Title Bout,UFC Women's Bantamweight Title Bout,UFC Women's Featherweight Title Bout,UFC Women's Flyweight Title Bout,UFC Women's Strawweight Title Bout,Ultimate Fighter 1 Light Heavyweight Tournament Title Bout,Ultimate Fighter 1 Middleweight Tournament Title Bout,Ultimate Fighter 10 Heavyweight Tournament Title Bout,Ultimate Fighter 11 Middleweight Tournament Title Bout,Ultimate Fighter 12 Lightweight Tournament Title Bout,Ultimate Fighter 13 Welterweight Tournament Title Bout,Ultimate Fighter 14 Bantamweight Tournament Title Bout,Ultimate Fighter 14 Featherweight Tournament Title Bout,Ultimate Fighter 15 Lightweight Tournament Title Bout,Ultimate 
Fighter 16 Welterweight Tournament Title Bout,Ultimate Fighter 17 Middleweight Tournament Title Bout,Ultimate Fighter 18 Bantamweight Tournament Title Bout,Ultimate Fighter 18 Women's Bantamweight Tournament Title Bout,Ultimate Fighter 19 Light Heavyweight Tournament Title Bout,Ultimate Fighter 19 Middleweight Tournament Title Bout,Ultimate Fighter 2 Heavyweight Tournament Title Bout,Ultimate Fighter 2 Welterweight Tournament Title Bout,Ultimate Fighter 21 Welterweight Tournament Title Bout,Ultimate Fighter 22 Lightweight Tournament Title Bout,Ultimate Fighter 23 Light Heavyweight Tournament Title Bout,Ultimate Fighter 23 Women's Strawweight Tournament Title Bout,Ultimate Fighter 25 Welterweight Tournament Title Bout,Ultimate Fighter 27 Featherweight Tournament Title Bout,Ultimate Fighter 27 Lightweight Tournament Title Bout,Ultimate Fighter 28 Heavyweight Tournament Title Bout,Ultimate Fighter 28 Women's Featherweight Tournament Title Bout,Ultimate Fighter 3 Light Heavyweight Tournament Title Bout,Ultimate Fighter 3 Middleweight Tournament Title Bout,Ultimate Fighter 4 Middleweight Tournament Title Bout,Ultimate Fighter 4 Welterweight Tournament Title Bout,Ultimate Fighter 5 Lightweight Tournament Title Bout,Ultimate Fighter 6 Welterweight Tournament Title Bout,Ultimate Fighter 7 Middleweight Tournament Title Bout,Ultimate Fighter 8 Light Heavyweight Tournament Title Bout,Ultimate Fighter 8 Lightweight Tournament Title Bout,Ultimate Fighter 9 Lightweight Tournament Title Bout,Ultimate Fighter 9 Welterweight Tournament Title Bout,Ultimate Fighter Australia vs. UK Lightweight Tournament Title Bout,Ultimate Fighter Australia vs. 
UK Welterweight Tournament Title Bout,Ultimate Fighter Brazil 1 Featherweight Tournament Title Bout,Ultimate Fighter Brazil 1 Middleweight Tournament Title Bout,Ultimate Fighter Brazil 2 Welterweight Tournament Title Bout,Ultimate Fighter Brazil 3 Heavyweight Tournament Title Bout,Ultimate Fighter Brazil 3 Middleweight Tournament Title Bout,Ultimate Fighter Brazil 4 Bantamweight Tournament Title Bout,Ultimate Fighter Brazil 4 Lightweight Tournament Title Bout,Ultimate Fighter China Featherweight Tournament Title Bout,Ultimate Fighter China Welterweight Tournament Title Bout,Ultimate Fighter Latin America 2 Lightweight Tournament Title Bout,Ultimate Fighter Latin America 2 Welterweight Tournament Title Bout,Ultimate Fighter Latin America 3 Lightweight Tournament Title Bout,Ultimate Fighter Latin America Bantamweight Tournament Title Bout,Ultimate Fighter Latin America Featherweight Tournament Title Bout,Ultimate Japan 2 Heavyweight Tournament Title Bout,Ultimate Japan Heavyweight Tournament Title Bout,Ultimate Ultimate '95 Tournament Title Bout,Ultimate Ultimate '96 Tournament Title Bout,Welterweight Bout,Women's Bantamweight Bout,Women's Featherweight Bout,Women's Flyweight Bout,Women's Strawweight Bout
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1
1994,0,0,0,0,0,0,0,0,26,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1995,0,0,0,0,0,0,0,0,27,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1996,0,0,0,0,0,0,0,0,31,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
1997,0,0,0,0,18,0,10,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1998,0,0,0,0,6,0,3,7,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1999,0,0,0,0,17,0,6,13,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2000,0,0,0,0,10,0,7,12,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0
2001,0,0,0,0,8,5,5,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,3,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0
2002,0,0,0,0,9,8,7,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0
2003,0,0,0,0,11,3,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,1,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0


(31, 109)

Index(['Bantamweight Bout', 'Catch Weight Bout', 'Featherweight Bout',
       'Flyweight Bout', 'Heavyweight Bout', 'Light Heavyweight Bout',
       'Lightweight Bout', 'Middleweight Bout', 'Open Weight Bout',
       'Super Heavyweight Bout',
       ...
       'Ultimate Fighter Latin America Featherweight Tournament Title Bout',
       'Ultimate Japan 2 Heavyweight Tournament Title Bout',
       'Ultimate Japan Heavyweight Tournament Title Bout',
       'Ultimate Ultimate '95 Tournament Title Bout',
       'Ultimate Ultimate '96 Tournament Title Bout', 'Welterweight Bout',
       'Women's Bantamweight Bout', 'Women's Featherweight Bout',
       'Women's Flyweight Bout', 'Women's Strawweight Bout'],
      dtype='object', name='weightclass', length=109)

In [16]:
# Clean up the weight classes: strip series/branding fragments from each label
# and collect the distinct results.
cleaned_weight_classes = set()

# Only distinct labels matter, so iterate over the unique values instead of
# every row; missing values are skipped because they have no .strip().
for wc in data['weightclass'].dropna().unique():
    # Fragments are removed in this fixed order. Apostrophes are stripped too,
    # which turns "Women's" into "Womens".
    # NOTE(review): an earlier version followed this with
    # .replace("Women's", "Women's "), which could never match once the
    # apostrophes were gone; it was dropped as dead code. Confirm whether
    # "Womens" is the intended spelling.
    for fragment in (
        'Ultimate Fighter', 'Ultimate Ultimate',
        'Brazil', 'China', 'Latin America',
        'Australia vs. UK', 'Japan',
        "'",
    ):
        wc = wc.replace(fragment, '')
    # Removing a fragment can leave whitespace at the edges of the label
    cleaned_weight_classes.add(wc.strip())

# Convert back to a sorted list
cleaned_weight_classes = sorted(cleaned_weight_classes)

# Display the cleaned weight classes
print(cleaned_weight_classes)

['1 Featherweight Tournament Title Bout', '1 Light Heavyweight Tournament Title Bout', '1 Middleweight Tournament Title Bout', '10 Heavyweight Tournament Title Bout', '11 Middleweight Tournament Title Bout', '12 Lightweight Tournament Title Bout', '13 Welterweight Tournament Title Bout', '14 Bantamweight Tournament Title Bout', '14 Featherweight Tournament Title Bout', '15 Lightweight Tournament Title Bout', '16 Welterweight Tournament Title Bout', '17 Middleweight Tournament Title Bout', '18 Bantamweight Tournament Title Bout', '18 Womens Bantamweight Tournament Title Bout', '19 Light Heavyweight Tournament Title Bout', '19 Middleweight Tournament Title Bout', '2 Heavyweight Tournament Title Bout', '2 Lightweight Tournament Title Bout', '2 Welterweight Tournament Title Bout', '21 Welterweight Tournament Title Bout', '22 Lightweight Tournament Title Bout', '23 Light Heavyweight Tournament Title Bout', '23 Womens Strawweight Tournament Title Bout', '25 Welterweight Tournament Title Bout