In [1]:
import pandas as pd

# Load the UFC fight statistics dataset
df_fight_stats_round = pd.read_csv('../04.csv_clean/ufc_fight_stats_round.csv')

# Convert 'date' column to datetime format
df_fight_stats_round['date'] = pd.to_datetime(df_fight_stats_round['date'], errors='coerce')

# Filter out rows < 2016-01-01
df_fight_stats_round = df_fight_stats_round[df_fight_stats_round['date'] >= '2016-01-01'].reset_index(drop=True)

# Display columns
df_fight_stats_round.head(2)
#df_fight_stats_round.shape
#df_fight_stats_round.dtypes

Unnamed: 0,date,event,bout,fighter,opponent,round,round_time,sig_str_land,sig_str_att,sig_str_land_opp,...,sub_att,sub_att_opp,rev,ctrl,distance_land,clinch_land,ground_land,head_land,body_land,leg_land
0,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Alexander Volkanovski vs. Diego Lopes,Alexander Volkanovski,Diego Lopes,1,5.0,12,27,12.0,...,0.0,0.0,0.0,0:27,12,0,0,5,0,7
1,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Alexander Volkanovski vs. Diego Lopes,Alexander Volkanovski,Diego Lopes,1,5.0,12,27,12.0,...,0.0,0.0,0.0,0:27,12,0,0,5,0,7


In [2]:
# AGGREGATE STATS PER FIGHTER PER BOUT
df_fight_stats_fight = df_fight_stats_round.groupby(['event', 'date', 'bout', 'fighter', 'opponent']).agg({
    'round': 'max',
    'round_time': 'sum',
    'sig_str_land': 'sum',
    'sig_str_att': 'sum',
    'sig_str_land_opp': 'sum',
    'sig_str_att_opp': 'sum',
    'total_str_land': 'sum',
    'total_str_att': 'sum',
    'kd': 'sum',
    'td_land': 'sum',
    'td_att': 'sum',
    'td_land_opp': 'sum',
    'td_att_opp': 'sum',
    'sub_att': 'sum',
    'sub_att_opp': 'sum',
    'rev': 'sum',
    'ctrl': 'sum',
    'distance_land': 'sum',
    'clinch_land': 'sum',
    'ground_land': 'sum',
    'head_land': 'sum',
    'body_land': 'sum',
    'leg_land': 'sum'
}).reset_index()

# Calculate accuracy percentages
df_fight_stats_fight['sig_str_acc_%'] = df_fight_stats_fight['sig_str_land'] / df_fight_stats_fight['sig_str_att']
df_fight_stats_fight['total_str_acc_%'] = df_fight_stats_fight['total_str_land'] / df_fight_stats_fight['total_str_att']
df_fight_stats_fight['td_acc_%'] = df_fight_stats_fight['td_land'] / df_fight_stats_fight['td_att']

# Calculate sig_str_def_% (Significant Strike Defense Percentage)
df_fight_stats_fight['sig_str_def_%'] = 1 - (df_fight_stats_fight['sig_str_land_opp'] / df_fight_stats_fight['sig_str_att_opp'])

# Calculate td_def_% (Takedown Defense Percentage)
df_fight_stats_fight['td_def_%'] = 1 - (df_fight_stats_fight['td_land_opp'] / df_fight_stats_fight['td_att_opp'])

# Calculate place and positions of sig_str_land
df_fight_stats_fight['distance_ss_pct'] = df_fight_stats_fight['distance_land'] / df_fight_stats_fight['sig_str_land']
df_fight_stats_fight['clinch_ss_pct'] = df_fight_stats_fight['clinch_land'] / df_fight_stats_fight['sig_str_land']
df_fight_stats_fight['ground_ss_pct'] = df_fight_stats_fight['ground_land'] / df_fight_stats_fight['sig_str_land']
df_fight_stats_fight['head_ss_pct'] = df_fight_stats_fight['head_land'] / df_fight_stats_fight['sig_str_land']
df_fight_stats_fight['body_ss_pct'] = df_fight_stats_fight['body_land'] / df_fight_stats_fight['sig_str_land']
df_fight_stats_fight['leg_ss_pct'] = df_fight_stats_fight['leg_land'] / df_fight_stats_fight['sig_str_land']

# Reorder columns for better readability
df_fight_stats_fight = df_fight_stats_fight[[
    # General information
    'date', 'event', 'bout', 'fighter', 'opponent', 'round', 'round_time',
    # Striking stats
    'sig_str_land', 'sig_str_att', 'sig_str_acc_%', 'sig_str_land_opp', 'sig_str_att_opp', 'sig_str_def_%', 'total_str_land', 'total_str_att', 'total_str_acc_%', 'kd',
    # Grappling stats
    'td_land', 'td_att', 'td_acc_%', 'td_land_opp', 'td_att_opp', 'td_def_%', 'sub_att', 'sub_att_opp', 'rev', 'ctrl',
    # Fight style stats and effectiveness
    'distance_ss_pct', 'clinch_ss_pct', 'ground_ss_pct',
    # Significant strike locations
    'head_ss_pct', 'body_ss_pct', 'leg_ss_pct'
]]


# Change date column to datetime
df_fight_stats_fight['date'] = pd.to_datetime(df_fight_stats_fight['date'], errors='coerce') 

# Sort the datafram by 'date' and 'bout' in descending order
df_fight_stats_fight.sort_values(by=['date', 'bout'], ascending=False, inplace=True)

# Save the aggregated dataframe
df_fight_stats_fight.to_csv('../04.csv_clean/ufc_fight_stats_fight.csv', index=False)

# Display columns
df_fight_stats_fight.head(10)
#df_fight_stats_fight.shape

Unnamed: 0,date,event,bout,fighter,opponent,round,round_time,sig_str_land,sig_str_att,sig_str_acc_%,...,sub_att,sub_att_opp,rev,ctrl,distance_ss_pct,clinch_ss_pct,ground_ss_pct,head_ss_pct,body_ss_pct,leg_ss_pct
2766,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Tai Tuivasa vs. Tallison Teixeira,Tai Tuivasa,Tallison Teixeira,3,15.0,42,58,0.724138,...,0.0,0.0,0.0,0:420:063:10,0.642857,0.261905,0.095238,0.5,0.238095,0.261905
2767,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Tai Tuivasa vs. Tallison Teixeira,Tallison Teixeira,Tai Tuivasa,3,15.0,51,74,0.689189,...,0.0,0.0,0.0,3:573:010:02,0.509804,0.078431,0.411765,0.764706,0.196078,0.039216
2764,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Sulangrangbo vs. Lawrence Lui,Lawrence Lui,Sulangrangbo,3,15.0,52,150,0.346667,...,0.0,0.0,0.0,1:411:002:18,0.75,0.115385,0.134615,0.788462,0.115385,0.096154
2765,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Sulangrangbo vs. Lawrence Lui,Sulangrangbo,Lawrence Lui,3,15.0,56,125,0.448,...,0.0,0.0,0.0,0:090:050:00,0.928571,0.071429,0.0,0.946429,0.053571,0.0
2762,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Sangwook Kim vs. Dom Mar Fan,Dom Mar Fan,Sangwook Kim,3,15.0,99,173,0.572254,...,0.0,0.0,1.0,1:251:420:00,0.636364,0.292929,0.070707,0.79798,0.181818,0.020202
2763,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Sangwook Kim vs. Dom Mar Fan,Sangwook Kim,Dom Mar Fan,3,15.0,68,134,0.507463,...,0.0,0.0,0.0,3:141:021:14,0.735294,0.264706,0.0,0.544118,0.426471,0.029412
2760,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Rafael Fiziev vs. Mauricio Ruffy,Mauricio Ruffy,Rafael Fiziev,2,9.5,55,88,0.625,...,0.0,0.0,0.0,0:250:20,0.763636,0.018182,0.218182,0.818182,0.072727,0.109091
2761,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Rafael Fiziev vs. Mauricio Ruffy,Rafael Fiziev,Mauricio Ruffy,2,9.5,36,66,0.545455,...,0.0,0.0,0.0,0:000:00,0.944444,0.055556,0.0,0.25,0.416667,0.333333
2758,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Quillan Salkilld vs. Jamie Mullarkey,Jamie Mullarkey,Quillan Salkilld,1,3.03,1,4,0.25,...,0.0,1.0,0.0,1:14,0.0,1.0,0.0,1.0,0.0,0.0
2759,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Quillan Salkilld vs. Jamie Mullarkey,Quillan Salkilld,Jamie Mullarkey,1,3.03,5,8,0.625,...,1.0,0.0,0.0,0:39,0.6,0.4,0.0,0.2,0.2,0.6
