In [7]:
# Import necessary libraries
import pandas as pd

# Load the UFC fight statistics and events dataset
df_fight_stats_fight = pd.read_csv('../04.csv_clean/ufc_fight_stats_fight.csv')
df_events = pd.read_csv('../04.csv_clean/ufc_event_details_clean.csv')

# Convert date columns to datetime
df_fight_stats_fight['date'] = pd.to_datetime(df_fight_stats_fight['date'], errors='coerce')
df_events['date'] = pd.to_datetime(df_events['date'], errors='coerce')

# Display columns and types
#df_fight_stats_fight.dtypes
#df_events.dtypes
df_fight_stats_fight.head(5)

Unnamed: 0,date,event,bout,fighter,opponent,round,round_time,sig_str_land,sig_str_att,sig_str_acc_%,...,sub_att,sub_att_opp,rev,ctrl,distance_ss_pct,clinch_ss_pct,ground_ss_pct,head_ss_pct,body_ss_pct,leg_ss_pct
0,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Tai Tuivasa vs. Tallison Teixeira,Tai Tuivasa,Tallison Teixeira,3,15.0,42,58,0.724138,...,0.0,0.0,0.0,0:420:063:10,0.642857,0.261905,0.095238,0.5,0.238095,0.261905
1,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Tai Tuivasa vs. Tallison Teixeira,Tallison Teixeira,Tai Tuivasa,3,15.0,51,74,0.689189,...,0.0,0.0,0.0,3:573:010:02,0.509804,0.078431,0.411765,0.764706,0.196078,0.039216
2,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Sulangrangbo vs. Lawrence Lui,Lawrence Lui,Sulangrangbo,3,15.0,52,150,0.346667,...,0.0,0.0,0.0,1:411:002:18,0.75,0.115385,0.134615,0.788462,0.115385,0.096154
3,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Sulangrangbo vs. Lawrence Lui,Sulangrangbo,Lawrence Lui,3,15.0,56,125,0.448,...,0.0,0.0,0.0,0:090:050:00,0.928571,0.071429,0.0,0.946429,0.053571,0.0
4,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Sangwook Kim vs. Dom Mar Fan,Dom Mar Fan,Sangwook Kim,3,15.0,99,173,0.572254,...,0.0,0.0,1.0,1:251:420:00,0.636364,0.292929,0.070707,0.79798,0.181818,0.020202


In [8]:
import pandas as pd

# Ensure date column is in datetime format
df_fight_stats_fight['date'] = pd.to_datetime(df_fight_stats_fight['date'], errors='coerce')

# Filter only the fighter you want
#fighter_name = 'Ilia Topuria'
#df_fight_stats_fight = df_fight_stats_fight[df_fight_stats_fight['fighter'] == fighter_name].copy()

# Sort fights chronologically
df_fight_stats_fight = df_fight_stats_fight.sort_values('date', ascending=True)

# Columns to accumulate
stats_cols = [
    'round', 'round_time', 'sig_str_land', 'sig_str_att', 'sig_str_land_opp', 'sig_str_att_opp', 'td_land', 'td_att', 'td_land_opp', 'td_att_opp', 'sub_att', 'sub_att_opp'
]

# Compute cumulative stats prior each fight
# (shift(1) ensures the current fight is excluded)
for col in stats_cols:
    df_fight_stats_fight[f'{col}_cum'] = df_fight_stats_fight.groupby('fighter')[col].cumsum().shift(1)

# Replace NaN for the first fight with 0
df_fight_stats_fight[[f'{c}_cum' for c in stats_cols]] = df_fight_stats_fight[
    [f'{c}_cum' for c in stats_cols]
].fillna(0)

# Calculate 'sslpm_cum' (statistical significant strikes landed per minute) 
df_fight_stats_fight['sslpm_cum'] = (df_fight_stats_fight['sig_str_land_cum'] / df_fight_stats_fight['round_time_cum']).astype(float)

# Calculate 'sig_str_acc_%_cum' (statistical significant strike accuracy percentage)
df_fight_stats_fight['sig_str_acc_%_cum'] = (df_fight_stats_fight['sig_str_land_cum'] / df_fight_stats_fight['sig_str_att_cum']).astype(float)

# Calculate 'ssapm_cum' (opponent's statistical significant strikes landed per minute)
df_fight_stats_fight['ssapm_cum'] = (df_fight_stats_fight['sig_str_land_opp_cum'] / df_fight_stats_fight['round_time_cum']).astype(float)

# Calculate 'sig_str_def_%_cum' (statistical significant strike defense percentage)
df_fight_stats_fight['sig_str_def_%_cum'] = 1 - (df_fight_stats_fight['sig_str_land_opp_cum'] / df_fight_stats_fight['sig_str_att_opp_cum']).astype(float)

# Calculate 'td_land_15m_cum' (statistical takedowns landed average per 15 minutes)
df_fight_stats_fight['td_land_15m_cum'] = ((df_fight_stats_fight['td_land_cum'] / df_fight_stats_fight['round_time_cum']) * 15).astype(float)

# Calculate 'td_acc_%_cum' (statistical takedown accuracy percentage)
df_fight_stats_fight['td_acc_%_cum'] = (df_fight_stats_fight['td_land_cum'] / df_fight_stats_fight['td_att_cum']).astype(float)

# Calculate 'td_abs_15m_cum' (statistical takedowns absorbed average per 15 minutes)
df_fight_stats_fight['td_abs_15m_cum'] = ((df_fight_stats_fight['td_land_opp_cum'] / df_fight_stats_fight['round_time_cum']) * 15).astype(float)

# Calculate 'td_def_%_cum' (statistical takedown defense percentage)
df_fight_stats_fight['td_def_%_cum'] = 1 - (df_fight_stats_fight['td_land_opp_cum'] / df_fight_stats_fight['td_att_opp_cum']).astype(float)

# Calculate 'sub_att_avg_15m_cum' (statistical submission attempts average per 15 minutes)
df_fight_stats_fight['sub_att_15m_cum'] = ((df_fight_stats_fight['sub_att_cum'] / df_fight_stats_fight['round_time_cum']) * 15).astype(float)

# Calculate 'sub_att_abs_15m_cum' (statistical submission attempts absorbed average per 15 minutes)
df_fight_stats_fight['sub_att_abs_15m_cum'] = ((df_fight_stats_fight['sub_att_opp_cum'] / df_fight_stats_fight['round_time_cum']) * 15).astype(float)

# Sort last date first for display
df_fight_stats_fight = df_fight_stats_fight.sort_values('date', ascending=False)

# Select relevant columns
df_fight_stats_fight = df_fight_stats_fight[['date', 'event', 'fighter', 'opponent', 'round_time_cum', 'sslpm_cum', 'sig_str_acc_%_cum', 'ssapm_cum', 'sig_str_def_%_cum', 'td_land_15m_cum', 'td_acc_%_cum', 'td_abs_15m_cum', 'td_def_%_cum', 'sub_att_15m_cum', 'sub_att_abs_15m_cum']] #+ [f'{c}_cum' for c in stats_cols]].head(10)

# Save the updated dataframe to a new CSV file
df_fight_stats_fight.to_csv('../04.csv_clean/ufc_fight_stats_fight_prior.csv', index=False)

#df_fight_stats_fight.columns
df_fight_stats_fight.head(5)

Unnamed: 0,date,event,fighter,opponent,round_time_cum,sslpm_cum,sig_str_acc_%_cum,ssapm_cum,sig_str_def_%_cum,td_land_15m_cum,td_acc_%_cum,td_abs_15m_cum,td_def_%_cum,sub_att_15m_cum,sub_att_abs_15m_cum
0,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Tai Tuivasa,Tallison Teixeira,35.4,2.711864,0.564706,3.587571,0.461864,0.847458,0.222222,0.0,,0.847458,0.847458
16,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Jacob Malkoun,Torrez Finney,13.8,5.724638,0.387255,3.26087,0.621849,1.086957,1.0,0.0,,0.0,0.0
9,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Quillan Salkilld,Jamie Mullarkey,19.98,2.452452,0.569767,3.403403,0.50365,0.0,0.0,0.750751,0.5,0.0,0.0
8,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Jamie Mullarkey,Quillan Salkilld,20.85,4.98801,0.594286,2.494005,0.458333,7.194245,0.333333,0.719424,0.857143,0.719424,0.0
7,2026-01-31,UFC 325: Volkanovski vs. Lopes 2,Rafael Fiziev,Mauricio Ruffy,121.83,4.104079,0.466418,4.276451,0.544182,2.462448,0.307692,0.615612,0.705882,0.123122,0.369367
