# Feature Engineering

### Connect to Postgres

In [1]:
import os
import psycopg2
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

DB_PARAMS = {
  "password": os.getenv("POSTGRES_PASSWORD"),
}

conn_string = f"postgresql://erickim:{DB_PARAMS['password']}@localhost:5432/ufc"
engine = create_engine(conn_string)

query1 = 'SELECT * FROM fighter_fights;'
fighter_fights = pd.read_sql(query1, con=engine)

query2 = 'SELECT * FROM fights;'
fights = pd.read_sql(query2, con=engine)

# Set the maximum number of columns to 'None' (unlimited)
pd.set_option('display.max_columns', None)

### Merging Tables

We want to create a final dataset where each row contains a fight with red stats and blue stats. We need to calculate the averages of both red and blue fighters right up to their matchup. First let's access the date column from the fighter table and use it to sort the fighter_fights table from oldest to newest matchups.

In [2]:
df1 = pd.merge(fights, fighter_fights, on='fight_id')
df1.sort_values(by=['fight_id', 'event_id'])

drop_cols = [
    "event_id",
    "updated_at_x",
    "updated_at_y",
    "gender",
    "red_fighter_name",
    "blue_fighter_name",
    "red_status",
    "blue_status",
    "winner_id",
    "loser_id",
    "result_type",
    "end_round_time",
    "time_scheduled",
    "method_raw",
    'event_status_x',
    'event_status_y'
]

df1 = df1.drop(drop_cols, axis = 1)

df1.insert(2,'fighter_id', df1.pop("fighter_id"))
df1.insert(3,'opponent_id', df1.pop("opponent_id"))

print(df1.shape)
print(df1.columns)


(16532, 46)
Index(['fight_id', 'event_date', 'fighter_id', 'opponent_id', 'weight_class',
       'is_title_fight', 'red_fighter_id', 'blue_fighter_id', 'winner_color',
       'end_round', 'total_duration', 'rounds_scheduled', 'finish_type',
       'decision_type', 'referee', 'knockdowns', 'sub_attempts', 'reversals',
       'ctrl_time', 'tot_str_landed', 'tot_str_attempted', 'tot_str_raw',
       'td_landed', 'td_attempted', 'td_raw', 'sig_str_landed',
       'sig_str_attempted', 'sig_str_raw', 'head_str_landed',
       'head_str_attempted', 'head_str_raw', 'body_str_landed',
       'body_str_attempted', 'body_str_raw', 'leg_str_landed',
       'leg_str_attempted', 'leg_str_raw', 'distance_str_landed',
       'distance_str_attempted', 'distance_str_raw', 'clinch_str_landed',
       'clinch_str_attempted', 'clinch_str_raw', 'ground_str_landed',
       'ground_str_attempted', 'ground_str_raw'],
      dtype='object')


In [3]:
df1.head(10)

Unnamed: 0,fight_id,event_date,fighter_id,opponent_id,weight_class,is_title_fight,red_fighter_id,blue_fighter_id,winner_color,end_round,total_duration,rounds_scheduled,finish_type,decision_type,referee,knockdowns,sub_attempts,reversals,ctrl_time,tot_str_landed,tot_str_attempted,tot_str_raw,td_landed,td_attempted,td_raw,sig_str_landed,sig_str_attempted,sig_str_raw,head_str_landed,head_str_attempted,head_str_raw,body_str_landed,body_str_attempted,body_str_raw,leg_str_landed,leg_str_attempted,leg_str_raw,distance_str_landed,distance_str_attempted,distance_str_raw,clinch_str_landed,clinch_str_attempted,clinch_str_raw,ground_str_landed,ground_str_attempted,ground_str_raw
0,4a0db214d9721d6e,2025-12-06,c03520b5c88ed6b4,d661ce4da776fc20,Bantamweight,True,c03520b5c88ed6b4,d661ce4da776fc20,Blue,5.0,1500.0,5.0,DEC,U-DEC,Marc Goddard,0.0,2.0,1.0,312.0,196.0,458.0,196 of 458,2.0,29.0,2 of 29,134.0,383.0,134 of 383,105.0,341.0,105 of 341,22.0,34.0,22 of 34,7.0,8.0,7 of 8,116.0,353.0,116 of 353,18.0,30.0,18 of 30,0.0,0.0,0 of 0
1,4a0db214d9721d6e,2025-12-06,d661ce4da776fc20,c03520b5c88ed6b4,Bantamweight,True,c03520b5c88ed6b4,d661ce4da776fc20,Blue,5.0,1500.0,5.0,DEC,U-DEC,Marc Goddard,0.0,0.0,0.0,175.0,159.0,251.0,159 of 251,5.0,9.0,5 of 9,139.0,230.0,139 of 230,109.0,195.0,109 of 195,17.0,19.0,17 of 19,13.0,16.0,13 of 16,119.0,204.0,119 of 204,18.0,24.0,18 of 24,2.0,2.0,2 of 2
2,767b78b67c7da17c,2025-11-01,70380ccdc81915b8,a57cb948c4c70a47,Featherweight,False,a57cb948c4c70a47,70380ccdc81915b8,Blue,1.0,221.0,3.0,SUB,,Eric McMahon,0.0,3.0,0.0,176.0,28.0,43.0,28 of 43,0.0,0.0,0 of 0,1.0,1.0,1 of 1,0.0,0.0,0 of 0,0.0,0.0,0 of 0,1.0,1.0,1 of 1,1.0,1.0,1 of 1,0.0,0.0,0 of 0,0.0,0.0,0 of 0
3,767b78b67c7da17c,2025-11-01,a57cb948c4c70a47,70380ccdc81915b8,Featherweight,False,a57cb948c4c70a47,70380ccdc81915b8,Blue,1.0,221.0,3.0,SUB,,Eric McMahon,0.0,0.0,0.0,16.0,3.0,6.0,3 of 6,0.0,1.0,0 of 1,0.0,1.0,0 of 1,0.0,1.0,0 of 1,0.0,0.0,0 of 0,0.0,0.0,0 of 0,0.0,1.0,0 of 1,0.0,0.0,0 of 0,0.0,0.0,0 of 0
4,7add5901fac6ab82,2025-11-01,c0d5c0c95c59050b,d35298b6df168456,Women's Strawweight,False,d35298b6df168456,c0d5c0c95c59050b,Red,3.0,876.0,3.0,SUB,,Mark Smith,0.0,0.0,0.0,139.0,50.0,106.0,50 of 106,0.0,1.0,0 of 1,25.0,76.0,25 of 76,10.0,57.0,10 of 57,3.0,5.0,3 of 5,12.0,14.0,12 of 14,25.0,71.0,25 of 71,0.0,5.0,0 of 5,0.0,0.0,0 of 0
5,7add5901fac6ab82,2025-11-01,d35298b6df168456,c0d5c0c95c59050b,Women's Strawweight,False,d35298b6df168456,c0d5c0c95c59050b,Red,3.0,876.0,3.0,SUB,,Mark Smith,0.0,2.0,0.0,370.0,79.0,122.0,79 of 122,5.0,8.0,5 of 8,60.0,100.0,60 of 100,34.0,67.0,34 of 67,19.0,23.0,19 of 23,7.0,10.0,7 of 10,31.0,65.0,31 of 65,14.0,15.0,14 of 15,15.0,20.0,15 of 20
6,9d9390b550d77e06,2025-11-01,7d8435c56043b0ae,fc08099550072fe4,Heavyweight,False,fc08099550072fe4,7d8435c56043b0ae,Red,1.0,239.0,3.0,KO/TKO,,Mark Smith,0.0,0.0,0.0,25.0,18.0,38.0,18 of 38,0.0,1.0,0 of 1,16.0,36.0,16 of 36,13.0,33.0,13 of 33,2.0,2.0,2 of 2,1.0,1.0,1 of 1,15.0,33.0,15 of 33,1.0,3.0,1 of 3,0.0,0.0,0 of 0
7,9d9390b550d77e06,2025-11-01,fc08099550072fe4,7d8435c56043b0ae,Heavyweight,False,fc08099550072fe4,7d8435c56043b0ae,Red,1.0,239.0,3.0,KO/TKO,,Mark Smith,1.0,0.0,0.0,97.0,18.0,24.0,18 of 24,0.0,0.0,0 of 0,12.0,17.0,12 of 17,9.0,11.0,9 of 11,2.0,5.0,2 of 5,1.0,1.0,1 of 1,4.0,8.0,4 of 8,2.0,3.0,2 of 3,6.0,6.0,6 of 6
8,b0f68344e01884ff,2025-11-01,d66a46de8d705353,40f3cb27fc7305a1,Welterweight,False,d66a46de8d705353,40f3cb27fc7305a1,Red,3.0,900.0,3.0,DEC,U-DEC,Chris Tognoni,0.0,3.0,0.0,365.0,59.0,84.0,59 of 84,3.0,5.0,3 of 5,32.0,53.0,32 of 53,23.0,43.0,23 of 43,5.0,5.0,5 of 5,4.0,5.0,4 of 5,21.0,39.0,21 of 39,0.0,0.0,0 of 0,11.0,14.0,11 of 14
9,b0f68344e01884ff,2025-11-01,40f3cb27fc7305a1,d66a46de8d705353,Welterweight,False,d66a46de8d705353,40f3cb27fc7305a1,Red,3.0,900.0,3.0,DEC,U-DEC,Chris Tognoni,0.0,0.0,1.0,339.0,80.0,102.0,80 of 102,6.0,9.0,6 of 9,18.0,33.0,18 of 33,8.0,22.0,8 of 22,9.0,10.0,9 of 10,1.0,1.0,1 of 1,15.0,30.0,15 of 30,1.0,1.0,1 of 1,2.0,2.0,2 of 2


### Calculating Career Stats

In [4]:
df1 = df1.sort_values(by=['fighter_id', 'event_date'])

# Creating absorbed and received stats
df1['sig_str_absorbed'] = (
    df1
    .groupby('fight_id')['sig_str_landed']
    .transform(lambda x: x.iloc[::-1].values)
)
df1['sig_str_received'] = (
    df1
    .groupby('fight_id')['sig_str_attempted']
    .transform(lambda x: x.iloc[::-1].values)
)
df1['td_absorbed'] = (
    df1
    .groupby('fight_id')['td_landed']
    .transform(lambda x: x.iloc[::-1].values)
)
df1['td_received'] = (
    df1
    .groupby('fight_id')['td_attempted']
    .transform(lambda x: x.iloc[::-1].values)
)


### Calculating Weighted Moving Averages (with ewm)

Now it's time to calculate the averages up to the date of the matchup. The fighter_fights table has two rows per fight: one for the stats of the red fighter and one for the stats of the blue fighter. So, let's calculate the averages for both.

In [5]:
wanted = df1.columns.to_list()
unwanted = ['fight_id', 'event_date', 'fighter_id', 'opponent_id', 'weight_class',
            'is_title_fight', 'red_fighter_id', 'blue_fighter_id', 'winner_color',
            'end_round','rounds_scheduled', 'finish_type', 'total_duration',
            'decision_type', 'referee','tot_str_raw', 'td_raw', 'sig_str_raw', 
            'head_str_raw', 'body_str_raw', 'leg_str_raw','distance_str_raw',
            'clinch_str_raw','ground_str_raw','updated_at'
          ] 
          

cols = [c for c in wanted if c not in unwanted]
cols.append('total_duration')

print(cols)

df1 = df1.sort_values(['fighter_id', 'event_date', 'fight_id'])

for c in cols:
    
    df1[f"avg_{c}"] = (
        df1.groupby("fighter_id")[c]
           .transform(lambda x: x.ewm(span=5).mean().shift(1))
    )
  
final_cols = [
    "fighter_id",
    "fight_id",
    "event_date",
    "weight_class",
    "is_title_fight",
    "winner_color",
    "red_fighter_id",
    "blue_fighter_id",
    "end_round",
    "total_duration",
    "rounds_scheduled",
    "finish_type",
    "decision_type",
    "referee",
] + [
    f"avg_{c}" for c in cols
]

df1 = df1[final_cols]


['knockdowns', 'sub_attempts', 'reversals', 'ctrl_time', 'tot_str_landed', 'tot_str_attempted', 'td_landed', 'td_attempted', 'sig_str_landed', 'sig_str_attempted', 'head_str_landed', 'head_str_attempted', 'body_str_landed', 'body_str_attempted', 'leg_str_landed', 'leg_str_attempted', 'distance_str_landed', 'distance_str_attempted', 'clinch_str_landed', 'clinch_str_attempted', 'ground_str_landed', 'ground_str_attempted', 'sig_str_absorbed', 'sig_str_received', 'td_absorbed', 'td_received', 'total_duration']


In [6]:
print(df1.shape)
print(df1.columns)

df1 = df1.sort_values(by=['event_date', 'fight_id'])
df1.head(10)


(16532, 41)
Index(['fighter_id', 'fight_id', 'event_date', 'weight_class',
       'is_title_fight', 'winner_color', 'red_fighter_id', 'blue_fighter_id',
       'end_round', 'total_duration', 'rounds_scheduled', 'finish_type',
       'decision_type', 'referee', 'avg_knockdowns', 'avg_sub_attempts',
       'avg_reversals', 'avg_ctrl_time', 'avg_tot_str_landed',
       'avg_tot_str_attempted', 'avg_td_landed', 'avg_td_attempted',
       'avg_sig_str_landed', 'avg_sig_str_attempted', 'avg_head_str_landed',
       'avg_head_str_attempted', 'avg_body_str_landed',
       'avg_body_str_attempted', 'avg_leg_str_landed', 'avg_leg_str_attempted',
       'avg_distance_str_landed', 'avg_distance_str_attempted',
       'avg_clinch_str_landed', 'avg_clinch_str_attempted',
       'avg_ground_str_landed', 'avg_ground_str_attempted',
       'avg_sig_str_absorbed', 'avg_sig_str_received', 'avg_td_absorbed',
       'avg_td_received', 'avg_total_duration'],
      dtype='object')


Unnamed: 0,fighter_id,fight_id,event_date,weight_class,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,finish_type,decision_type,referee,avg_knockdowns,avg_sub_attempts,avg_reversals,avg_ctrl_time,avg_tot_str_landed,avg_tot_str_attempted,avg_td_landed,avg_td_attempted,avg_sig_str_landed,avg_sig_str_attempted,avg_head_str_landed,avg_head_str_attempted,avg_body_str_landed,avg_body_str_attempted,avg_leg_str_landed,avg_leg_str_attempted,avg_distance_str_landed,avg_distance_str_attempted,avg_clinch_str_landed,avg_clinch_str_attempted,avg_ground_str_landed,avg_ground_str_attempted,avg_sig_str_absorbed,avg_sig_str_received,avg_td_absorbed,avg_td_received,avg_total_duration
14201,817e0bdf08efce2e,4a1f37200bc69376,2001-02-23,Middleweight,False,Red,817e0bdf08efce2e,8d26912cd2aeb366,1.0,172.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,
14200,8d26912cd2aeb366,4a1f37200bc69376,2001-02-23,Middleweight,False,Red,817e0bdf08efce2e,8d26912cd2aeb366,1.0,172.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,
14178,4604ab1de9058474,651da45cc83ce011,2001-02-23,Heavyweight,False,,afaad7d6a581e307,4604ab1de9058474,1.0,207.0,2.0,OTHER,,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,
14179,afaad7d6a581e307,651da45cc83ce011,2001-02-23,Heavyweight,False,,afaad7d6a581e307,4604ab1de9058474,1.0,207.0,2.0,OTHER,,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,
14184,263ebd4a669e1e98,74d0da1f9df17b9e,2001-02-23,Welterweight,False,Red,263ebd4a669e1e98,a8fa0c4e95512806,1.0,100.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,
14185,a8fa0c4e95512806,74d0da1f9df17b9e,2001-02-23,Welterweight,False,Red,263ebd4a669e1e98,a8fa0c4e95512806,1.0,100.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,
14193,ad3f53c454cbbead,75a401066ad784f6,2001-02-23,Heavyweight,False,Red,b44f39a5c6596953,ad3f53c454cbbead,2.0,561.0,3.0,KO/TKO,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,
14192,b44f39a5c6596953,75a401066ad784f6,2001-02-23,Heavyweight,False,Red,b44f39a5c6596953,ad3f53c454cbbead,2.0,561.0,3.0,KO/TKO,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,
14204,44260175069b6276,a38ec65ac140fcf8,2001-02-23,Lightweight,True,Red,44260175069b6276,eb1723480fa2f96c,5.0,1500.0,5.0,DEC,U-DEC,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,
14205,eb1723480fa2f96c,a38ec65ac140fcf8,2001-02-23,Lightweight,True,Red,44260175069b6276,eb1723480fa2f96c,5.0,1500.0,5.0,DEC,U-DEC,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [7]:
fighter_id = '54f64b5e283b0ce7'

df_fighter = (
    df1
    .loc[df1["fighter_id"] == fighter_id]
    .sort_values("event_date")
    .reset_index(drop=True)
)

df_fighter.head(10)



Unnamed: 0,fighter_id,fight_id,event_date,weight_class,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,finish_type,decision_type,referee,avg_knockdowns,avg_sub_attempts,avg_reversals,avg_ctrl_time,avg_tot_str_landed,avg_tot_str_attempted,avg_td_landed,avg_td_attempted,avg_sig_str_landed,avg_sig_str_attempted,avg_head_str_landed,avg_head_str_attempted,avg_body_str_landed,avg_body_str_attempted,avg_leg_str_landed,avg_leg_str_attempted,avg_distance_str_landed,avg_distance_str_attempted,avg_clinch_str_landed,avg_clinch_str_attempted,avg_ground_str_landed,avg_ground_str_attempted,avg_sig_str_absorbed,avg_sig_str_received,avg_td_absorbed,avg_td_received,avg_total_duration
0,54f64b5e283b0ce7,22fe6779c3fa649d,2020-10-10,Featherweight,False,Blue,52c2ae6d2f2d2613,54f64b5e283b0ce7,3.0,900.0,3.0,DEC,U-DEC,Kevin Sataki,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,54f64b5e283b0ce7,b1be8b41b1a4fd85,2020-12-05,Featherweight,False,Red,54f64b5e283b0ce7,29af297d9f1de0f8,1.0,158.0,3.0,KO/TKO,,Mark Smith,0.0,5.0,0.0,482.0,30.0,62.0,5.0,9.0,13.0,33.0,7.0,23.0,6.0,10.0,0.0,0.0,12.0,30.0,1.0,2.0,0.0,1.0,17.0,53.0,0.0,2.0,900.0
2,54f64b5e283b0ce7,07468b6347ac5e3d,2021-07-10,Featherweight,False,Blue,8866c6f509c19089,54f64b5e283b0ce7,1.0,287.0,3.0,KO/TKO,,Jason Herzog,0.6,2.0,0.0,194.6,30.6,63.2,2.0,3.6,23.8,51.0,10.0,29.6,13.2,20.8,0.6,0.6,22.8,48.0,0.4,2.0,0.6,1.0,16.4,52.4,0.0,0.8,454.8
3,54f64b5e283b0ce7,a200b5dcbdd2506e,2022-03-19,Lightweight,False,Blue,4c88a1db5a46c6a4,54f64b5e283b0ce7,2.0,367.0,3.0,KO/TKO,,Marc Goddard,0.315789,1.052632,0.0,140.315789,24.631579,48.421053,1.052632,1.894737,21.052632,42.0,10.473684,26.947368,7.894737,11.894737,2.684211,3.157895,13.421053,29.052632,0.210526,1.052632,7.421053,11.894737,13.368421,35.631579,0.0,1.842105,375.315789
4,54f64b5e283b0ce7,4a17876e99f6baf3,2022-12-10,Featherweight,False,Blue,d9c6f19f958643e9,54f64b5e283b0ce7,2.0,490.0,3.0,SUB,,Marc Goddard,0.6,0.615385,0.0,148.907692,27.276923,51.153846,1.030769,2.769231,20.615385,41.584615,13.184615,31.123077,5.861538,8.615385,1.569231,1.846154,13.661538,31.523077,0.953846,1.446154,6.0,8.615385,16.953846,47.0,0.0,1.076923,371.861538
5,54f64b5e283b0ce7,c3ef3cb03edde8bb,2023-06-24,Featherweight,False,Blue,fba03cd6cc28dc41,54f64b5e283b0ce7,5.0,1500.0,5.0,DEC,U-DEC,Marc Goddard,0.753555,0.763033,0.0,121.687204,37.151659,67.218009,0.635071,1.706161,28.056872,55.180095,21.175355,43.36019,5.530806,10.298578,1.350711,1.521327,19.549763,43.990521,1.739336,2.42654,6.767773,8.763033,24.265403,60.436019,0.383886,4.118483,417.21327
6,54f64b5e283b0ce7,bec3154a11df3299,2024-02-17,Featherweight,True,Blue,e1248941344b3288,54f64b5e283b0ce7,2.0,512.0,5.0,KO/TKO,,Jason Herzog,0.843609,0.484211,0.0,155.784962,86.061654,176.031579,1.499248,2.178947,73.347368,160.718797,58.748872,137.870677,6.798496,12.381955,7.8,10.466165,54.063158,133.520301,1.834586,2.270677,17.449624,24.92782,47.189474,149.803008,0.243609,2.613534,812.878195
7,54f64b5e283b0ce7,ebf7cea27b83c432,2024-10-26,Featherweight,True,Red,54f64b5e283b0ce7,150ff4cc642270b9,3.0,694.0,5.0,KO/TKO,,Marc Goddard,0.89898,0.312773,0.0,101.690627,68.337057,141.322972,0.968431,1.407479,59.770277,131.077708,43.96746,107.821758,8.640117,13.662943,7.1627,9.593006,44.835357,111.030597,2.601263,2.882953,12.333657,17.164157,47.12239,134.648373,0.157358,1.688198,706.350656
8,54f64b5e283b0ce7,7a64d63e12618ba7,2025-06-28,Lightweight,True,Red,54f64b5e283b0ce7,07225ba28ae309b6,1.0,147.0,5.0,KO/TKO,,Marc Goddard,0.934021,0.204282,0.0,82.026487,71.688818,143.292149,1.326249,1.613006,65.052974,135.560032,45.713085,109.96479,9.458684,13.779857,9.881205,11.815385,53.911023,121.079144,1.698969,1.88295,9.442982,12.597938,58.179699,158.704203,0.102776,1.102617,702.066614


### Calculating Rates

In [8]:
# General rates
df1['w_SLpM'] = df1['avg_sig_str_landed'] / (df1['avg_total_duration'] / 60)
df1['w_SApM'] = df1['avg_sig_str_absorbed'] / (df1['avg_total_duration'] / 60)
df1['w_StrAcc'] = np.where(
  df1['avg_sig_str_attempted'] > 0, df1['avg_sig_str_landed'] / df1['avg_sig_str_attempted'],
  np.nan
)
df1['w_StrDef'] = np.where(
  df1['avg_sig_str_received'] > 0, (df1['avg_sig_str_received'] - df1['avg_sig_str_absorbed']) / df1['avg_sig_str_received'],
  np.nan
)
df1['w_TDavg'] = (df1['avg_td_landed'] / (df1['avg_total_duration'] / 900))
df1['w_TDacc'] = np.where(
  df1['avg_td_attempted'] > 0, df1['avg_td_landed'] / df1['avg_td_attempted'],
  np.nan
) 
df1['w_TDdef'] = np.where(
  df1['avg_td_received'] > 0, (df1['avg_td_received'] - df1['avg_td_absorbed']) / df1['avg_td_received'],
  np.nan
)
df1['w_SubAvg'] = (df1['avg_sub_attempts'] / (df1['avg_total_duration'] / 900))

# Additional rates
specs = ['head', 'body', 'leg', 'distance', 'clinch', 'ground']

for c in specs:
  df1[f'w_{c}_ratio'] = df1[f'avg_{c}_str_landed'] / df1['avg_sig_str_landed']
  df1[f'w_{c}_acc'] = np.where(
    df1[f'avg_{c}_str_attempted'] > 0, df1[f'avg_{c}_str_landed'] / df1[f'avg_{c}_str_attempted'],
    np.nan
  )
df1['w_knockdown_avg'] = df1['avg_knockdowns'] / (df1['avg_total_duration'] / 900)
df1['w_reversal_avg'] = df1['avg_reversals'] / (df1['avg_total_duration'] / 900)
df1['w_ctrl_time_pct'] = df1['avg_ctrl_time'] / df1['avg_total_duration']
df1['w_str_eff'] = np.where(
  df1['avg_tot_str_landed'] > 0, df1['avg_sig_str_landed'] / df1['avg_tot_str_landed'], 
  np.nan
)



In [9]:
# Deltas
cols = ['SLpM', 'SApM', 'StrAcc', 'StrDef', 'TDavg', 'TDdef', 'TDacc', 'SubAvg', 'head_ratio', 'head_acc', 'body_ratio', 'body_acc', 
        'leg_ratio', 'leg_acc', 'distance_ratio', 'distance_acc', 'clinch_ratio', 'clinch_acc', 'ground_ratio', 'ground_acc', 
        'knockdown_avg', 'reversal_avg', 'ctrl_time_pct', 'str_eff']

for c in cols:
  df1[f'opp_w_{c}'] = (
    df1
    .groupby('fight_id')[f'w_{c}']
    .transform(lambda x: x.iloc[::-1].values)
)

df1['delta_SLpM'] = df1['w_SLpM'] - df1['opp_w_SLpM']
df1['delta_SApM'] = df1['w_SApM'] - df1['opp_w_SApM']
df1['delta_StrDef'] = df1['w_StrDef'] - df1['opp_w_StrDef']
df1['delta_StrAcc'] = df1['w_StrAcc'] - df1['opp_w_StrAcc']
df1['delta_TDAvg'] = df1['w_TDavg'] - df1['opp_w_TDavg']
df1['delta_TDdef'] = df1['w_TDdef'] - df1['opp_w_TDdef']
df1['delta_TDacc'] = df1['w_TDacc'] - df1['opp_w_TDacc']
df1['delta_SubAvg'] = df1['w_SubAvg'] - df1['opp_w_SubAvg']
df1['net_str_eff']  = (df1['w_SLpM'] - df1['w_SApM']) - (df1['opp_w_SLpM'] - df1['opp_w_SApM'])

for c in specs:
  df1[f'delta_{c}_ratio'] = df1[f'w_{c}_ratio'] - df1[f'opp_w_{c}_ratio']
  df1[f'delta_{c}_acc'] = df1[f'w_{c}_acc'] - df1[f'opp_w_{c}_acc']

df1['delta_knockdown_avg'] = df1[f'w_knockdown_avg'] - df1['opp_w_knockdown_avg']
df1['delta_reversal_avg'] = df1[f'w_reversal_avg'] - df1['opp_w_reversal_avg']
df1['delta_ctrl_time_pct'] = df1[f'w_ctrl_time_pct'] - df1['opp_w_ctrl_time_pct']
df1['delta_str_eff'] = df1[f'w_str_eff'] - df1['opp_w_str_eff']


  df1['delta_reversal_avg'] = df1[f'w_reversal_avg'] - df1['opp_w_reversal_avg']
  df1['delta_ctrl_time_pct'] = df1[f'w_ctrl_time_pct'] - df1['opp_w_ctrl_time_pct']
  df1['delta_str_eff'] = df1[f'w_str_eff'] - df1['opp_w_str_eff']


In [10]:
drop_cols = [
  'avg_knockdowns',
  'avg_sub_attempts',
  'avg_reversals',
  'avg_ctrl_time',
  'avg_tot_str_landed',
  'avg_tot_str_attempted',
  'avg_td_landed',
  'avg_td_attempted',
  'avg_sig_str_landed',
  'avg_sig_str_attempted',
  'avg_head_str_landed',
  'avg_head_str_attempted',
  'avg_body_str_landed',
  'avg_body_str_attempted',
  'avg_leg_str_landed',
  'avg_leg_str_attempted',
  'avg_distance_str_landed',
  'avg_distance_str_attempted',
  'avg_clinch_str_landed',
  'avg_clinch_str_attempted',
  'avg_ground_str_landed',
  'avg_ground_str_attempted',
  'avg_sig_str_absorbed',
  'avg_sig_str_received',
  'avg_td_absorbed',
  'avg_td_received',
  'avg_total_duration'
]

additional = [f'opp_w_{c}' for c in cols]

drop_cols.extend(additional)

df1 = df1.drop(drop_cols, axis=1)


In [11]:
fighter_id = '54f64b5e283b0ce7'

df_fighter = (
    df1
    .loc[df1["fighter_id"] == fighter_id]
    .sort_values("event_date")
    .reset_index(drop=True)
)

df_fighter.head(10)

Unnamed: 0,fighter_id,fight_id,event_date,weight_class,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,finish_type,decision_type,referee,w_SLpM,w_SApM,w_StrAcc,w_StrDef,w_TDavg,w_TDacc,w_TDdef,w_SubAvg,w_head_ratio,w_head_acc,w_body_ratio,w_body_acc,w_leg_ratio,w_leg_acc,w_distance_ratio,w_distance_acc,w_clinch_ratio,w_clinch_acc,w_ground_ratio,w_ground_acc,w_knockdown_avg,w_reversal_avg,w_ctrl_time_pct,w_str_eff,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_distance_ratio,delta_distance_acc,delta_clinch_ratio,delta_clinch_acc,delta_ground_ratio,delta_ground_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_str_eff
0,54f64b5e283b0ce7,22fe6779c3fa649d,2020-10-10,Featherweight,False,Blue,52c2ae6d2f2d2613,54f64b5e283b0ce7,3.0,900.0,3.0,DEC,U-DEC,Kevin Sataki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,54f64b5e283b0ce7,b1be8b41b1a4fd85,2020-12-05,Featherweight,False,Red,54f64b5e283b0ce7,29af297d9f1de0f8,1.0,158.0,3.0,KO/TKO,,Mark Smith,0.866667,1.133333,0.393939,0.679245,5.0,0.555556,1.0,5.0,0.538462,0.304348,0.461538,0.6,0.0,,0.923077,0.4,0.076923,0.5,0.0,0.0,0.0,0.0,0.535556,0.433333,-1.11597,-1.557176,0.256083,0.023651,4.198635,0.675676,0.371882,0.792832,0.441206,-0.07082,0.025676,0.135191,-0.147856,-0.064371,,0.469484,0.158566,-0.287598,-0.151941,-0.181886,-0.692308,0.0,-0.601024,0.250725,-0.12287
2,54f64b5e283b0ce7,07468b6347ac5e3d,2021-07-10,Featherweight,False,Blue,8866c6f509c19089,54f64b5e283b0ce7,1.0,287.0,3.0,KO/TKO,,Jason Herzog,3.139842,2.163588,0.466667,0.687023,3.957784,0.555556,1.0,3.957784,0.420168,0.337838,0.554622,0.634615,0.02521,1.0,0.957983,0.475,0.016807,0.2,0.02521,0.6,1.187335,0.0,0.42788,0.777778,0.733906,0.842709,-0.072039,0.038668,3.798801,,0.448889,2.904524,-0.108802,0.03856,0.011413,0.221472,0.221646,-0.260032,0.20916,0.069217,0.06179,0.003591,-0.096296,-0.072808,-0.095312,0.114203,-0.536566,0.344922,0.212398
3,54f64b5e283b0ce7,a200b5dcbdd2506e,2022-03-19,Lightweight,False,Blue,4c88a1db5a46c6a4,54f64b5e283b0ce7,2.0,367.0,3.0,KO/TKO,,Marc Goddard,3.365587,2.137148,0.501253,0.624815,2.52419,0.555556,1.0,2.52419,0.4975,0.388672,0.375,0.663717,0.1275,0.85,0.6375,0.461957,0.01,0.2,0.3525,0.623894,0.757257,0.0,0.373861,0.854701,0.703072,-0.782276,0.187034,0.09604,2.52419,0.703704,,2.52419,1.485349,-0.300746,0.018753,0.301901,-0.050569,-0.001155,0.230282,-0.172442,0.090144,-0.101111,-0.704762,0.273553,0.15021,-0.760843,0.0,0.257992,0.143682
4,54f64b5e283b0ce7,4a17876e99f6baf3,2022-12-10,Featherweight,False,Blue,d9c6f19f958643e9,54f64b5e283b0ce7,2.0,490.0,3.0,SUB,,Marc Goddard,3.3263,2.735509,0.495745,0.63928,2.494725,0.372222,1.0,1.489388,0.639552,0.423628,0.284328,0.680357,0.076119,0.85,0.662687,0.433382,0.046269,0.659574,0.291045,0.696429,1.452153,0.0,0.400439,0.755781,0.882725,1.53968,0.040183,-0.191951,-1.737924,0.65625,-0.287504,0.019854,-0.656956,-0.066288,-0.212335,0.046974,-0.177474,0.019314,0.009211,0.300832,-0.117057,0.034179,0.073368,-0.335011,-0.110226,1.056259,-0.078201,-0.287972,0.440763
5,54f64b5e283b0ce7,c3ef3cb03edde8bb,2023-06-24,Featherweight,False,Blue,fba03cd6cc28dc41,54f64b5e283b0ce7,5.0,1500.0,5.0,DEC,U-DEC,Marc Goddard,4.034896,3.48964,0.50846,0.598494,1.369956,0.372222,0.906789,1.645992,0.75473,0.488359,0.197128,0.537046,0.048142,0.88785,0.696791,0.444409,0.061993,0.716797,0.241216,0.772309,1.625545,0.0,0.291667,0.755198,-0.094846,-1.994259,0.013495,0.13812,0.844693,0.174763,0.180621,1.639813,1.899413,0.023985,0.174806,0.009705,-0.128663,-0.033691,-0.040433,-0.161155,0.102029,0.036131,0.094021,0.125023,0.012186,1.030937,-0.158374,0.193525,-0.147709
6,54f64b5e283b0ce7,bec3154a11df3299,2024-02-17,Featherweight,True,Blue,e1248941344b3288,54f64b5e283b0ce7,2.0,512.0,5.0,KO/TKO,,Jason Herzog,5.413901,3.48314,0.456371,0.68499,1.659933,0.688061,0.906789,0.536107,0.800968,0.426116,0.092689,0.549065,0.106343,0.745259,0.737084,0.404906,0.025012,0.807947,0.237904,0.700006,0.934024,0.0,0.191646,0.852265,0.171103,-0.157889,0.170764,-0.116209,-0.396967,0.219433,0.234647,0.517806,0.328992,0.142884,-0.085158,-0.023659,-0.157705,-0.119225,-0.020118,-0.013935,-0.129632,-0.054448,0.092885,0.068383,-0.035861,0.685041,-0.093851,-0.023581,0.186807
7,54f64b5e283b0ce7,ebf7cea27b83c432,2024-10-26,Featherweight,True,Red,54f64b5e283b0ce7,150ff4cc642270b9,3.0,694.0,5.0,KO/TKO,,Marc Goddard,5.077105,4.002748,0.455991,0.650034,1.233931,0.688061,0.906789,0.398521,0.735607,0.407779,0.144555,0.632376,0.119837,0.746659,0.750128,0.403811,0.043521,0.902291,0.206351,0.71857,1.14544,0.0,0.143966,0.874639,-2.234729,-0.644571,0.09912,-0.094278,1.089607,0.047017,0.382415,0.189967,-1.590159,0.263873,-0.008922,-0.171746,-0.123125,-0.092127,-0.048527,-0.207098,-0.138629,0.015045,0.035373,0.192053,-0.004355,0.601887,0.0,0.110074,-0.089364
8,54f64b5e283b0ce7,7a64d63e12618ba7,2025-06-28,Lightweight,True,Red,54f64b5e283b0ce7,07225ba28ae309b6,1.0,147.0,5.0,KO/TKO,,Marc Goddard,5.559556,4.972152,0.479883,0.633408,1.700158,0.822222,0.906789,0.261876,0.702706,0.415707,0.1454,0.686414,0.151895,0.8363,0.828725,0.445254,0.026117,0.902291,0.145158,0.749566,1.197349,0.0,0.116836,0.907435,2.31679,1.545548,0.296297,-0.184166,-0.129251,0.336443,0.448697,-1.725649,0.771243,0.059297,-0.208229,-0.084954,-0.033863,0.025656,0.021281,0.182242,-0.149329,-0.079105,0.078103,-0.103137,-0.103803,0.900491,-0.295441,-0.314545,0.179468


### Dealing with NaN values

We must deal with the NaN values. These values appear because either we don't have their records prior to 2001 or it is their first fight. We should employ imputation and replace the values with the medians of their weight class

In [12]:

# for c in df1.columns:
#   if "avg_" in c:
#     group_medians = df1.groupby('weight_class')[c].transform('median')
#     df1[c] = df1[c].fillna(group_medians)


In [13]:
# print(df1.shape)
# print(df1.columns)
# df1.head(25)


### Adding in fighter context

Let's now merge the fighter attributes tables into the fight_history dataframe

In [14]:
query = """ 
  SELECT * FROM fighters;
"""

df2 = pd.read_sql(query, con=engine)
print(df1.shape)
print(df1.columns)
df2.head(10)

(16532, 63)
Index(['fighter_id', 'fight_id', 'event_date', 'weight_class',
       'is_title_fight', 'winner_color', 'red_fighter_id', 'blue_fighter_id',
       'end_round', 'total_duration', 'rounds_scheduled', 'finish_type',
       'decision_type', 'referee', 'w_SLpM', 'w_SApM', 'w_StrAcc', 'w_StrDef',
       'w_TDavg', 'w_TDacc', 'w_TDdef', 'w_SubAvg', 'w_head_ratio',
       'w_head_acc', 'w_body_ratio', 'w_body_acc', 'w_leg_ratio', 'w_leg_acc',
       'w_distance_ratio', 'w_distance_acc', 'w_clinch_ratio', 'w_clinch_acc',
       'w_ground_ratio', 'w_ground_acc', 'w_knockdown_avg', 'w_reversal_avg',
       'w_ctrl_time_pct', 'w_str_eff', 'delta_SLpM', 'delta_SApM',
       'delta_StrDef', 'delta_StrAcc', 'delta_TDAvg', 'delta_TDdef',
       'delta_TDacc', 'delta_SubAvg', 'net_str_eff', 'delta_head_ratio',
       'delta_head_acc', 'delta_body_ratio', 'delta_body_acc',
       'delta_leg_ratio', 'delta_leg_acc', 'delta_distance_ratio',
       'delta_distance_acc', 'delta_clinch_ratio', '

Unnamed: 0,fighter_id,name,height,weight,reach,stance,dob,updated_at
0,29162be25ebef5f0,Luis Gurule,65.0,125,64.0,Orthodox,1993-11-16,2026-02-03 03:17:50.681254+00:00
1,7674b836ce0698a0,Yousri Belgaroui,78.0,185,79.0,Orthodox,1992-06-02,2026-02-03 03:17:47.588404+00:00
2,b4b496ec2197ee3e,Navajo Stirling,76.0,205,79.0,Orthodox,1997-11-07,2026-02-03 03:17:52.942448+00:00
3,c06d6b18862f9a53,Rodrigo Sezinando,69.0,170,71.0,Orthodox,1997-12-11,2026-02-03 03:17:47.321274+00:00
4,723b64c0e9a8f348,Josias Musasa,68.0,135,74.0,Southpaw,1998-10-10,2026-02-03 03:17:51.024518+00:00
5,5537efc6e496dd84,Aaron Pico,68.0,145,70.0,Orthodox,1996-09-23,2026-02-03 03:17:49.455392+00:00
6,d1feea7a4bdcd503,Baisangur Susurkaev,74.0,185,79.0,Orthodox,2001-01-08,2026-02-03 03:17:49.658693+00:00
7,2f815ed5f8278ba6,Ateba Gautier,76.0,185,81.0,Switch,2002-04-10,2026-02-03 03:17:50.791989+00:00
8,4a07b1988477502c,Seokhyeon Ko,70.0,170,71.0,Southpaw,1993-09-24,2026-02-03 03:17:49.887970+00:00
9,7eaa5e31c2dd86aa,Nick Klein,73.0,185,77.0,Orthodox,1995-09-08,2026-02-03 03:17:51.335868+00:00


In [15]:
df1 = df1.merge(df2[['fighter_id', 'name', 'height', 'reach', 'stance', 'dob']], on='fighter_id', how='left')
print(df1.shape)
print(df1.columns)
df1.head(10)

(16532, 68)
Index(['fighter_id', 'fight_id', 'event_date', 'weight_class',
       'is_title_fight', 'winner_color', 'red_fighter_id', 'blue_fighter_id',
       'end_round', 'total_duration', 'rounds_scheduled', 'finish_type',
       'decision_type', 'referee', 'w_SLpM', 'w_SApM', 'w_StrAcc', 'w_StrDef',
       'w_TDavg', 'w_TDacc', 'w_TDdef', 'w_SubAvg', 'w_head_ratio',
       'w_head_acc', 'w_body_ratio', 'w_body_acc', 'w_leg_ratio', 'w_leg_acc',
       'w_distance_ratio', 'w_distance_acc', 'w_clinch_ratio', 'w_clinch_acc',
       'w_ground_ratio', 'w_ground_acc', 'w_knockdown_avg', 'w_reversal_avg',
       'w_ctrl_time_pct', 'w_str_eff', 'delta_SLpM', 'delta_SApM',
       'delta_StrDef', 'delta_StrAcc', 'delta_TDAvg', 'delta_TDdef',
       'delta_TDacc', 'delta_SubAvg', 'net_str_eff', 'delta_head_ratio',
       'delta_head_acc', 'delta_body_ratio', 'delta_body_acc',
       'delta_leg_ratio', 'delta_leg_acc', 'delta_distance_ratio',
       'delta_distance_acc', 'delta_clinch_ratio', '

Unnamed: 0,fighter_id,fight_id,event_date,weight_class,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,finish_type,decision_type,referee,w_SLpM,w_SApM,w_StrAcc,w_StrDef,w_TDavg,w_TDacc,w_TDdef,w_SubAvg,w_head_ratio,w_head_acc,w_body_ratio,w_body_acc,w_leg_ratio,w_leg_acc,w_distance_ratio,w_distance_acc,w_clinch_ratio,w_clinch_acc,w_ground_ratio,w_ground_acc,w_knockdown_avg,w_reversal_avg,w_ctrl_time_pct,w_str_eff,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_distance_ratio,delta_distance_acc,delta_clinch_ratio,delta_clinch_acc,delta_ground_ratio,delta_ground_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_str_eff,name,height,reach,stance,dob
0,817e0bdf08efce2e,4a1f37200bc69376,2001-02-23,Middleweight,False,Red,817e0bdf08efce2e,8d26912cd2aeb366,1.0,172.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Elvis Sinosic,75.0,77.0,Orthodox,1971-02-13
1,8d26912cd2aeb366,4a1f37200bc69376,2001-02-23,Middleweight,False,Red,817e0bdf08efce2e,8d26912cd2aeb366,1.0,172.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jeremy Horn,73.0,74.0,Orthodox,1975-08-25
2,4604ab1de9058474,651da45cc83ce011,2001-02-23,Heavyweight,False,,afaad7d6a581e307,4604ab1de9058474,1.0,207.0,2.0,OTHER,,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Mark Robinson,72.0,,Orthodox,
3,afaad7d6a581e307,651da45cc83ce011,2001-02-23,Heavyweight,False,,afaad7d6a581e307,4604ab1de9058474,1.0,207.0,2.0,OTHER,,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Bobby Hoffman,74.0,,Orthodox,1968-10-28
4,263ebd4a669e1e98,74d0da1f9df17b9e,2001-02-23,Welterweight,False,Red,263ebd4a669e1e98,a8fa0c4e95512806,1.0,100.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Fabiano Iha,68.0,,Orthodox,1970-07-28
5,a8fa0c4e95512806,74d0da1f9df17b9e,2001-02-23,Welterweight,False,Red,263ebd4a669e1e98,a8fa0c4e95512806,1.0,100.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Phil Johns,62.0,,Southpaw,1968-10-22
6,ad3f53c454cbbead,75a401066ad784f6,2001-02-23,Heavyweight,False,Red,b44f39a5c6596953,ad3f53c454cbbead,2.0,561.0,3.0,KO/TKO,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Josh Barnett,75.0,78.0,Orthodox,1977-11-10
7,b44f39a5c6596953,75a401066ad784f6,2001-02-23,Heavyweight,False,Red,b44f39a5c6596953,ad3f53c454cbbead,2.0,561.0,3.0,KO/TKO,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pedro Rizzo,73.0,,Orthodox,1974-05-03
8,44260175069b6276,a38ec65ac140fcf8,2001-02-23,Lightweight,True,Red,44260175069b6276,eb1723480fa2f96c,5.0,1500.0,5.0,DEC,U-DEC,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jens Pulver,67.0,70.0,Southpaw,1974-12-06
9,eb1723480fa2f96c,a38ec65ac140fcf8,2001-02-23,Lightweight,True,Red,44260175069b6276,eb1723480fa2f96c,5.0,1500.0,5.0,DEC,U-DEC,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Caol Uno,67.0,70.0,Southpaw,1975-05-08


We now have to address any NaNs and also calculate the age of the fighters


In [16]:
cols = [
  'height',
  'reach',
  'stance',
  'dob'
]
for c in cols:
  if df1[c].isnull().sum()!=0:
      print(f"Nan in {c}: {df1[c].isnull().sum()}")


Nan in height: 3
Nan in reach: 856
Nan in stance: 48
Nan in dob: 26


In [17]:
cols = [
  'height',
  'reach',
]

for c in cols:
  group_medians = df1.groupby('weight_class')[c].transform('median')
  df1[c] = df1[c].fillna(group_medians)

In [18]:
df1['stance'] = df1['stance'].fillna('Orthodox')

In [19]:
is_dob_null = df1['dob'].isnull()
has_any_null_in_group = is_dob_null.groupby(df1['fight_id']).transform('any')
df1 = df1[~has_any_null_in_group]

cols = [
  'height',
  'reach',
  'stance',
  'dob'
]

for c in cols:
  if df1[c].isnull().sum()!=0:
      print(f"Nan in {c}: {df1[c].isnull().sum()}")

In [20]:
print(df1.shape)
print(df1.columns)
df1.head(10)

(16480, 68)
Index(['fighter_id', 'fight_id', 'event_date', 'weight_class',
       'is_title_fight', 'winner_color', 'red_fighter_id', 'blue_fighter_id',
       'end_round', 'total_duration', 'rounds_scheduled', 'finish_type',
       'decision_type', 'referee', 'w_SLpM', 'w_SApM', 'w_StrAcc', 'w_StrDef',
       'w_TDavg', 'w_TDacc', 'w_TDdef', 'w_SubAvg', 'w_head_ratio',
       'w_head_acc', 'w_body_ratio', 'w_body_acc', 'w_leg_ratio', 'w_leg_acc',
       'w_distance_ratio', 'w_distance_acc', 'w_clinch_ratio', 'w_clinch_acc',
       'w_ground_ratio', 'w_ground_acc', 'w_knockdown_avg', 'w_reversal_avg',
       'w_ctrl_time_pct', 'w_str_eff', 'delta_SLpM', 'delta_SApM',
       'delta_StrDef', 'delta_StrAcc', 'delta_TDAvg', 'delta_TDdef',
       'delta_TDacc', 'delta_SubAvg', 'net_str_eff', 'delta_head_ratio',
       'delta_head_acc', 'delta_body_ratio', 'delta_body_acc',
       'delta_leg_ratio', 'delta_leg_acc', 'delta_distance_ratio',
       'delta_distance_acc', 'delta_clinch_ratio', '

Unnamed: 0,fighter_id,fight_id,event_date,weight_class,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,finish_type,decision_type,referee,w_SLpM,w_SApM,w_StrAcc,w_StrDef,w_TDavg,w_TDacc,w_TDdef,w_SubAvg,w_head_ratio,w_head_acc,w_body_ratio,w_body_acc,w_leg_ratio,w_leg_acc,w_distance_ratio,w_distance_acc,w_clinch_ratio,w_clinch_acc,w_ground_ratio,w_ground_acc,w_knockdown_avg,w_reversal_avg,w_ctrl_time_pct,w_str_eff,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_distance_ratio,delta_distance_acc,delta_clinch_ratio,delta_clinch_acc,delta_ground_ratio,delta_ground_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_str_eff,name,height,reach,stance,dob
0,817e0bdf08efce2e,4a1f37200bc69376,2001-02-23,Middleweight,False,Red,817e0bdf08efce2e,8d26912cd2aeb366,1.0,172.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Elvis Sinosic,75.0,77.0,Orthodox,1971-02-13
1,8d26912cd2aeb366,4a1f37200bc69376,2001-02-23,Middleweight,False,Red,817e0bdf08efce2e,8d26912cd2aeb366,1.0,172.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jeremy Horn,73.0,74.0,Orthodox,1975-08-25
4,263ebd4a669e1e98,74d0da1f9df17b9e,2001-02-23,Welterweight,False,Red,263ebd4a669e1e98,a8fa0c4e95512806,1.0,100.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Fabiano Iha,68.0,74.0,Orthodox,1970-07-28
5,a8fa0c4e95512806,74d0da1f9df17b9e,2001-02-23,Welterweight,False,Red,263ebd4a669e1e98,a8fa0c4e95512806,1.0,100.0,3.0,SUB,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Phil Johns,62.0,74.0,Southpaw,1968-10-22
6,ad3f53c454cbbead,75a401066ad784f6,2001-02-23,Heavyweight,False,Red,b44f39a5c6596953,ad3f53c454cbbead,2.0,561.0,3.0,KO/TKO,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Josh Barnett,75.0,78.0,Orthodox,1977-11-10
7,b44f39a5c6596953,75a401066ad784f6,2001-02-23,Heavyweight,False,Red,b44f39a5c6596953,ad3f53c454cbbead,2.0,561.0,3.0,KO/TKO,,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pedro Rizzo,73.0,78.0,Orthodox,1974-05-03
8,44260175069b6276,a38ec65ac140fcf8,2001-02-23,Lightweight,True,Red,44260175069b6276,eb1723480fa2f96c,5.0,1500.0,5.0,DEC,U-DEC,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jens Pulver,67.0,70.0,Southpaw,1974-12-06
9,eb1723480fa2f96c,a38ec65ac140fcf8,2001-02-23,Lightweight,True,Red,44260175069b6276,eb1723480fa2f96c,5.0,1500.0,5.0,DEC,U-DEC,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Caol Uno,67.0,70.0,Southpaw,1975-05-08
10,1652f3213655b935,a949b05c64e43131,2001-02-23,Middleweight,False,Red,1652f3213655b935,e09dbbd5fe3f0fe2,2.0,600.0,2.0,DEC,U-DEC,Mason White,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Phil Baroni,69.0,72.0,Orthodox,1976-04-16
11,e09dbbd5fe3f0fe2,a949b05c64e43131,2001-02-23,Middleweight,False,Red,1652f3213655b935,e09dbbd5fe3f0fe2,2.0,600.0,2.0,DEC,U-DEC,Mason White,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Curtis Stout,73.0,75.0,Southpaw,1973-11-07


### Encoding Categorical Columns

We must address the categorical columns. Let's apply one hot encoding on weight_class, finish_type, and decision_type

In [21]:
df1['weight_class'] = df1['weight_class'].str.replace("'", "", regex=False).str.replace(" ", "_", regex=False)
df1['finish_type'] = df1['finish_type'].str.replace("/", "_", regex=False)
df1['decision_type'] = df1['decision_type'].str.replace("-", "_", regex=False)
df1['stance'] = df1['stance'].str.replace(" ", "_", regex=False)


# One hot encoding
df1 = pd.get_dummies(df1, columns=['weight_class', 'finish_type', 'decision_type', 'stance'])

print(df1.shape)
print(df1.columns)
df1.head(10)

(16480, 90)
Index(['fighter_id', 'fight_id', 'event_date', 'is_title_fight',
       'winner_color', 'red_fighter_id', 'blue_fighter_id', 'end_round',
       'total_duration', 'rounds_scheduled', 'referee', 'w_SLpM', 'w_SApM',
       'w_StrAcc', 'w_StrDef', 'w_TDavg', 'w_TDacc', 'w_TDdef', 'w_SubAvg',
       'w_head_ratio', 'w_head_acc', 'w_body_ratio', 'w_body_acc',
       'w_leg_ratio', 'w_leg_acc', 'w_distance_ratio', 'w_distance_acc',
       'w_clinch_ratio', 'w_clinch_acc', 'w_ground_ratio', 'w_ground_acc',
       'w_knockdown_avg', 'w_reversal_avg', 'w_ctrl_time_pct', 'w_str_eff',
       'delta_SLpM', 'delta_SApM', 'delta_StrDef', 'delta_StrAcc',
       'delta_TDAvg', 'delta_TDdef', 'delta_TDacc', 'delta_SubAvg',
       'net_str_eff', 'delta_head_ratio', 'delta_head_acc', 'delta_body_ratio',
       'delta_body_acc', 'delta_leg_ratio', 'delta_leg_acc',
       'delta_distance_ratio', 'delta_distance_acc', 'delta_clinch_ratio',
       'delta_clinch_acc', 'delta_ground_ratio', 'delta_

Unnamed: 0,fighter_id,fight_id,event_date,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,referee,w_SLpM,w_SApM,w_StrAcc,w_StrDef,w_TDavg,w_TDacc,w_TDdef,w_SubAvg,w_head_ratio,w_head_acc,w_body_ratio,w_body_acc,w_leg_ratio,w_leg_acc,w_distance_ratio,w_distance_acc,w_clinch_ratio,w_clinch_acc,w_ground_ratio,w_ground_acc,w_knockdown_avg,w_reversal_avg,w_ctrl_time_pct,w_str_eff,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_distance_ratio,delta_distance_acc,delta_clinch_ratio,delta_clinch_acc,delta_ground_ratio,delta_ground_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_str_eff,name,height,reach,dob,weight_class_Bantamweight,weight_class_Catch_Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light_Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Welterweight,weight_class_Womens_Bantamweight,weight_class_Womens_Featherweight,weight_class_Womens_Flyweight,weight_class_Womens_Strawweight,finish_type_DEC,finish_type_DQ,finish_type_KO_TKO,finish_type_OTHER,finish_type_SUB,decision_type_M_DEC,decision_type_S_DEC,decision_type_U_DEC,stance_Open_Stance,stance_Orthodox,stance_Sideways,stance_Southpaw,stance_Switch
0,817e0bdf08efce2e,4a1f37200bc69376,2001-02-23,False,Red,817e0bdf08efce2e,8d26912cd2aeb366,1.0,172.0,3.0,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Elvis Sinosic,75.0,77.0,1971-02-13,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False
1,8d26912cd2aeb366,4a1f37200bc69376,2001-02-23,False,Red,817e0bdf08efce2e,8d26912cd2aeb366,1.0,172.0,3.0,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jeremy Horn,73.0,74.0,1975-08-25,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False
4,263ebd4a669e1e98,74d0da1f9df17b9e,2001-02-23,False,Red,263ebd4a669e1e98,a8fa0c4e95512806,1.0,100.0,3.0,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Fabiano Iha,68.0,74.0,1970-07-28,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False
5,a8fa0c4e95512806,74d0da1f9df17b9e,2001-02-23,False,Red,263ebd4a669e1e98,a8fa0c4e95512806,1.0,100.0,3.0,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Phil Johns,62.0,74.0,1968-10-22,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False
6,ad3f53c454cbbead,75a401066ad784f6,2001-02-23,False,Red,b44f39a5c6596953,ad3f53c454cbbead,2.0,561.0,3.0,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Josh Barnett,75.0,78.0,1977-11-10,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False
7,b44f39a5c6596953,75a401066ad784f6,2001-02-23,False,Red,b44f39a5c6596953,ad3f53c454cbbead,2.0,561.0,3.0,Mario Yamasaki,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pedro Rizzo,73.0,78.0,1974-05-03,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False
8,44260175069b6276,a38ec65ac140fcf8,2001-02-23,True,Red,44260175069b6276,eb1723480fa2f96c,5.0,1500.0,5.0,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Jens Pulver,67.0,70.0,1974-12-06,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,True,False
9,eb1723480fa2f96c,a38ec65ac140fcf8,2001-02-23,True,Red,44260175069b6276,eb1723480fa2f96c,5.0,1500.0,5.0,John McCarthy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Caol Uno,67.0,70.0,1975-05-08,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,True,False
10,1652f3213655b935,a949b05c64e43131,2001-02-23,False,Red,1652f3213655b935,e09dbbd5fe3f0fe2,2.0,600.0,2.0,Mason White,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Phil Baroni,69.0,72.0,1976-04-16,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,True,False,True,False,False,False
11,e09dbbd5fe3f0fe2,a949b05c64e43131,2001-02-23,False,Red,1652f3213655b935,e09dbbd5fe3f0fe2,2.0,600.0,2.0,Mason White,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Curtis Stout,73.0,75.0,1973-11-07,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,True,False


### Removing Noisy Columns

In [None]:
df1 = df1[
    ~df1['fight_id'].isin(
        df1.loc[
            df1[
                [
                    'finish_type_DQ',
                    'finish_type_Draw',
                    'finish_type_NC',
                    'decision_type_OTHER_DEC',
                    'stance_Sideways',
                    'stance_Open_Stance'
                ]
            ].any(axis=1),
            'fight_id'
        ]
    )
]

# drop_cols = ['finish_type_DQ', 'finish_type_Draw', 'finish_type_NC', 'decision_type_OTHER_DEC', 'stance_Sideways', 'stance_Open_Stance',]

df1 = df1.drop(drop_cols, axis=1)

df1.shape

KeyError: "['finish_type_Draw', 'finish_type_NC', 'decision_type_OTHER_DEC'] not in index"

### Calculating Current Win Streak

In [None]:
df1['fighter_color'] = np.where(
    df1['fighter_id'] == df1['red_fighter_id'], 
    'Red',
    'Blue'
)

df1 = df1.sort_values(by=['fighter_id', 'event_date'])

df1['is_win'] = (df1['fighter_color'] == df1['winner_color']).astype(int)

df1['streak_group'] = (
    df1['is_win'] != df1.groupby('fighter_id')['is_win'].shift()
).groupby(df1['fighter_id']).cumsum()

df1['running_streak'] = (
    df1.groupby(['fighter_id', 'is_win', 'streak_group']).cumcount() + 1
)
df1['tmp_win_streak'] = np.where(df1['is_win'] == 1, df1['running_streak'], 0)

df1['win_streak'] = df1.groupby('fighter_id')['tmp_win_streak'].shift(fill_value=0)

df1.drop(columns=['is_win', 'streak_group', 'running_streak', 'tmp_win_streak'], inplace=True)


### Calculating Current Lose Streak

In [None]:
df1['is_loss'] = (df1['fighter_color'] != df1['winner_color']).astype(int)

df1['streak_group'] = (
    df1['is_loss'] != df1.groupby('fighter_id')['is_loss'].shift()
).groupby(df1['fighter_id']).cumsum()

df1['running_streak'] = (
    df1.groupby(['fighter_id', 'is_loss', 'streak_group']).cumcount() + 1
)
df1['tmp_loss_streak'] = np.where(df1['is_loss'] == 1, df1['running_streak'], 0)

df1['lose_streak'] = df1.groupby('fighter_id')['tmp_loss_streak'].shift(fill_value=0)

df1 = df1.drop(columns=['is_loss', 'streak_group', 'running_streak', 'tmp_loss_streak'])


### Calculating Longest Win Streak

In [None]:
# 1. Sort by fighter and date to ensure chronology
df1 = df1.sort_values(['fighter_id', 'event_date'])

# 2. Calculate the "Is Win" boolean
df1['is_win'] = df1['fighter_color'] == df1['winner_color']

# 3. Create a helper function for a running streak
def get_running_max_streak(group_series):
    # This calculates the streak at every specific fight
    streak = 0
    streaks = []
    for win in group_series:
        if win:
            streak += 1
        else:
            streak = 0
        streaks.append(streak)
    
    # This turns [1, 2, 0, 1] into [1, 2, 2, 2] (the running max)
    return pd.Series(streaks).cummax().values

# 4. Apply the running max to each fighter group
df1['longest_win_streak_to_date'] = (
    df1.groupby('fighter_id')['is_win']
    .transform(get_running_max_streak)
)

# Shifting the 'longest_win_streak' column by 1 row within each fighter group
df1['longest_win_streak'] = (
    df1.groupby('fighter_id')['longest_win_streak_to_date']
    .shift(1, fill_value=0)
)
df1 = df1.drop(columns=['is_win', 'longest_win_streak_to_date'])

In [None]:
df1 = df1.sort_values(by=['event_date', 'fight_id'], ascending=False)
print(df1.shape)
print(df1.columns)
df1.head(10)

(16052, 90)
Index(['fighter_id', 'fight_id', 'event_date', 'is_title_fight',
       'winner_color', 'red_fighter_id', 'blue_fighter_id', 'end_round',
       'total_duration', 'rounds_scheduled', 'referee', 'w_SLpM', 'w_SApM',
       'w_StrAcc', 'w_StrDef', 'w_TDavg', 'w_TDacc', 'w_TDdef', 'w_SubAvg',
       'w_head_ratio', 'w_head_acc', 'w_body_ratio', 'w_body_acc',
       'w_leg_ratio', 'w_leg_acc', 'w_distance_ratio', 'w_distance_acc',
       'w_clinch_ratio', 'w_clinch_acc', 'w_ground_ratio', 'w_ground_acc',
       'w_knockdown_avg', 'w_reversal_avg', 'w_ctrl_time_pct', 'w_str_eff',
       'delta_SLpM', 'delta_SApM', 'delta_StrDef', 'delta_StrAcc',
       'delta_TDAvg', 'delta_TDdef', 'delta_TDacc', 'delta_SubAvg',
       'net_str_eff', 'delta_head_ratio', 'delta_head_acc', 'delta_body_ratio',
       'delta_body_acc', 'delta_leg_ratio', 'delta_leg_acc',
       'delta_distance_ratio', 'delta_distance_acc', 'delta_clinch_ratio',
       'delta_clinch_acc', 'delta_ground_ratio', 'delta_

Unnamed: 0,fighter_id,fight_id,event_date,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,referee,w_SLpM,w_SApM,w_StrAcc,w_StrDef,w_TDavg,w_TDacc,w_TDdef,w_SubAvg,w_head_ratio,w_head_acc,w_body_ratio,w_body_acc,w_leg_ratio,w_leg_acc,w_distance_ratio,w_distance_acc,w_clinch_ratio,w_clinch_acc,w_ground_ratio,w_ground_acc,w_knockdown_avg,w_reversal_avg,w_ctrl_time_pct,w_str_eff,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_distance_ratio,delta_distance_acc,delta_clinch_ratio,delta_clinch_acc,delta_ground_ratio,delta_ground_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_str_eff,name,height,reach,dob,weight_class_Bantamweight,weight_class_Catch_Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light_Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Welterweight,weight_class_Womens_Bantamweight,weight_class_Womens_Featherweight,weight_class_Womens_Flyweight,weight_class_Womens_Strawweight,finish_type_DEC,finish_type_KO_TKO,finish_type_SUB,decision_type_M_DEC,decision_type_S_DEC,decision_type_U_DEC,stance_Orthodox,stance_Southpaw,stance_Switch,fighter_color,win_streak,lose_streak,longest_win_streak
16480,1dd1a3aacde66f72,5c73e0013b859664,2026-01-31,False,Blue,1dd1a3aacde66f72,a1ad35b98a9136e5,3,900,3,Jim Perdios,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Sulangrangbo,69.0,66.0,2005-07-17,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,True,False,Red,0,0,0
16481,a1ad35b98a9136e5,5c73e0013b859664,2026-01-31,False,Blue,1dd1a3aacde66f72,a1ad35b98a9136e5,3,900,3,Jim Perdios,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Lawrence Lui,67.0,72.0,1996-08-04,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,True,False,False,Blue,0,0,0
16478,7826923b47f8d72a,fa4b3f5ce8055921,2026-01-24,True,Red,9e8f6c728eb01124,7826923b47f8d72a,5,1500,5,Marc Goddard,5.50202,2.512756,0.554881,0.437971,0.92069,0.334454,0.448636,1.170622,0.691857,0.484693,0.092076,0.779149,0.216067,0.842036,0.621161,0.478964,0.050559,0.702554,0.32828,0.757556,0.276278,0.51909,0.321063,0.678782,0.389755,-3.491953,-0.006402,-0.004922,0.819596,0.143351,0.258414,1.170622,3.881707,0.056043,0.011118,-0.069827,0.027198,0.013784,-0.043418,-0.192716,-0.049667,-0.105608,-0.077206,0.298324,0.113005,-0.356986,0.51909,0.278344,-0.206214,Paddy Pimblett,70.0,73.0,1995-01-03,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,True,True,False,False,Blue,7,0,7
16479,9e8f6c728eb01124,fa4b3f5ce8055921,2026-01-24,True,Red,9e8f6c728eb01124,7826923b47f8d72a,5,1500,5,Marc Goddard,5.112266,6.004708,0.559802,0.444373,0.101094,0.07604,0.305285,0.0,0.635814,0.473575,0.161904,0.751951,0.202283,0.885454,0.813876,0.528631,0.156168,0.77976,0.029956,0.644551,0.633264,0.0,0.042719,0.884995,-0.389755,3.491953,0.006402,0.004922,-0.819596,-0.143351,-0.258414,-1.170622,-3.881707,-0.056043,-0.011118,0.069827,-0.027198,-0.013784,0.043418,0.192716,0.049667,0.105608,0.077206,-0.298324,-0.113005,0.356986,-0.51909,-0.278344,0.206214,Justin Gaethje,71.0,70.0,1988-11-14,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,True,True,False,False,Red,1,0,4
16476,d3df1add9d9a7efb,d93ee3860196c4ae,2026-01-24,False,Red,fc08099550072fe4,d3df1add9d9a7efb,2,494,3,Jason Herzog,2.568751,2.316439,0.532508,0.34048,0.596263,0.213486,0.498665,0.0,0.835541,0.504884,0.158779,0.732199,0.00568,0.925365,0.293705,0.359167,0.20188,0.758255,0.504415,0.635344,1.420603,0.471066,0.149219,0.850129,-2.205241,-1.213932,-0.231536,0.059136,0.3241,-0.33841,-0.304085,-0.090721,-0.991309,0.123137,0.089789,0.039691,0.111783,-0.162828,0.100602,-0.564337,-0.10294,0.1663,0.335775,0.398037,0.015002,-0.592897,0.471066,0.024247,0.02594,Derrick Lewis,75.0,79.0,1985-02-07,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,Blue,2,0,6
16477,fc08099550072fe4,d93ee3860196c4ae,2026-01-24,False,Red,fc08099550072fe4,d3df1add9d9a7efb,2,494,3,Jason Herzog,4.773992,3.530371,0.473372,0.572016,0.272163,0.517572,0.837075,0.090721,0.712404,0.415095,0.119087,0.620416,0.168509,0.824763,0.858042,0.462107,0.03558,0.42248,0.106378,0.620342,2.013501,0.0,0.124973,0.824189,2.205241,1.213932,0.231536,-0.059136,-0.3241,0.33841,0.304085,0.090721,0.991309,-0.123137,-0.089789,-0.039691,-0.111783,0.162828,-0.100602,0.564337,0.10294,-0.1663,-0.335775,-0.398037,-0.015002,0.592897,-0.471066,-0.024247,-0.02594,Waldo Cortes Acosta,76.0,78.0,1991-10-03,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,Red,2,0,5
16474,262d32ebda89efc4,d63c9aa4f4873001,2026-01-24,False,Red,262d32ebda89efc4,47b63240018d5d86,3,900,3,Jason Herzog,5.033528,2.760609,0.446318,0.656646,0.06664,0.099225,0.930693,0.0,0.597285,0.346371,0.230343,0.710004,0.172373,0.899079,0.950608,0.435869,0.036291,0.851067,0.013101,0.772358,0.124951,0.04998,0.031301,0.880978,1.876554,-0.215858,-0.002973,0.040153,-1.316962,0.215747,-0.384884,-0.011052,2.092412,-0.096333,0.004281,0.013046,0.04684,0.083287,0.064918,0.016732,0.042892,-0.011918,0.032707,-0.004814,0.102083,-0.260647,-0.047598,-0.177362,0.128948,Natalia Silva,64.0,65.0,1997-02-03,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,True,False,True,False,Red,7,0,7
16475,47b63240018d5d86,d63c9aa4f4873001,2026-01-24,False,Red,262d32ebda89efc4,47b63240018d5d86,3,900,3,Jason Herzog,3.156974,2.976466,0.406165,0.659619,1.383602,0.484109,0.714946,0.011052,0.693617,0.34209,0.217297,0.663165,0.089086,0.834162,0.933876,0.392977,0.048209,0.818361,0.017915,0.670275,0.385598,0.097578,0.208663,0.752029,-1.876554,0.215858,0.002973,-0.040153,1.316962,-0.215747,0.384884,0.011052,-2.092412,0.096333,-0.004281,-0.013046,-0.04684,-0.083287,-0.064918,-0.016732,-0.042892,0.011918,-0.032707,0.004814,-0.102083,0.260647,0.047598,0.177362,-0.128948,Rose Namajunas,65.0,65.0,1992-06-29,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,True,True,False,False,Blue,1,0,3
16472,814e5233e2acf2ee,b5aafe149bec4306,2026-01-24,False,Red,ab2b4ff41d6ebe0f,814e5233e2acf2ee,1,196,3,Jason Herzog,4.830012,4.588707,0.470655,0.549888,0.874601,0.194277,0.711073,0.18104,0.480107,0.338921,0.249693,0.678129,0.2702,0.794903,0.812304,0.440013,0.133207,0.672896,0.054489,0.675632,0.773618,0.045422,0.086163,0.813841,0.943897,1.155321,-0.02438,0.034259,-1.42837,0.065392,-0.401056,0.132856,-0.211425,0.011278,0.025644,-0.0244,-0.001935,0.013122,0.138966,0.019573,0.043127,0.019014,0.049669,-0.038587,-0.16368,0.397486,0.045422,-0.082842,0.178451,Charles Johnson,69.0,70.0,1991-01-10,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,Blue,1,0,4
16473,ab2b4ff41d6ebe0f,b5aafe149bec4306,2026-01-24,False,Red,ab2b4ff41d6ebe0f,814e5233e2acf2ee,1,196,3,Jason Herzog,3.886115,3.433386,0.436396,0.574268,2.302971,0.595333,0.645681,0.048184,0.468829,0.313277,0.274093,0.680065,0.257078,0.655937,0.792731,0.396887,0.114193,0.623228,0.093077,0.839312,0.376132,0.0,0.169005,0.635389,-0.943897,-1.155321,0.02438,-0.034259,1.42837,-0.065392,0.401056,-0.132856,0.211425,-0.011278,-0.025644,0.0244,0.001935,-0.013122,-0.138966,-0.019573,-0.043127,-0.019014,-0.049669,0.038587,0.16368,-0.397486,-0.045422,0.082842,-0.178451,Alex Perez,66.0,65.0,1992-03-21,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,Red,0,2,3


### Calculating Win By Columns

In [None]:
cols = [
  'finish_type_KO_TKO',
  'finish_type_SUB', 
  'decision_type_M_DEC', 
  'decision_type_S_DEC', 
  'decision_type_U_DEC'
]

df1 = df1.sort_values(by=['fighter_id', 'event_date'])

for col in cols:
    win_signal = (
        (df1['winner_color'] == df1['fighter_color']) &
        (df1[col] == 1)
    ).astype(int)

    new_col_name = "wins_by_" + col.split("_type_")[1]
    df1[new_col_name] = (
        win_signal
        .groupby(df1['fighter_id'])
        .transform(lambda x: x.cumsum().shift(1, fill_value=0))
    )

df1 = df1.drop(columns=['finish_type_DEC','finish_type_KO_TKO', 
       'finish_type_SUB', 'decision_type_M_DEC', 'decision_type_S_DEC', 'decision_type_U_DEC'])



In [None]:
df1 = df1.sort_values(by=['event_date', 'fight_id'], ascending=False)
print(df1.shape)
print(df1.columns)
df1.head(10)

(16052, 89)
Index(['fighter_id', 'fight_id', 'event_date', 'is_title_fight',
       'winner_color', 'red_fighter_id', 'blue_fighter_id', 'end_round',
       'total_duration', 'rounds_scheduled', 'referee', 'w_SLpM', 'w_SApM',
       'w_StrAcc', 'w_StrDef', 'w_TDavg', 'w_TDacc', 'w_TDdef', 'w_SubAvg',
       'w_head_ratio', 'w_head_acc', 'w_body_ratio', 'w_body_acc',
       'w_leg_ratio', 'w_leg_acc', 'w_distance_ratio', 'w_distance_acc',
       'w_clinch_ratio', 'w_clinch_acc', 'w_ground_ratio', 'w_ground_acc',
       'w_knockdown_avg', 'w_reversal_avg', 'w_ctrl_time_pct', 'w_str_eff',
       'delta_SLpM', 'delta_SApM', 'delta_StrDef', 'delta_StrAcc',
       'delta_TDAvg', 'delta_TDdef', 'delta_TDacc', 'delta_SubAvg',
       'net_str_eff', 'delta_head_ratio', 'delta_head_acc', 'delta_body_ratio',
       'delta_body_acc', 'delta_leg_ratio', 'delta_leg_acc',
       'delta_distance_ratio', 'delta_distance_acc', 'delta_clinch_ratio',
       'delta_clinch_acc', 'delta_ground_ratio', 'delta_

Unnamed: 0,fighter_id,fight_id,event_date,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,referee,w_SLpM,w_SApM,w_StrAcc,w_StrDef,w_TDavg,w_TDacc,w_TDdef,w_SubAvg,w_head_ratio,w_head_acc,w_body_ratio,w_body_acc,w_leg_ratio,w_leg_acc,w_distance_ratio,w_distance_acc,w_clinch_ratio,w_clinch_acc,w_ground_ratio,w_ground_acc,w_knockdown_avg,w_reversal_avg,w_ctrl_time_pct,w_str_eff,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_distance_ratio,delta_distance_acc,delta_clinch_ratio,delta_clinch_acc,delta_ground_ratio,delta_ground_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_str_eff,name,height,reach,dob,weight_class_Bantamweight,weight_class_Catch_Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light_Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Welterweight,weight_class_Womens_Bantamweight,weight_class_Womens_Featherweight,weight_class_Womens_Flyweight,weight_class_Womens_Strawweight,stance_Orthodox,stance_Southpaw,stance_Switch,fighter_color,win_streak,lose_streak,longest_win_streak,wins_by_KO_TKO,wins_by_SUB,wins_by_M_DEC,wins_by_S_DEC,wins_by_U_DEC
16480,1dd1a3aacde66f72,5c73e0013b859664,2026-01-31,False,Blue,1dd1a3aacde66f72,a1ad35b98a9136e5,3,900,3,Jim Perdios,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Sulangrangbo,69.0,66.0,2005-07-17,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,Red,0,0,0,0,0,0,0,0
16481,a1ad35b98a9136e5,5c73e0013b859664,2026-01-31,False,Blue,1dd1a3aacde66f72,a1ad35b98a9136e5,3,900,3,Jim Perdios,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Lawrence Lui,67.0,72.0,1996-08-04,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,Blue,0,0,0,0,0,0,0,0
16478,7826923b47f8d72a,fa4b3f5ce8055921,2026-01-24,True,Red,9e8f6c728eb01124,7826923b47f8d72a,5,1500,5,Marc Goddard,5.50202,2.512756,0.554881,0.437971,0.92069,0.334454,0.448636,1.170622,0.691857,0.484693,0.092076,0.779149,0.216067,0.842036,0.621161,0.478964,0.050559,0.702554,0.32828,0.757556,0.276278,0.51909,0.321063,0.678782,0.389755,-3.491953,-0.006402,-0.004922,0.819596,0.143351,0.258414,1.170622,3.881707,0.056043,0.011118,-0.069827,0.027198,0.013784,-0.043418,-0.192716,-0.049667,-0.105608,-0.077206,0.298324,0.113005,-0.356986,0.51909,0.278344,-0.206214,Paddy Pimblett,70.0,73.0,1995-01-03,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,Blue,7,0,7,2,3,0,0,2
16479,9e8f6c728eb01124,fa4b3f5ce8055921,2026-01-24,True,Red,9e8f6c728eb01124,7826923b47f8d72a,5,1500,5,Marc Goddard,5.112266,6.004708,0.559802,0.444373,0.101094,0.07604,0.305285,0.0,0.635814,0.473575,0.161904,0.751951,0.202283,0.885454,0.813876,0.528631,0.156168,0.77976,0.029956,0.644551,0.633264,0.0,0.042719,0.884995,-0.389755,3.491953,0.006402,0.004922,-0.819596,-0.143351,-0.258414,-1.170622,-3.881707,-0.056043,-0.011118,0.069827,-0.027198,-0.013784,0.043418,0.192716,0.049667,0.105608,0.077206,-0.298324,-0.113005,0.356986,-0.51909,-0.278344,0.206214,Justin Gaethje,71.0,70.0,1988-11-14,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,Red,1,0,4,6,0,1,0,2
16476,d3df1add9d9a7efb,d93ee3860196c4ae,2026-01-24,False,Red,fc08099550072fe4,d3df1add9d9a7efb,2,494,3,Jason Herzog,2.568751,2.316439,0.532508,0.34048,0.596263,0.213486,0.498665,0.0,0.835541,0.504884,0.158779,0.732199,0.00568,0.925365,0.293705,0.359167,0.20188,0.758255,0.504415,0.635344,1.420603,0.471066,0.149219,0.850129,-2.205241,-1.213932,-0.231536,0.059136,0.3241,-0.33841,-0.304085,-0.090721,-0.991309,0.123137,0.089789,0.039691,0.111783,-0.162828,0.100602,-0.564337,-0.10294,0.1663,0.335775,0.398037,0.015002,-0.592897,0.471066,0.024247,0.02594,Derrick Lewis,75.0,79.0,1985-02-07,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,Blue,2,0,6,16,0,0,2,2
16477,fc08099550072fe4,d93ee3860196c4ae,2026-01-24,False,Red,fc08099550072fe4,d3df1add9d9a7efb,2,494,3,Jason Herzog,4.773992,3.530371,0.473372,0.572016,0.272163,0.517572,0.837075,0.090721,0.712404,0.415095,0.119087,0.620416,0.168509,0.824763,0.858042,0.462107,0.03558,0.42248,0.106378,0.620342,2.013501,0.0,0.124973,0.824189,2.205241,1.213932,0.231536,-0.059136,-0.3241,0.33841,0.304085,0.090721,0.991309,-0.123137,-0.089789,-0.039691,-0.111783,0.162828,-0.100602,0.564337,0.10294,-0.1663,-0.335775,-0.398037,-0.015002,0.592897,-0.471066,-0.024247,-0.02594,Waldo Cortes Acosta,76.0,78.0,1991-10-03,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,Red,2,0,5,4,0,0,0,5
16474,262d32ebda89efc4,d63c9aa4f4873001,2026-01-24,False,Red,262d32ebda89efc4,47b63240018d5d86,3,900,3,Jason Herzog,5.033528,2.760609,0.446318,0.656646,0.06664,0.099225,0.930693,0.0,0.597285,0.346371,0.230343,0.710004,0.172373,0.899079,0.950608,0.435869,0.036291,0.851067,0.013101,0.772358,0.124951,0.04998,0.031301,0.880978,1.876554,-0.215858,-0.002973,0.040153,-1.316962,0.215747,-0.384884,-0.011052,2.092412,-0.096333,0.004281,0.013046,0.04684,0.083287,0.064918,0.016732,0.042892,-0.011918,0.032707,-0.004814,0.102083,-0.260647,-0.047598,-0.177362,0.128948,Natalia Silva,64.0,65.0,1997-02-03,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,Red,7,0,7,2,0,0,0,5
16475,47b63240018d5d86,d63c9aa4f4873001,2026-01-24,False,Red,262d32ebda89efc4,47b63240018d5d86,3,900,3,Jason Herzog,3.156974,2.976466,0.406165,0.659619,1.383602,0.484109,0.714946,0.011052,0.693617,0.34209,0.217297,0.663165,0.089086,0.834162,0.933876,0.392977,0.048209,0.818361,0.017915,0.670275,0.385598,0.097578,0.208663,0.752029,-1.876554,0.215858,0.002973,-0.040153,1.316962,-0.215747,0.384884,0.011052,-2.092412,0.096333,-0.004281,-0.013046,-0.04684,-0.083287,-0.064918,-0.016732,-0.042892,0.011918,-0.032707,0.004814,-0.102083,0.260647,0.047598,0.177362,-0.128948,Rose Namajunas,65.0,65.0,1992-06-29,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,Blue,1,0,3,2,3,0,2,5
16472,814e5233e2acf2ee,b5aafe149bec4306,2026-01-24,False,Red,ab2b4ff41d6ebe0f,814e5233e2acf2ee,1,196,3,Jason Herzog,4.830012,4.588707,0.470655,0.549888,0.874601,0.194277,0.711073,0.18104,0.480107,0.338921,0.249693,0.678129,0.2702,0.794903,0.812304,0.440013,0.133207,0.672896,0.054489,0.675632,0.773618,0.045422,0.086163,0.813841,0.943897,1.155321,-0.02438,0.034259,-1.42837,0.065392,-0.401056,0.132856,-0.211425,0.011278,0.025644,-0.0244,-0.001935,0.013122,0.138966,0.019573,0.043127,0.019014,0.049669,-0.038587,-0.16368,0.397486,0.045422,-0.082842,0.178451,Charles Johnson,69.0,70.0,1991-01-10,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,Blue,1,0,4,3,0,0,1,3
16473,ab2b4ff41d6ebe0f,b5aafe149bec4306,2026-01-24,False,Red,ab2b4ff41d6ebe0f,814e5233e2acf2ee,1,196,3,Jason Herzog,3.886115,3.433386,0.436396,0.574268,2.302971,0.595333,0.645681,0.048184,0.468829,0.313277,0.274093,0.680065,0.257078,0.655937,0.792731,0.396887,0.114193,0.623228,0.093077,0.839312,0.376132,0.0,0.169005,0.635389,-0.943897,-1.155321,0.02438,-0.034259,1.42837,-0.065392,0.401056,-0.132856,0.211425,-0.011278,-0.025644,0.0244,0.001935,-0.013122,-0.138966,-0.019573,-0.043127,-0.019014,-0.049669,0.038587,0.16368,-0.397486,-0.045422,0.082842,-0.178451,Alex Perez,66.0,65.0,1992-03-21,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,Red,0,2,3,3,2,0,0,2


### Calculating Age

In [None]:
def calculate_age(row):
    return row['event_date'].year - row['dob'].year - (
        (row['event_date'].month, row['event_date'].day) < 
        (row['dob'].month, row['dob'].day)
    )

df1['age'] = df1.apply(calculate_age, axis=1)
df1 = df1.drop(columns=['dob'])

In [None]:
print(df1.shape)
print(df1.columns)
df1.head(10)

(16052, 89)
Index(['fighter_id', 'fight_id', 'event_date', 'is_title_fight',
       'winner_color', 'red_fighter_id', 'blue_fighter_id', 'end_round',
       'total_duration', 'rounds_scheduled', 'referee', 'w_SLpM', 'w_SApM',
       'w_StrAcc', 'w_StrDef', 'w_TDavg', 'w_TDacc', 'w_TDdef', 'w_SubAvg',
       'w_head_ratio', 'w_head_acc', 'w_body_ratio', 'w_body_acc',
       'w_leg_ratio', 'w_leg_acc', 'w_distance_ratio', 'w_distance_acc',
       'w_clinch_ratio', 'w_clinch_acc', 'w_ground_ratio', 'w_ground_acc',
       'w_knockdown_avg', 'w_reversal_avg', 'w_ctrl_time_pct', 'w_str_eff',
       'delta_SLpM', 'delta_SApM', 'delta_StrDef', 'delta_StrAcc',
       'delta_TDAvg', 'delta_TDdef', 'delta_TDacc', 'delta_SubAvg',
       'net_str_eff', 'delta_head_ratio', 'delta_head_acc', 'delta_body_ratio',
       'delta_body_acc', 'delta_leg_ratio', 'delta_leg_acc',
       'delta_distance_ratio', 'delta_distance_acc', 'delta_clinch_ratio',
       'delta_clinch_acc', 'delta_ground_ratio', 'delta_

Unnamed: 0,fighter_id,fight_id,event_date,is_title_fight,winner_color,red_fighter_id,blue_fighter_id,end_round,total_duration,rounds_scheduled,referee,w_SLpM,w_SApM,w_StrAcc,w_StrDef,w_TDavg,w_TDacc,w_TDdef,w_SubAvg,w_head_ratio,w_head_acc,w_body_ratio,w_body_acc,w_leg_ratio,w_leg_acc,w_distance_ratio,w_distance_acc,w_clinch_ratio,w_clinch_acc,w_ground_ratio,w_ground_acc,w_knockdown_avg,w_reversal_avg,w_ctrl_time_pct,w_str_eff,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_distance_ratio,delta_distance_acc,delta_clinch_ratio,delta_clinch_acc,delta_ground_ratio,delta_ground_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_str_eff,name,height,reach,weight_class_Bantamweight,weight_class_Catch_Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light_Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Welterweight,weight_class_Womens_Bantamweight,weight_class_Womens_Featherweight,weight_class_Womens_Flyweight,weight_class_Womens_Strawweight,stance_Orthodox,stance_Southpaw,stance_Switch,fighter_color,win_streak,lose_streak,longest_win_streak,wins_by_KO_TKO,wins_by_SUB,wins_by_M_DEC,wins_by_S_DEC,wins_by_U_DEC,age
16480,1dd1a3aacde66f72,5c73e0013b859664,2026-01-31,False,Blue,1dd1a3aacde66f72,a1ad35b98a9136e5,3,900,3,Jim Perdios,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Sulangrangbo,69.0,66.0,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,Red,0,0,0,0,0,0,0,0,20
16481,a1ad35b98a9136e5,5c73e0013b859664,2026-01-31,False,Blue,1dd1a3aacde66f72,a1ad35b98a9136e5,3,900,3,Jim Perdios,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Lawrence Lui,67.0,72.0,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,Blue,0,0,0,0,0,0,0,0,29
16478,7826923b47f8d72a,fa4b3f5ce8055921,2026-01-24,True,Red,9e8f6c728eb01124,7826923b47f8d72a,5,1500,5,Marc Goddard,5.50202,2.512756,0.554881,0.437971,0.92069,0.334454,0.448636,1.170622,0.691857,0.484693,0.092076,0.779149,0.216067,0.842036,0.621161,0.478964,0.050559,0.702554,0.32828,0.757556,0.276278,0.51909,0.321063,0.678782,0.389755,-3.491953,-0.006402,-0.004922,0.819596,0.143351,0.258414,1.170622,3.881707,0.056043,0.011118,-0.069827,0.027198,0.013784,-0.043418,-0.192716,-0.049667,-0.105608,-0.077206,0.298324,0.113005,-0.356986,0.51909,0.278344,-0.206214,Paddy Pimblett,70.0,73.0,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,Blue,7,0,7,2,3,0,0,2,31
16479,9e8f6c728eb01124,fa4b3f5ce8055921,2026-01-24,True,Red,9e8f6c728eb01124,7826923b47f8d72a,5,1500,5,Marc Goddard,5.112266,6.004708,0.559802,0.444373,0.101094,0.07604,0.305285,0.0,0.635814,0.473575,0.161904,0.751951,0.202283,0.885454,0.813876,0.528631,0.156168,0.77976,0.029956,0.644551,0.633264,0.0,0.042719,0.884995,-0.389755,3.491953,0.006402,0.004922,-0.819596,-0.143351,-0.258414,-1.170622,-3.881707,-0.056043,-0.011118,0.069827,-0.027198,-0.013784,0.043418,0.192716,0.049667,0.105608,0.077206,-0.298324,-0.113005,0.356986,-0.51909,-0.278344,0.206214,Justin Gaethje,71.0,70.0,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,Red,1,0,4,6,0,1,0,2,37
16476,d3df1add9d9a7efb,d93ee3860196c4ae,2026-01-24,False,Red,fc08099550072fe4,d3df1add9d9a7efb,2,494,3,Jason Herzog,2.568751,2.316439,0.532508,0.34048,0.596263,0.213486,0.498665,0.0,0.835541,0.504884,0.158779,0.732199,0.00568,0.925365,0.293705,0.359167,0.20188,0.758255,0.504415,0.635344,1.420603,0.471066,0.149219,0.850129,-2.205241,-1.213932,-0.231536,0.059136,0.3241,-0.33841,-0.304085,-0.090721,-0.991309,0.123137,0.089789,0.039691,0.111783,-0.162828,0.100602,-0.564337,-0.10294,0.1663,0.335775,0.398037,0.015002,-0.592897,0.471066,0.024247,0.02594,Derrick Lewis,75.0,79.0,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,Blue,2,0,6,16,0,0,2,2,40
16477,fc08099550072fe4,d93ee3860196c4ae,2026-01-24,False,Red,fc08099550072fe4,d3df1add9d9a7efb,2,494,3,Jason Herzog,4.773992,3.530371,0.473372,0.572016,0.272163,0.517572,0.837075,0.090721,0.712404,0.415095,0.119087,0.620416,0.168509,0.824763,0.858042,0.462107,0.03558,0.42248,0.106378,0.620342,2.013501,0.0,0.124973,0.824189,2.205241,1.213932,0.231536,-0.059136,-0.3241,0.33841,0.304085,0.090721,0.991309,-0.123137,-0.089789,-0.039691,-0.111783,0.162828,-0.100602,0.564337,0.10294,-0.1663,-0.335775,-0.398037,-0.015002,0.592897,-0.471066,-0.024247,-0.02594,Waldo Cortes Acosta,76.0,78.0,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,Red,2,0,5,4,0,0,0,5,34
16474,262d32ebda89efc4,d63c9aa4f4873001,2026-01-24,False,Red,262d32ebda89efc4,47b63240018d5d86,3,900,3,Jason Herzog,5.033528,2.760609,0.446318,0.656646,0.06664,0.099225,0.930693,0.0,0.597285,0.346371,0.230343,0.710004,0.172373,0.899079,0.950608,0.435869,0.036291,0.851067,0.013101,0.772358,0.124951,0.04998,0.031301,0.880978,1.876554,-0.215858,-0.002973,0.040153,-1.316962,0.215747,-0.384884,-0.011052,2.092412,-0.096333,0.004281,0.013046,0.04684,0.083287,0.064918,0.016732,0.042892,-0.011918,0.032707,-0.004814,0.102083,-0.260647,-0.047598,-0.177362,0.128948,Natalia Silva,64.0,65.0,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,Red,7,0,7,2,0,0,0,5,28
16475,47b63240018d5d86,d63c9aa4f4873001,2026-01-24,False,Red,262d32ebda89efc4,47b63240018d5d86,3,900,3,Jason Herzog,3.156974,2.976466,0.406165,0.659619,1.383602,0.484109,0.714946,0.011052,0.693617,0.34209,0.217297,0.663165,0.089086,0.834162,0.933876,0.392977,0.048209,0.818361,0.017915,0.670275,0.385598,0.097578,0.208663,0.752029,-1.876554,0.215858,0.002973,-0.040153,1.316962,-0.215747,0.384884,0.011052,-2.092412,0.096333,-0.004281,-0.013046,-0.04684,-0.083287,-0.064918,-0.016732,-0.042892,0.011918,-0.032707,0.004814,-0.102083,0.260647,0.047598,0.177362,-0.128948,Rose Namajunas,65.0,65.0,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,Blue,1,0,3,2,3,0,2,5,33
16472,814e5233e2acf2ee,b5aafe149bec4306,2026-01-24,False,Red,ab2b4ff41d6ebe0f,814e5233e2acf2ee,1,196,3,Jason Herzog,4.830012,4.588707,0.470655,0.549888,0.874601,0.194277,0.711073,0.18104,0.480107,0.338921,0.249693,0.678129,0.2702,0.794903,0.812304,0.440013,0.133207,0.672896,0.054489,0.675632,0.773618,0.045422,0.086163,0.813841,0.943897,1.155321,-0.02438,0.034259,-1.42837,0.065392,-0.401056,0.132856,-0.211425,0.011278,0.025644,-0.0244,-0.001935,0.013122,0.138966,0.019573,0.043127,0.019014,0.049669,-0.038587,-0.16368,0.397486,0.045422,-0.082842,0.178451,Charles Johnson,69.0,70.0,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,Blue,1,0,4,3,0,0,1,3,35
16473,ab2b4ff41d6ebe0f,b5aafe149bec4306,2026-01-24,False,Red,ab2b4ff41d6ebe0f,814e5233e2acf2ee,1,196,3,Jason Herzog,3.886115,3.433386,0.436396,0.574268,2.302971,0.595333,0.645681,0.048184,0.468829,0.313277,0.274093,0.680065,0.257078,0.655937,0.792731,0.396887,0.114193,0.623228,0.093077,0.839312,0.376132,0.0,0.169005,0.635389,-0.943897,-1.155321,0.02438,-0.034259,1.42837,-0.065392,0.401056,-0.132856,0.211425,-0.011278,-0.025644,0.0244,0.001935,-0.013122,-0.138966,-0.019573,-0.043127,-0.019014,-0.049669,0.038587,0.16368,-0.397486,-0.045422,0.082842,-0.178451,Alex Perez,66.0,65.0,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,Red,0,2,3,3,2,0,0,2,33


### Adding is_debut feature

In [None]:
df1 = df1.sort_values(by=['event_date', 'fight_id'])
df1['is_debut'] = (
    df1.groupby('fighter_id').cumcount() == 0
).astype(int)

### Combining Red and Blue Fighters Into Single Row

In [None]:
shared_cols = [
    'fight_id', 'event_date', 'is_title_fight', 'winner_color', 'end_round', 
    'total_duration', 'rounds_scheduled', 'referee', 'weight_class_Bantamweight',
    'weight_class_Catch_Weight', 'weight_class_Featherweight',
    'weight_class_Flyweight', 'weight_class_Heavyweight',
    'weight_class_Light_Heavyweight', 'weight_class_Lightweight',
    'weight_class_Middleweight', 'weight_class_Welterweight',
    'weight_class_Womens_Bantamweight', 'weight_class_Womens_Featherweight',
    'weight_class_Womens_Flyweight', 'weight_class_Womens_Strawweight', 
    'delta_TDAvg', 'delta_TDdef', 'delta_TDacc', 'delta_SubAvg', 'net_str_eff',
    'delta_head_ratio', 'delta_head_acc', 'delta_body_ratio', 'delta_body_acc', 'delta_leg_ratio', 'delta_leg_acc',
    'delta_knockdown_avg', 'delta_reversal_avg', 'delta_ctrl_time_pct', 
    'delta_SLpM', 'delta_SApM', 'delta_StrDef', 'delta_StrAcc', 'delta_str_eff',
]

fighter_cols = [
    'fighter_id','w_SLpM', 'w_StrAcc', 'w_SApM', 
    'w_StrDef', 'w_TDavg', 'w_TDacc', 'w_TDdef', 'w_SubAvg', 'w_knockdown_avg', 'w_reversal_avg', 'w_ctrl_time_pct', 
    'w_str_eff','name', 'height', 'reach', 'stance_Orthodox','stance_Southpaw', 'stance_Switch',
    'fighter_color', 'win_streak','lose_streak', 'longest_win_streak', 'wins_by_KO_TKO', 'wins_by_SUB',
    'wins_by_M_DEC', 'wins_by_S_DEC', 'wins_by_U_DEC', 'age', 'is_debut'
]

red_df = df1[df1['fighter_color'] == 'Red'][['fight_id'] + fighter_cols]
blue_df = df1[df1['fighter_color'] == 'Blue'][['fight_id'] + fighter_cols]

red_df = red_df.add_prefix('R_').rename(columns={'R_fight_id': 'fight_id'})
blue_df = blue_df.add_prefix('B_').rename(columns={'B_fight_id': 'fight_id'})

shared_df = df1[shared_cols].drop_duplicates(subset=['fight_id'])

df1 = shared_df.merge(red_df, on='fight_id').merge(blue_df, on='fight_id')

In [None]:
print(df1.shape)
print(df1.columns)
df1.dtypes

(8026, 100)
Index(['fight_id', 'event_date', 'is_title_fight', 'winner_color', 'end_round',
       'total_duration', 'rounds_scheduled', 'referee',
       'weight_class_Bantamweight', 'weight_class_Catch_Weight',
       'weight_class_Featherweight', 'weight_class_Flyweight',
       'weight_class_Heavyweight', 'weight_class_Light_Heavyweight',
       'weight_class_Lightweight', 'weight_class_Middleweight',
       'weight_class_Welterweight', 'weight_class_Womens_Bantamweight',
       'weight_class_Womens_Featherweight', 'weight_class_Womens_Flyweight',
       'weight_class_Womens_Strawweight', 'delta_TDAvg', 'delta_TDdef',
       'delta_TDacc', 'delta_SubAvg', 'net_str_eff', 'delta_head_ratio',
       'delta_head_acc', 'delta_body_ratio', 'delta_body_acc',
       'delta_leg_ratio', 'delta_leg_acc', 'delta_knockdown_avg',
       'delta_reversal_avg', 'delta_ctrl_time_pct', 'delta_SLpM', 'delta_SApM',
       'delta_StrDef', 'delta_StrAcc', 'delta_str_eff', 'R_fighter_id',
       'R_w_SLpM

fight_id           object
event_date         object
is_title_fight       bool
winner_color       object
end_round           int64
                    ...  
B_wins_by_M_DEC     int64
B_wins_by_S_DEC     int64
B_wins_by_U_DEC     int64
B_age               int64
B_is_debut          int64
Length: 100, dtype: object

In [None]:
df1.head(10)

Unnamed: 0,fight_id,event_date,is_title_fight,winner_color,end_round,total_duration,rounds_scheduled,referee,weight_class_Bantamweight,weight_class_Catch_Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light_Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Welterweight,weight_class_Womens_Bantamweight,weight_class_Womens_Featherweight,weight_class_Womens_Flyweight,weight_class_Womens_Strawweight,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_str_eff,R_fighter_id,R_w_SLpM,R_w_StrAcc,R_w_SApM,R_w_StrDef,R_w_TDavg,R_w_TDacc,R_w_TDdef,R_w_SubAvg,R_w_knockdown_avg,R_w_reversal_avg,R_w_ctrl_time_pct,R_w_str_eff,R_name,R_height,R_reach,R_stance_Orthodox,R_stance_Southpaw,R_stance_Switch,R_fighter_color,R_win_streak,R_lose_streak,R_longest_win_streak,R_wins_by_KO_TKO,R_wins_by_SUB,R_wins_by_M_DEC,R_wins_by_S_DEC,R_wins_by_U_DEC,R_age,R_is_debut,B_fighter_id,B_w_SLpM,B_w_StrAcc,B_w_SApM,B_w_StrDef,B_w_TDavg,B_w_TDacc,B_w_TDdef,B_w_SubAvg,B_w_knockdown_avg,B_w_reversal_avg,B_w_ctrl_time_pct,B_w_str_eff,B_name,B_height,B_reach,B_stance_Orthodox,B_stance_Southpaw,B_stance_Switch,B_fighter_color,B_win_streak,B_lose_streak,B_longest_win_streak,B_wins_by_KO_TKO,B_wins_by_SUB,B_wins_by_M_DEC,B_wins_by_S_DEC,B_wins_by_U_DEC,B_age,B_is_debut
0,4a1f37200bc69376,2001-02-23,False,Red,1,172,3,Mario Yamasaki,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,817e0bdf08efce2e,,,,,,,,,,,,,Elvis Sinosic,75.0,77.0,True,False,False,Red,0,0,0,0,0,0,0,0,30,1,8d26912cd2aeb366,,,,,,,,,,,,,Jeremy Horn,73.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,25,1
1,74d0da1f9df17b9e,2001-02-23,False,Red,1,100,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,263ebd4a669e1e98,,,,,,,,,,,,,Fabiano Iha,68.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,30,1,a8fa0c4e95512806,,,,,,,,,,,,,Phil Johns,62.0,74.0,False,True,False,Blue,0,0,0,0,0,0,0,0,32,1
2,75a401066ad784f6,2001-02-23,False,Red,2,561,3,Mario Yamasaki,False,False,False,False,True,False,False,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,b44f39a5c6596953,,,,,,,,,,,,,Pedro Rizzo,73.0,78.0,True,False,False,Red,0,0,0,0,0,0,0,0,26,1,ad3f53c454cbbead,,,,,,,,,,,,,Josh Barnett,75.0,78.0,True,False,False,Blue,0,0,0,0,0,0,0,0,23,1
3,a38ec65ac140fcf8,2001-02-23,True,Red,5,1500,5,John McCarthy,False,False,False,False,False,False,True,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,44260175069b6276,,,,,,,,,,,,,Jens Pulver,67.0,70.0,False,True,False,Red,0,0,0,0,0,0,0,0,26,1,eb1723480fa2f96c,,,,,,,,,,,,,Caol Uno,67.0,70.0,False,True,False,Blue,0,0,0,0,0,0,0,0,25,1
4,a949b05c64e43131,2001-02-23,False,Red,2,600,2,Mason White,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,1652f3213655b935,,,,,,,,,,,,,Phil Baroni,69.0,72.0,True,False,False,Red,0,0,0,0,0,0,0,0,24,1,e09dbbd5fe3f0fe2,,,,,,,,,,,,,Curtis Stout,73.0,75.0,False,True,False,Blue,0,0,0,0,0,0,0,0,27,1
5,bfb468c3427faa50,2001-02-23,False,Red,2,587,2,Mason White,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,029880cdbf5ca089,,,,,,,,,,,,,Sean Sherk,66.0,67.0,True,False,False,Red,0,0,0,0,0,0,0,0,27,1,cb6783c39c01d896,,,,,,,,,,,,,Tiki Ghosn,70.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,24,1
6,f8820294bbe01486,2001-02-23,True,Red,1,32,5,John McCarthy,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,2f732dd9210d301f,,,,,,,,,,,,,Tito Ortiz,75.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,26,1,8f2d9ee27f206f1f,,,,,,,,,,,,,Evan Tanner,72.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,30,1
7,0434a68809885ea1,2001-05-04,False,Red,3,891,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,7b6e40a73b567072,,,,,,,,,,,,,Shonie Carter,70.0,74.0,False,True,False,Red,0,0,0,0,0,0,0,0,29,1,86dfed7cc24a9fa7,,,,,,,,,,,,,Matt Serra,66.0,68.0,True,False,False,Blue,0,0,0,0,0,0,0,0,26,1
8,34ab72077963ee74,2001-05-04,True,Red,3,770,5,John McCarthy,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,952f6fa06f25a0ec,,,,,,,,,,,,,Carlos Newton,69.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,24,1,cedfdf8d423d500c,,,,,,,,,,,,,Pat Miletich,70.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,33,1
9,49693a06fc1d8906,2001-05-04,False,Red,3,900,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,6ad9058b7c7898b0,,,,,,,,,,,,,Tony DeSouza,72.0,70.0,False,True,False,Red,0,0,0,0,0,0,0,0,26,1,21b1132949f2a9b0,,,,,,,,,,,,,Steve Berger,71.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,27,1


### Creating Deltas in Fighter Attributes

In [None]:
df1['delta_age'] = df1['R_age'] - df1['B_age']
df1['delta_height'] = df1['R_height'] - df1['B_height']
df1['delta_reach'] = df1['R_reach'] - df1['B_reach']

In [None]:
print(df1.shape)
print(df1.columns)
df1.head(10)

(8026, 103)
Index(['fight_id', 'event_date', 'is_title_fight', 'winner_color', 'end_round',
       'total_duration', 'rounds_scheduled', 'referee',
       'weight_class_Bantamweight', 'weight_class_Catch_Weight',
       ...
       'B_wins_by_KO_TKO', 'B_wins_by_SUB', 'B_wins_by_M_DEC',
       'B_wins_by_S_DEC', 'B_wins_by_U_DEC', 'B_age', 'B_is_debut',
       'delta_age', 'delta_height', 'delta_reach'],
      dtype='object', length=103)


Unnamed: 0,fight_id,event_date,is_title_fight,winner_color,end_round,total_duration,rounds_scheduled,referee,weight_class_Bantamweight,weight_class_Catch_Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light_Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Welterweight,weight_class_Womens_Bantamweight,weight_class_Womens_Featherweight,weight_class_Womens_Flyweight,weight_class_Womens_Strawweight,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_str_eff,R_fighter_id,R_w_SLpM,R_w_StrAcc,R_w_SApM,R_w_StrDef,R_w_TDavg,R_w_TDacc,R_w_TDdef,R_w_SubAvg,R_w_knockdown_avg,R_w_reversal_avg,R_w_ctrl_time_pct,R_w_str_eff,R_name,R_height,R_reach,R_stance_Orthodox,R_stance_Southpaw,R_stance_Switch,R_fighter_color,R_win_streak,R_lose_streak,R_longest_win_streak,R_wins_by_KO_TKO,R_wins_by_SUB,R_wins_by_M_DEC,R_wins_by_S_DEC,R_wins_by_U_DEC,R_age,R_is_debut,B_fighter_id,B_w_SLpM,B_w_StrAcc,B_w_SApM,B_w_StrDef,B_w_TDavg,B_w_TDacc,B_w_TDdef,B_w_SubAvg,B_w_knockdown_avg,B_w_reversal_avg,B_w_ctrl_time_pct,B_w_str_eff,B_name,B_height,B_reach,B_stance_Orthodox,B_stance_Southpaw,B_stance_Switch,B_fighter_color,B_win_streak,B_lose_streak,B_longest_win_streak,B_wins_by_KO_TKO,B_wins_by_SUB,B_wins_by_M_DEC,B_wins_by_S_DEC,B_wins_by_U_DEC,B_age,B_is_debut,delta_age,delta_height,delta_reach
0,4a1f37200bc69376,2001-02-23,False,Red,1,172,3,Mario Yamasaki,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,817e0bdf08efce2e,,,,,,,,,,,,,Elvis Sinosic,75.0,77.0,True,False,False,Red,0,0,0,0,0,0,0,0,30,1,8d26912cd2aeb366,,,,,,,,,,,,,Jeremy Horn,73.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,25,1,5,2.0,3.0
1,74d0da1f9df17b9e,2001-02-23,False,Red,1,100,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,263ebd4a669e1e98,,,,,,,,,,,,,Fabiano Iha,68.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,30,1,a8fa0c4e95512806,,,,,,,,,,,,,Phil Johns,62.0,74.0,False,True,False,Blue,0,0,0,0,0,0,0,0,32,1,-2,6.0,0.0
2,75a401066ad784f6,2001-02-23,False,Red,2,561,3,Mario Yamasaki,False,False,False,False,True,False,False,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,b44f39a5c6596953,,,,,,,,,,,,,Pedro Rizzo,73.0,78.0,True,False,False,Red,0,0,0,0,0,0,0,0,26,1,ad3f53c454cbbead,,,,,,,,,,,,,Josh Barnett,75.0,78.0,True,False,False,Blue,0,0,0,0,0,0,0,0,23,1,3,-2.0,0.0
3,a38ec65ac140fcf8,2001-02-23,True,Red,5,1500,5,John McCarthy,False,False,False,False,False,False,True,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,44260175069b6276,,,,,,,,,,,,,Jens Pulver,67.0,70.0,False,True,False,Red,0,0,0,0,0,0,0,0,26,1,eb1723480fa2f96c,,,,,,,,,,,,,Caol Uno,67.0,70.0,False,True,False,Blue,0,0,0,0,0,0,0,0,25,1,1,0.0,0.0
4,a949b05c64e43131,2001-02-23,False,Red,2,600,2,Mason White,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,1652f3213655b935,,,,,,,,,,,,,Phil Baroni,69.0,72.0,True,False,False,Red,0,0,0,0,0,0,0,0,24,1,e09dbbd5fe3f0fe2,,,,,,,,,,,,,Curtis Stout,73.0,75.0,False,True,False,Blue,0,0,0,0,0,0,0,0,27,1,-3,-4.0,-3.0
5,bfb468c3427faa50,2001-02-23,False,Red,2,587,2,Mason White,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,029880cdbf5ca089,,,,,,,,,,,,,Sean Sherk,66.0,67.0,True,False,False,Red,0,0,0,0,0,0,0,0,27,1,cb6783c39c01d896,,,,,,,,,,,,,Tiki Ghosn,70.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,24,1,3,-4.0,-7.0
6,f8820294bbe01486,2001-02-23,True,Red,1,32,5,John McCarthy,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,2f732dd9210d301f,,,,,,,,,,,,,Tito Ortiz,75.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,26,1,8f2d9ee27f206f1f,,,,,,,,,,,,,Evan Tanner,72.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,30,1,-4,3.0,0.0
7,0434a68809885ea1,2001-05-04,False,Red,3,891,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,7b6e40a73b567072,,,,,,,,,,,,,Shonie Carter,70.0,74.0,False,True,False,Red,0,0,0,0,0,0,0,0,29,1,86dfed7cc24a9fa7,,,,,,,,,,,,,Matt Serra,66.0,68.0,True,False,False,Blue,0,0,0,0,0,0,0,0,26,1,3,4.0,6.0
8,34ab72077963ee74,2001-05-04,True,Red,3,770,5,John McCarthy,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,952f6fa06f25a0ec,,,,,,,,,,,,,Carlos Newton,69.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,24,1,cedfdf8d423d500c,,,,,,,,,,,,,Pat Miletich,70.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,33,1,-9,-1.0,0.0
9,49693a06fc1d8906,2001-05-04,False,Red,3,900,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,6ad9058b7c7898b0,,,,,,,,,,,,,Tony DeSouza,72.0,70.0,False,True,False,Red,0,0,0,0,0,0,0,0,26,1,21b1132949f2a9b0,,,,,,,,,,,,,Steve Berger,71.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,27,1,-1,1.0,-4.0


In [None]:
df1 = df1.sort_values(by=['event_date', 'fight_id'])
df1.head(15)


Unnamed: 0,fight_id,event_date,is_title_fight,winner_color,end_round,total_duration,rounds_scheduled,referee,weight_class_Bantamweight,weight_class_Catch_Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light_Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Welterweight,weight_class_Womens_Bantamweight,weight_class_Womens_Featherweight,weight_class_Womens_Flyweight,weight_class_Womens_Strawweight,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_str_eff,R_fighter_id,R_w_SLpM,R_w_StrAcc,R_w_SApM,R_w_StrDef,R_w_TDavg,R_w_TDacc,R_w_TDdef,R_w_SubAvg,R_w_knockdown_avg,R_w_reversal_avg,R_w_ctrl_time_pct,R_w_str_eff,R_name,R_height,R_reach,R_stance_Orthodox,R_stance_Southpaw,R_stance_Switch,R_fighter_color,R_win_streak,R_lose_streak,R_longest_win_streak,R_wins_by_KO_TKO,R_wins_by_SUB,R_wins_by_M_DEC,R_wins_by_S_DEC,R_wins_by_U_DEC,R_age,R_is_debut,B_fighter_id,B_w_SLpM,B_w_StrAcc,B_w_SApM,B_w_StrDef,B_w_TDavg,B_w_TDacc,B_w_TDdef,B_w_SubAvg,B_w_knockdown_avg,B_w_reversal_avg,B_w_ctrl_time_pct,B_w_str_eff,B_name,B_height,B_reach,B_stance_Orthodox,B_stance_Southpaw,B_stance_Switch,B_fighter_color,B_win_streak,B_lose_streak,B_longest_win_streak,B_wins_by_KO_TKO,B_wins_by_SUB,B_wins_by_M_DEC,B_wins_by_S_DEC,B_wins_by_U_DEC,B_age,B_is_debut,delta_age,delta_height,delta_reach
0,4a1f37200bc69376,2001-02-23,False,Red,1,172,3,Mario Yamasaki,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,817e0bdf08efce2e,,,,,,,,,,,,,Elvis Sinosic,75.0,77.0,True,False,False,Red,0,0,0,0,0,0,0,0,30,1,8d26912cd2aeb366,,,,,,,,,,,,,Jeremy Horn,73.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,25,1,5,2.0,3.0
1,74d0da1f9df17b9e,2001-02-23,False,Red,1,100,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,263ebd4a669e1e98,,,,,,,,,,,,,Fabiano Iha,68.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,30,1,a8fa0c4e95512806,,,,,,,,,,,,,Phil Johns,62.0,74.0,False,True,False,Blue,0,0,0,0,0,0,0,0,32,1,-2,6.0,0.0
2,75a401066ad784f6,2001-02-23,False,Red,2,561,3,Mario Yamasaki,False,False,False,False,True,False,False,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,b44f39a5c6596953,,,,,,,,,,,,,Pedro Rizzo,73.0,78.0,True,False,False,Red,0,0,0,0,0,0,0,0,26,1,ad3f53c454cbbead,,,,,,,,,,,,,Josh Barnett,75.0,78.0,True,False,False,Blue,0,0,0,0,0,0,0,0,23,1,3,-2.0,0.0
3,a38ec65ac140fcf8,2001-02-23,True,Red,5,1500,5,John McCarthy,False,False,False,False,False,False,True,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,44260175069b6276,,,,,,,,,,,,,Jens Pulver,67.0,70.0,False,True,False,Red,0,0,0,0,0,0,0,0,26,1,eb1723480fa2f96c,,,,,,,,,,,,,Caol Uno,67.0,70.0,False,True,False,Blue,0,0,0,0,0,0,0,0,25,1,1,0.0,0.0
4,a949b05c64e43131,2001-02-23,False,Red,2,600,2,Mason White,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,1652f3213655b935,,,,,,,,,,,,,Phil Baroni,69.0,72.0,True,False,False,Red,0,0,0,0,0,0,0,0,24,1,e09dbbd5fe3f0fe2,,,,,,,,,,,,,Curtis Stout,73.0,75.0,False,True,False,Blue,0,0,0,0,0,0,0,0,27,1,-3,-4.0,-3.0
5,bfb468c3427faa50,2001-02-23,False,Red,2,587,2,Mason White,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,029880cdbf5ca089,,,,,,,,,,,,,Sean Sherk,66.0,67.0,True,False,False,Red,0,0,0,0,0,0,0,0,27,1,cb6783c39c01d896,,,,,,,,,,,,,Tiki Ghosn,70.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,24,1,3,-4.0,-7.0
6,f8820294bbe01486,2001-02-23,True,Red,1,32,5,John McCarthy,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,2f732dd9210d301f,,,,,,,,,,,,,Tito Ortiz,75.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,26,1,8f2d9ee27f206f1f,,,,,,,,,,,,,Evan Tanner,72.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,30,1,-4,3.0,0.0
7,0434a68809885ea1,2001-05-04,False,Red,3,891,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,7b6e40a73b567072,,,,,,,,,,,,,Shonie Carter,70.0,74.0,False,True,False,Red,0,0,0,0,0,0,0,0,29,1,86dfed7cc24a9fa7,,,,,,,,,,,,,Matt Serra,66.0,68.0,True,False,False,Blue,0,0,0,0,0,0,0,0,26,1,3,4.0,6.0
8,34ab72077963ee74,2001-05-04,True,Red,3,770,5,John McCarthy,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,952f6fa06f25a0ec,,,,,,,,,,,,,Carlos Newton,69.0,74.0,True,False,False,Red,0,0,0,0,0,0,0,0,24,1,cedfdf8d423d500c,,,,,,,,,,,,,Pat Miletich,70.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,33,1,-9,-1.0,0.0
9,49693a06fc1d8906,2001-05-04,False,Red,3,900,3,Mario Yamasaki,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,6ad9058b7c7898b0,,,,,,,,,,,,,Tony DeSouza,72.0,70.0,False,True,False,Red,0,0,0,0,0,0,0,0,26,1,21b1132949f2a9b0,,,,,,,,,,,,,Steve Berger,71.0,74.0,True,False,False,Blue,0,0,0,0,0,0,0,0,27,1,-1,1.0,-4.0


In [None]:
fighter_id = '54f64b5e283b0ce7'

df_fighter = (
    df1
    .loc[(df1["B_fighter_id"] == fighter_id) | (df1['R_fighter_id'] == fighter_id)]
    .sort_values("event_date")
    .reset_index(drop=True)
)

df_fighter.head(10)

Unnamed: 0,fight_id,event_date,is_title_fight,winner_color,end_round,total_duration,rounds_scheduled,referee,weight_class_Bantamweight,weight_class_Catch_Weight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_Light_Heavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_Welterweight,weight_class_Womens_Bantamweight,weight_class_Womens_Featherweight,weight_class_Womens_Flyweight,weight_class_Womens_Strawweight,delta_TDAvg,delta_TDdef,delta_TDacc,delta_SubAvg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_SLpM,delta_SApM,delta_StrDef,delta_StrAcc,delta_str_eff,R_fighter_id,R_w_SLpM,R_w_StrAcc,R_w_SApM,R_w_StrDef,R_w_TDavg,R_w_TDacc,R_w_TDdef,R_w_SubAvg,R_w_knockdown_avg,R_w_reversal_avg,R_w_ctrl_time_pct,R_w_str_eff,R_name,R_height,R_reach,R_stance_Orthodox,R_stance_Southpaw,R_stance_Switch,R_fighter_color,R_win_streak,R_lose_streak,R_longest_win_streak,R_wins_by_KO_TKO,R_wins_by_SUB,R_wins_by_M_DEC,R_wins_by_S_DEC,R_wins_by_U_DEC,R_age,R_is_debut,B_fighter_id,B_w_SLpM,B_w_StrAcc,B_w_SApM,B_w_StrDef,B_w_TDavg,B_w_TDacc,B_w_TDdef,B_w_SubAvg,B_w_knockdown_avg,B_w_reversal_avg,B_w_ctrl_time_pct,B_w_str_eff,B_name,B_height,B_reach,B_stance_Orthodox,B_stance_Southpaw,B_stance_Switch,B_fighter_color,B_win_streak,B_lose_streak,B_longest_win_streak,B_wins_by_KO_TKO,B_wins_by_SUB,B_wins_by_M_DEC,B_wins_by_S_DEC,B_wins_by_U_DEC,B_age,B_is_debut,delta_age,delta_height,delta_reach
0,22fe6779c3fa649d,2020-10-10,False,Blue,3,900,3,Kevin Sataki,False,False,True,False,False,False,False,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,52c2ae6d2f2d2613,3.382456,0.560465,1.484211,0.63877,2.684211,0.485714,0.771429,1.842105,0.473684,0.315789,0.403275,0.711965,Youssef Zalal,70.0,72.0,False,False,True,Red,3,0,3,0,0,0,0,3,24,0,54f64b5e283b0ce7,,,,,,,,,,,,,Ilia Topuria,67.0,69.0,True,False,False,Blue,0,0,0,0,0,0,0,0,23,1,1,3.0,3.0
1,b1be8b41b1a4fd85,2020-12-05,False,Red,1,158,3,Mark Smith,False,False,True,False,False,False,False,False,False,False,False,False,False,-4.198635,-0.675676,-0.371882,-0.792832,-0.441206,0.07082,-0.025676,-0.135191,0.147856,0.064371,,0.0,0.601024,-0.250725,1.11597,1.557176,-0.256083,-0.023651,0.12287,54f64b5e283b0ce7,0.866667,0.393939,1.133333,0.679245,5.0,0.555556,1.0,5.0,0.0,0.0,0.535556,0.433333,Ilia Topuria,67.0,69.0,True,False,False,Red,1,0,1,0,0,0,0,1,23,0,29af297d9f1de0f8,1.982637,0.370288,2.69051,0.423163,0.801365,0.183673,0.324324,4.207168,0.0,0.601024,0.284831,0.556203,Damon Jackson,71.0,71.0,False,False,True,Blue,1,0,1,0,1,0,0,0,32,0,-9,-4.0,-2.0
2,07468b6347ac5e3d,2021-07-10,False,Blue,1,287,3,Jason Herzog,False,False,True,False,False,False,False,False,False,False,False,False,False,3.798801,,0.448889,2.904524,-0.108802,0.03856,0.011413,0.221472,0.221646,-0.260032,0.20916,0.114203,-0.536566,0.344922,0.733906,0.842709,-0.072039,0.038668,0.212398,8866c6f509c19089,2.405935,0.427999,1.32088,0.759062,0.158983,0.106667,,1.053259,1.073132,0.536566,0.082958,0.56538,Ryan Hall,70.0,70.0,False,True,False,Red,4,0,4,0,1,0,0,3,36,0,54f64b5e283b0ce7,3.139842,0.466667,2.163588,0.687023,3.957784,0.555556,1.0,3.957784,1.187335,0.0,0.42788,0.777778,Ilia Topuria,67.0,69.0,True,False,False,Blue,2,0,2,1,0,0,0,1,24,0,12,3.0,1.0
3,a200b5dcbdd2506e,2022-03-19,False,Blue,2,367,3,Marc Goddard,False,False,False,False,False,False,True,False,False,False,False,False,False,-2.52419,-0.703704,,-2.52419,-1.485349,0.300746,-0.018753,-0.301901,0.050569,0.001155,-0.230282,0.760843,0.0,-0.257992,-0.703072,0.782276,-0.187034,-0.09604,-0.143682,4c88a1db5a46c6a4,2.662515,0.405213,2.919424,0.437781,0.0,,0.296296,0.0,1.5181,0.0,0.115869,0.711019,Jai Herbert,73.0,77.0,True,False,False,Red,1,0,1,1,0,0,0,0,33,0,54f64b5e283b0ce7,3.365587,0.501253,2.137148,0.624815,2.52419,0.555556,1.0,2.52419,0.757257,0.0,0.373861,0.854701,Ilia Topuria,67.0,69.0,True,False,False,Blue,3,0,3,2,0,0,0,1,25,0,8,6.0,8.0
4,4a17876e99f6baf3,2022-12-10,False,Blue,2,490,3,Marc Goddard,False,False,True,False,False,False,False,False,False,False,False,False,False,-1.737924,0.65625,-0.287504,0.019854,-0.656956,-0.066288,-0.212335,0.046974,-0.177474,0.019314,0.009211,1.056259,-0.078201,-0.287972,0.882725,1.53968,0.040183,-0.191951,0.440763,d9c6f19f958643e9,2.443576,0.687697,1.195829,0.599097,4.232649,0.659726,0.34375,1.469534,0.395894,0.078201,0.688411,0.315019,Bryce Mitchell,70.0,70.0,False,True,False,Red,6,0,6,0,1,1,0,4,28,0,54f64b5e283b0ce7,3.3263,0.495745,2.735509,0.63928,2.494725,0.372222,1.0,1.489388,1.452153,0.0,0.400439,0.755781,Ilia Topuria,67.0,69.0,True,False,False,Blue,4,0,4,3,0,0,0,1,25,0,3,3.0,1.0
5,c3ef3cb03edde8bb,2023-06-24,False,Blue,5,1500,5,Marc Goddard,False,False,True,False,False,False,False,False,False,False,False,False,False,0.844693,0.174763,0.180621,1.639813,1.899413,0.023985,0.174806,0.009705,-0.128663,-0.033691,-0.040433,1.030937,-0.158374,0.193525,-0.094846,-1.994259,0.013495,0.13812,-0.147709,fba03cd6cc28dc41,4.129742,0.37034,5.483899,0.585,0.525264,0.191602,0.732026,0.00618,0.594608,0.158374,0.098142,0.902908,Josh Emmett,66.0,70.0,True,False,False,Red,0,1,5,3,0,0,2,4,38,0,54f64b5e283b0ce7,4.034896,0.50846,3.48964,0.598494,1.369956,0.372222,0.906789,1.645992,1.625545,0.0,0.291667,0.755198,Ilia Topuria,67.0,69.0,True,False,False,Blue,5,0,5,3,1,0,0,1,26,0,12,-1.0,1.0
6,bec3154a11df3299,2024-02-17,True,Blue,2,512,5,Jason Herzog,False,False,True,False,False,False,False,False,False,False,False,False,False,-0.396967,0.219433,0.234647,0.517806,0.328992,0.142884,-0.085158,-0.023659,-0.157705,-0.119225,-0.020118,0.685041,-0.093851,-0.023581,0.171103,-0.157889,0.170764,-0.116209,0.186807,e1248941344b3288,5.242798,0.57258,3.641028,0.514226,2.0569,0.453414,0.687357,0.018301,0.248984,0.093851,0.215227,0.665458,Alexander Volkanovski,66.0,71.0,True,False,False,Red,0,1,12,5,0,0,1,7,35,0,54f64b5e283b0ce7,5.413901,0.456371,3.48314,0.68499,1.659933,0.688061,0.906789,0.536107,0.934024,0.0,0.191646,0.852265,Ilia Topuria,67.0,69.0,True,False,False,Blue,6,0,6,3,1,0,0,2,27,0,8,-1.0,2.0
7,ebf7cea27b83c432,2024-10-26,True,Red,3,694,5,Marc Goddard,False,False,True,False,False,False,False,False,False,False,False,False,False,-1.089607,-0.047017,-0.382415,-0.189967,1.590159,-0.263873,0.008922,0.171746,0.123125,0.092127,0.048527,-0.601887,0.0,-0.110074,2.234729,0.644571,-0.09912,0.094278,0.089364,54f64b5e283b0ce7,5.077105,0.455991,4.002748,0.650034,1.233931,0.688061,0.906789,0.398521,1.14544,0.0,0.143966,0.874639,Ilia Topuria,67.0,69.0,True,False,False,Red,7,0,7,4,1,0,0,2,27,0,150ff4cc642270b9,7.311835,0.550269,4.647318,0.550913,0.144324,0.305646,0.859773,0.208554,0.543553,0.0,0.033892,0.964004,Max Holloway,71.0,69.0,True,False,False,Blue,3,0,13,11,2,0,1,8,32,0,-5,-4.0,0.0
8,7a64d63e12618ba7,2025-06-28,True,Red,1,147,5,Marc Goddard,False,False,False,False,False,False,True,False,False,False,False,False,False,0.129251,-0.336443,-0.448697,1.725649,-0.771243,-0.059297,0.208229,0.084954,0.033863,-0.025656,-0.021281,-0.900491,0.295441,0.314545,-2.31679,-1.545548,-0.296297,0.184166,-0.179468,54f64b5e283b0ce7,5.559556,0.479883,4.972152,0.633408,1.700158,0.822222,0.906789,0.261876,1.197349,0.0,0.116836,0.907435,Ilia Topuria,67.0,69.0,True,False,False,Red,8,0,8,5,1,0,0,2,28,0,07225ba28ae309b6,3.242765,0.664049,3.426604,0.337111,1.829409,0.373525,0.570346,1.987524,0.296858,0.295441,0.431381,0.727968,Charles Oliveira,70.0,74.0,True,False,False,Blue,1,0,11,4,16,0,0,3,35,0,-7,-3.0,-5.0


### Final Touches

In [None]:
df1 = df1.sort_values(by=['event_date', 'fight_id'])

cols = ['fight_id', 'event_date', 'total_duration', 'end_round', 'referee', 'R_name', 'B_name', 'R_fighter_color', 'B_fighter_color' , 'R_fighter_id', 'B_fighter_id']
df1 = df1.drop(cols, axis=1)
df1 = df1.rename(columns={'winner_color': 'winner', 'rounds_scheduled': 'no_of_rounds'})
df1.columns = df1.columns.str.lower()

df1.head(10)

Unnamed: 0,is_title_fight,winner,no_of_rounds,weight_class_bantamweight,weight_class_catch_weight,weight_class_featherweight,weight_class_flyweight,weight_class_heavyweight,weight_class_light_heavyweight,weight_class_lightweight,weight_class_middleweight,weight_class_welterweight,weight_class_womens_bantamweight,weight_class_womens_featherweight,weight_class_womens_flyweight,weight_class_womens_strawweight,delta_tdavg,delta_tddef,delta_tdacc,delta_subavg,net_str_eff,delta_head_ratio,delta_head_acc,delta_body_ratio,delta_body_acc,delta_leg_ratio,delta_leg_acc,delta_knockdown_avg,delta_reversal_avg,delta_ctrl_time_pct,delta_slpm,delta_sapm,delta_strdef,delta_stracc,delta_str_eff,r_w_slpm,r_w_stracc,r_w_sapm,r_w_strdef,r_w_tdavg,r_w_tdacc,r_w_tddef,r_w_subavg,r_w_knockdown_avg,r_w_reversal_avg,r_w_ctrl_time_pct,r_w_str_eff,r_height,r_reach,r_stance_orthodox,r_stance_southpaw,r_stance_switch,r_win_streak,r_lose_streak,r_longest_win_streak,r_wins_by_ko_tko,r_wins_by_sub,r_wins_by_m_dec,r_wins_by_s_dec,r_wins_by_u_dec,r_age,r_is_debut,b_w_slpm,b_w_stracc,b_w_sapm,b_w_strdef,b_w_tdavg,b_w_tdacc,b_w_tddef,b_w_subavg,b_w_knockdown_avg,b_w_reversal_avg,b_w_ctrl_time_pct,b_w_str_eff,b_height,b_reach,b_stance_orthodox,b_stance_southpaw,b_stance_switch,b_win_streak,b_lose_streak,b_longest_win_streak,b_wins_by_ko_tko,b_wins_by_sub,b_wins_by_m_dec,b_wins_by_s_dec,b_wins_by_u_dec,b_age,b_is_debut,delta_age,delta_height,delta_reach
0,False,Red,3,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,75.0,77.0,True,False,False,0,0,0,0,0,0,0,0,30,1,,,,,,,,,,,,,73.0,74.0,True,False,False,0,0,0,0,0,0,0,0,25,1,5,2.0,3.0
1,False,Red,3,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,68.0,74.0,True,False,False,0,0,0,0,0,0,0,0,30,1,,,,,,,,,,,,,62.0,74.0,False,True,False,0,0,0,0,0,0,0,0,32,1,-2,6.0,0.0
2,False,Red,3,False,False,False,False,True,False,False,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,73.0,78.0,True,False,False,0,0,0,0,0,0,0,0,26,1,,,,,,,,,,,,,75.0,78.0,True,False,False,0,0,0,0,0,0,0,0,23,1,3,-2.0,0.0
3,True,Red,5,False,False,False,False,False,False,True,False,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,67.0,70.0,False,True,False,0,0,0,0,0,0,0,0,26,1,,,,,,,,,,,,,67.0,70.0,False,True,False,0,0,0,0,0,0,0,0,25,1,1,0.0,0.0
4,False,Red,2,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,69.0,72.0,True,False,False,0,0,0,0,0,0,0,0,24,1,,,,,,,,,,,,,73.0,75.0,False,True,False,0,0,0,0,0,0,0,0,27,1,-3,-4.0,-3.0
5,False,Red,2,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,66.0,67.0,True,False,False,0,0,0,0,0,0,0,0,27,1,,,,,,,,,,,,,70.0,74.0,True,False,False,0,0,0,0,0,0,0,0,24,1,3,-4.0,-7.0
6,True,Red,5,False,False,False,False,False,False,False,True,False,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,75.0,74.0,True,False,False,0,0,0,0,0,0,0,0,26,1,,,,,,,,,,,,,72.0,74.0,True,False,False,0,0,0,0,0,0,0,0,30,1,-4,3.0,0.0
7,False,Red,3,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,70.0,74.0,False,True,False,0,0,0,0,0,0,0,0,29,1,,,,,,,,,,,,,66.0,68.0,True,False,False,0,0,0,0,0,0,0,0,26,1,3,4.0,6.0
8,True,Red,5,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,69.0,74.0,True,False,False,0,0,0,0,0,0,0,0,24,1,,,,,,,,,,,,,70.0,74.0,True,False,False,0,0,0,0,0,0,0,0,33,1,-9,-1.0,0.0
9,False,Red,3,False,False,False,False,False,False,False,False,True,False,False,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,72.0,70.0,False,True,False,0,0,0,0,0,0,0,0,26,1,,,,,,,,,,,,,71.0,74.0,True,False,False,0,0,0,0,0,0,0,0,27,1,-1,1.0,-4.0


### Save dataset

In [None]:
# df1.to_parquet("data.parquet", engine="pyarrow", compression="snappy", index=False)
