Create a DataFrame of individual fight data (one row per fight) from the JSON files in the `events` folder

In [51]:
import pandas as pd 
import json 
import os 
import numpy as np

folder_path = 'events'
fight_rows = []

# Build one flat row per fight from every event file.
# os.listdir() returns names in arbitrary order, so sort them to make the row
# order (and everything derived from it) reproducible between runs.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.json'):
        continue

    # Read as UTF-8 explicitly so non-ASCII fighter names do not depend on the
    # platform's default text encoding.
    with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Event-level fields shared by every fight in this file
    date = data.get("date")
    event = data.get("event")

    for fight in data.values():
        # Only dict entries that name both fighters are fights; everything else
        # at the top level (e.g. the "date" / "event" values) is skipped.
        if not (isinstance(fight, dict) and 'fighter1' in fight and 'fighter2' in fight):
            continue

        # Missing or null per-fighter stats become an empty dict, which simply
        # adds no stat columns for that fighter in this row.
        f1_data = fight.get('fighter1_fight_data') or {}
        f2_data = fight.get('fighter2_fight_data') or {}

        row = {
            'event': event,
            'date': date,
            'fight_name': fight.get('name'),
            'f1': fight.get('fighter1'),
            'f2': fight.get('fighter2'),
            'winner': fight.get('winner'),
            'method': fight.get('method'),
            'fight_length': fight.get('fight_length')
        }

        # Flatten each fighter's stats into f1_<stat> / f2_<stat> columns
        row.update({f'f1_{stat}': value for stat, value in f1_data.items()})
        row.update({f'f2_{stat}': value for stat, value in f2_data.items()})

        fight_rows.append(row)

# Create DataFrame; unparseable dates become NaT instead of raising
fight_df = pd.DataFrame(fight_rows)
fight_df['date'] = pd.to_datetime(fight_df['date'], errors='coerce')

# Later cells walk the rows in order and treat earlier rows as earlier fights.
# mergesort is stable, so fights sharing a date keep their load order rather
# than being shuffled by the default (unstable) quicksort.
fight_df = fight_df.sort_values(by='date', kind='mergesort')
fight_df.to_csv('fight_df.csv')

Create a new dataframe of features to analyze

In [41]:

# Seed the feature table with the columns that identify each fight and its
# outcome; it keeps fight_df's index and row order.
feature_df = fight_df[['f1', 'f2', 'winner']].copy()



Pre-fight Elo column

In [None]:
# Elo parameters
# Rating used for a fighter who has no entry in elo_ratings yet
BASE_ELO = 1500
# K-factor: scales how far ratings move after one fight (a decisive result
# moves each fighter by less than K points). Adjust based on volatility you want.
K = 64  # Adjust based on volatility you want

def expected_score(rating_a, rating_b):
    """Return the Elo expected score of fighter A against fighter B.

    The result lies strictly between 0 and 1: exactly 0.5 for equal ratings,
    above 0.5 when A is rated higher, and about 0.91 for a 400-point lead.
    expected_score(a, b) + expected_score(b, a) always equals 1.

    Args:
        rating_a: Current rating of the fighter whose expectation is returned.
        rating_b: Current rating of the opponent.
    """
    # The exponent must use the rating *difference* (opponent minus self);
    # a larger opponent rating pushes the expectation toward 0.
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(winner_elo, loser_elo, k=K):
    """Apply a decisive result and return the new (winner, loser) ratings.

    The winner gains k * (1 - expected score of the winner) and the loser
    gives up exactly the same amount, so the pair's rating sum is unchanged.

    Args:
        winner_elo: Pre-fight rating of the winner.
        loser_elo: Pre-fight rating of the loser.
        k: K-factor scaling the size of the adjustment.

    Returns:
        Tuple of (new winner rating, new loser rating).
    """
    # The less the win was expected, the larger the transfer
    delta = k * (1 - expected_score(winner_elo, loser_elo))
    return winner_elo + delta, loser_elo - delta

# Latest rating per fighter, keyed by the value found in the f1 / f2 columns
elo_ratings = {}

# Rating each corner carried INTO the fight, one entry per fight_df row
f1_elo_list = []
f2_elo_list = []

# Walk the fights in fight_df row order (expected to be chronological), so a
# fighter's rating only ever reflects rows that came before the current one.
for f1, f2, winner in zip(fight_df['f1'], fight_df['f2'], fight_df['winner']):
    # A fighter seen for the first time starts from the base rating
    f1_elo = elo_ratings.get(f1, BASE_ELO)
    f2_elo = elo_ratings.get(f2, BASE_ELO)

    # Record the pre-fight ratings before this result is applied
    f1_elo_list.append(f1_elo)
    f2_elo_list.append(f2_elo)

    if winner == f1:
        f1_after, f2_after = update_elo(f1_elo, f2_elo)
    elif winner == f2:
        f2_after, f1_after = update_elo(f2_elo, f1_elo)
    else:
        # Any winner value matching neither fighter is scored as a draw:
        # each side is credited 0.5 against its own expected score.
        f1_after = f1_elo + K * (0.5 - expected_score(f1_elo, f2_elo))
        f2_after = f2_elo + K * (0.5 - expected_score(f2_elo, f1_elo))

    # Carry the post-fight ratings forward to each fighter's next bout
    elo_ratings[f1] = f1_after
    elo_ratings[f2] = f2_after








Wins, losses, win rate, and per-fight average stats, each computed from the fighter's earlier fights only

In [58]:
# Running career totals per fighter. Inside the loop each total is read BEFORE
# the current fight is added, so every feature describes prior fights only.
win_counter = {}
loss_counter = {}
fight_counter = {}
kd_counter = {}
sig_str_landed_counter = {}
str_landed_counter = {}
td_comp_counter = {}
ctrl_time_counter = {}

# Stat name -> its running-total dict. The stat name is also the suffix of the
# fight_df columns it is read from (f1_<stat> / f2_<stat>), which lets every
# stat share a single code path instead of one copy-pasted block per stat.
stat_counters = {
    'kd': kd_counter,
    'sig_str_landed': sig_str_landed_counter,
    'str_landed': str_landed_counter,
    'td_comp': td_comp_counter,
    'ctrl_time': ctrl_time_counter,
}

CORNERS = ('f1', 'f2')

# corner -> feature name -> list of pre-fight values (one per fight_df row).
# Insertion order is the column order written to feature_df further down.
prior = {
    corner: {
        name: []
        for name in ['winrate', 'wins', 'losses', 'fights']
        + [f'avg_{stat}' for stat in stat_counters]
    }
    for corner in CORNERS
}


def _per_fight(total, n_fights, default=0):
    """Return total / n_fights, or `default` when there are no prior fights."""
    return total / n_fights if n_fights != 0 else default


for _, row in fight_df.iterrows():
    f1, f2, winner = row['f1'], row['f2'], row['winner']
    corners = (('f1', f1), ('f2', f2))

    ## Record pre-fight features ##
    for corner, fighter in corners:
        n_fights = fight_counter.get(fighter, 0)
        n_wins = win_counter.get(fighter, 0)
        columns = prior[corner]

        columns['winrate'].append(_per_fight(n_wins, n_fights, default=0.0))
        columns['wins'].append(n_wins)
        columns['losses'].append(loss_counter.get(fighter, 0))
        columns['fights'].append(n_fights)
        for stat, counter in stat_counters.items():
            columns[f'avg_{stat}'].append(_per_fight(counter.get(fighter, 0), n_fights))

    ## Update totals with this fight ##
    # A winner value matching neither fighter adds a fight to both records but
    # no win or loss to either.
    if winner == f1:
        win_counter[f1] = win_counter.get(f1, 0) + 1
        loss_counter[f2] = loss_counter.get(f2, 0) + 1
    elif winner == f2:
        win_counter[f2] = win_counter.get(f2, 0) + 1
        loss_counter[f1] = loss_counter.get(f1, 0) + 1

    for corner, fighter in corners:
        fight_counter[fighter] = fight_counter.get(fighter, 0) + 1
        # NOTE(review): if a stat is missing (NaN) for this row, the running
        # total turns NaN and stays NaN for all of this fighter's later fights.
        # Confirm whether missing stats should be skipped instead.
        for stat, counter in stat_counters.items():
            counter[fighter] = counter.get(fighter, 0) + row[f'{corner}_{stat}']


## Add to data frame ##
# All fighter-1 columns first, then all fighter-2 columns; each group ends with
# the pre-fight Elo computed in the Elo cell.
elo_pre = {'f1': f1_elo_list, 'f2': f2_elo_list}
for corner in CORNERS:
    for name, values in prior[corner].items():
        feature_df[f'{corner}_{name}'] = values
    feature_df[f'{corner}_elo_pre'] = elo_pre[corner]

# Saved with a .csv extension so it sits alongside fight_df.csv and opens as CSV
feature_df.to_csv('feature_df.csv')



Keep only the fights where both fighters have fought at least 3 times before

In [None]:
# Keep a fight only when BOTH corners already have three or more prior fights
both_have_history = feature_df['f1_fights'].ge(3) & feature_df['f2_fights'].ge(3)
feature_df = feature_df.loc[both_have_history]
