Create a DataFrame with one row per fight from the JSON event files.

In [9]:
import pandas as pd 
import json 
import os 

folder_path = 'events'
fight_rows = []

# Walk the event files in sorted name order. os.listdir() returns entries in
# arbitrary order, which would make the row order of the DataFrame (and of
# anything derived from it) differ between machines and runs.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.json'):
        continue

    # JSON is UTF-8 encoded; do not depend on the platform's default codec.
    with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Event-level metadata is copied onto every fight row of this file.
    date = data.get("date")
    event = data.get("event")

    # Fights sit next to the event-level keys in the same object; an entry is
    # treated as a fight only if it is a dict naming both fighters.
    for fight in data.values():
        if not (isinstance(fight, dict) and 'fighter1' in fight and 'fighter2' in fight):
            continue

        # Per-fighter stats may be missing or null; fall back to no stats.
        f1_data = fight.get('fighter1_fight_data') or {}
        f2_data = fight.get('fighter2_fight_data') or {}

        row = {
            'event': event,
            'date': date,
            'fight_name': fight.get('name'),
            'f1': fight.get('fighter1'),
            'f2': fight.get('fighter2'),
            'winner': fight.get('winner'),
            'method': fight.get('method'),
            'fight_length': fight.get('fight_length')
        }

        # Flatten each fighter's stats into the row as f1_<stat> / f2_<stat>.
        for stat, value in f1_data.items():
            row[f'f1_{stat}'] = value
        for stat, value in f2_data.items():
            row[f'f2_{stat}'] = value

        fight_rows.append(row)

# Build the frame once from the collected records; unparseable dates become NaT.
df = pd.DataFrame(fight_rows)
df['date'] = pd.to_datetime(df['date'], errors='coerce')



Add columns holding each fighter's Elo rating going into the fight (before the result is applied).

In [10]:
# Elo parameters
BASE_ELO = 1500  # rating used for a fighter who has no rating recorded yet
K = 64  # K-factor: multiplies (actual - expected score), so larger values make ratings more volatile

def expected_score(rating_a, rating_b):
    """Return the Elo expected score of a player rated `rating_a` against `rating_b`.

    Uses the standard logistic Elo curve: 0.5 for equal ratings, rising
    towards 1 as `rating_a` exceeds `rating_b` (a 400-point lead gives about
    0.91). The expected scores of the two players always sum to 1.

    Args:
        rating_a: Rating of the player whose expected score is wanted.
        rating_b: Rating of the opponent.

    Returns:
        Expected score for player A as a float strictly between 0 and 1.
    """
    # The exponent must be the opponent's rating minus the player's own;
    # subtracting a rating from itself would pin the result at 0.5.
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(winner_elo, loser_elo, k=K):
    """Return the post-fight ratings as a `(winner, loser)` tuple.

    The winner gains `k * (1 - expected score of the winner)` and the loser
    gives up exactly the same amount.
    """
    delta = k * (1 - expected_score(winner_elo, loser_elo))
    return winner_elo + delta, loser_elo - delta

# Most recent rating of every fighter seen so far, keyed by name
elo_ratings = {}

# Rating each fighter carried into the fight, one entry per row
f1_elo_list = []
f2_elo_list = []

# Ratings are replayed fight by fight, so put the fights in date order first
df = df.sort_values(by='date')
for f1, f2, winner in zip(df['f1'], df['f2'], df['winner']):
    # Fighters without a rating yet start from the base rating
    f1_elo = elo_ratings.get(f1, BASE_ELO)
    f2_elo = elo_ratings.get(f2, BASE_ELO)

    # Record the pre-fight values before anything is updated
    f1_elo_list.append(f1_elo)
    f2_elo_list.append(f2_elo)

    if winner == f1:
        new_f1_elo, new_f2_elo = update_elo(f1_elo, f2_elo)
    elif winner == f2:
        new_f2_elo, new_f1_elo = update_elo(f2_elo, f1_elo)
    else:
        # Winner matches neither fighter: scored as a draw, 0.5 points each
        new_f1_elo = f1_elo + K * (0.5 - expected_score(f1_elo, f2_elo))
        new_f2_elo = f2_elo + K * (0.5 - expected_score(f2_elo, f1_elo))

    # Store the post-fight ratings for each fighter's next appearance
    elo_ratings[f1] = new_f1_elo
    elo_ratings[f2] = new_f2_elo

# Attach the pre-fight ratings; the lists follow the sorted row order of df
df['f1_elo_pre'] = f1_elo_list
df['f2_elo_pre'] = f2_elo_list






Add columns with each fighter's number of finishes (wins by any method other than a decision) going into the fight.

In [23]:
# Running number of finishes per fighter, keyed by fighter name
finish_counter = {}
f1_finish_list = []
f2_finish_list = []

# Counts are cumulative, so the fights have to be walked in date order
df = df.sort_values(by='date')
for _, row in df.iterrows():
    f1 = row['f1']
    f2 = row['f2']
    winner = row['winner']
    method = row['method']

    # Record the counts as they stood before this fight
    f1_finish_list.append(finish_counter.get(f1, 0))
    f2_finish_list.append(finish_counter.get(f2, 0))

    # A finish is a win by anything other than a decision.
    # - `method` is read with dict.get() when the rows are built, so it can be
    #   missing (None/NaN); a substring test on that raises TypeError, so a
    #   fight without a recorded method is not counted as a finish.
    # - Only credit a winner who is one of the two fighters, matching how the
    #   Elo and win/loss cells decide who won; otherwise draws and no contests
    #   would create counter entries under a non-fighter key.
    is_finish = isinstance(method, str) and 'Decision' not in method
    if is_finish and (winner == f1 or winner == f2):
        finish_counter[winner] = finish_counter.get(winner, 0) + 1

# Attach the pre-fight counts; the lists follow the sorted row order of df
df['f1_finishes'] = f1_finish_list
df['f2_finishes'] = f2_finish_list



    


In [24]:
# Running career tallies per fighter, keyed by name
win_counter = {}
loss_counter = {}
fight_counter = {}

# Pre-fight snapshots, one entry per row of df
f1_win_list, f1_loss_list, f1_fight_list, f1_winrate_list = [], [], [], []
f2_win_list, f2_loss_list, f2_fight_list, f2_winrate_list = [], [], [], []


def _increment(counter, fighter):
    """Add one to `fighter`'s entry in `counter`, starting from 0 if absent."""
    counter[fighter] = counter.get(fighter, 0) + 1


for _, row in df.iterrows():
    f1, f2, winner = row['f1'], row['f2'], row['winner']

    # Snapshot both fighters' records as they stood before this fight
    snapshots = (
        (f1, f1_win_list, f1_loss_list, f1_fight_list, f1_winrate_list),
        (f2, f2_win_list, f2_loss_list, f2_fight_list, f2_winrate_list),
    )
    for fighter, wins_out, losses_out, fights_out, winrate_out in snapshots:
        wins = win_counter.get(fighter, 0)
        fights = fight_counter.get(fighter, 0)

        wins_out.append(wins)
        losses_out.append(loss_counter.get(fighter, 0))
        fights_out.append(fights)
        # With no earlier fights the win rate is recorded as 0 instead of dividing by zero
        winrate_out.append(wins / fights if fights != 0 else 0)

    # Credit the result; a winner matching neither fighter changes no win/loss tally
    if winner == f1:
        _increment(win_counter, f1)
        _increment(loss_counter, f2)
    elif winner == f2:
        _increment(win_counter, f2)
        _increment(loss_counter, f1)

    # Every bout counts towards both fighters' totals, whatever the result
    _increment(fight_counter, f1)
    _increment(fight_counter, f2)

# Attach the snapshots in the same column order as before: all f1 columns, then all f2 columns
df['f1_winrate'] = f1_winrate_list
df['f1_wins'] = f1_win_list
df['f1_losses'] = f1_loss_list
df['f1_fights'] = f1_fight_list

df['f2_winrate'] = f2_winrate_list
df['f2_wins'] = f2_win_list
df['f2_losses'] = f2_loss_list
df['f2_fights'] = f2_fight_list

# Persist the enriched table (the DataFrame index is written as the first column)
df.to_csv('output.csv')

Clean null values (the cell below is currently commented out, so no rows are dropped).

In [13]:
# df.dropna(subset=['f1_str_att'], inplace=True)
# df.to_csv('output.csv')