In [23]:
# Import necessary libraries
import pandas as pd

# Read the two input tables from the working directory:
#   df  - one row per bout (event, bout string, outcome code, method, ...)
#   df2 - fighter table; later cells read its FIGHTER and AGE columns
df = pd.read_csv(filepath_or_buffer='ufc_fight_results.csv')
df2 = pd.read_csv(filepath_or_buffer='ufc_fighter_tott_clean.csv')

# Preview the top five bout records
df.head(n=5)

Unnamed: 0,EVENT,BOUT,OUTCOME,WEIGHTCLASS,METHOD,ROUND,TIME,TIME FORMAT,REFEREE,DETAILS,URL
0,UFC 322: Della Maddalena vs. Makhachev,Jack Della Maddalena vs. Islam Makhachev,L/W,UFC Welterweight Title Bout,Decision - Unanimous,5,5:00,5 Rnd (5-5-5-5-5),Herb Dean,Derek Cleary 45 - 50.Sal D'amato 45 - 50.Ron M...,http://ufcstats.com/fight-details/856f6ee04937...
1,UFC 322: Della Maddalena vs. Makhachev,Valentina Shevchenko vs. Zhang Weili,W/L,UFC Women's Flyweight Title Bout,Decision - Unanimous,5,5:00,5 Rnd (5-5-5-5-5),Marc Goddard,Chris Bolinski 45 - 50.Eric Colon 45 - 50.Chri...,http://ufcstats.com/fight-details/7606f6b6c19f...
2,UFC 322: Della Maddalena vs. Makhachev,Sean Brady vs. Michael Morales,L/W,Welterweight Bout,KO/TKO,1,3:27,3 Rnd (5-5-5),Blake Grice,Punches to Head At Distance,http://ufcstats.com/fight-details/477af74ea3cb...
3,UFC 322: Della Maddalena vs. Makhachev,Leon Edwards vs. Carlos Prates,L/W,Welterweight Bout,KO/TKO,2,1:28,3 Rnd (5-5-5),Keith Peterson,Punch to Head At Distance,http://ufcstats.com/fight-details/3c6088485376...
4,UFC 322: Della Maddalena vs. Makhachev,Beneil Dariush vs. Benoit Saint Denis,L/W,Lightweight Bout,KO/TKO,1,0:16,3 Rnd (5-5-5),Herb Dean,Punch to Head At Distance,http://ufcstats.com/fight-details/9b5f190cd3f5...


In [24]:
# Split each BOUT string ("<fighter 1> vs. <fighter 2>") into FIGHTER1 and FIGHTER2.
# - regex=False: pandas treats a multi-character pattern as a regular expression
#   by default, in which the "." of " vs. " would match ANY character.
# - n=1: split on the first separator only, so expand=True can never produce
#   more than two columns and break the two-column assignment.
df[['FIGHTER1', 'FIGHTER2']] = df['BOUT'].str.split(' vs. ', n=1, expand=True, regex=False)

# Trim stray leading/trailing whitespace so names compare equal to other tables
df['FIGHTER1'] = df['FIGHTER1'].str.strip()
df['FIGHTER2'] = df['FIGHTER2'].str.strip()

# Resolve a bout's OUTCOME code into (winner, loser) names
def assign_winner_loser(row):
    """Return a two-element Series ``[winner, loser]`` for a single bout.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'OUTCOME'; 'FIGHTER1' and 'FIGHTER2' are read only
        when the outcome names a winner.

    Returns
    -------
    pd.Series
        ``[FIGHTER1, FIGHTER2]`` when OUTCOME is 'W/L',
        ``[FIGHTER2, FIGHTER1]`` when OUTCOME is 'L/W',
        ``[None, None]`` for any other outcome code.
    """
    outcome_code = row['OUTCOME']

    if outcome_code == 'W/L':
        # First-listed fighter won
        winner_key, loser_key = 'FIGHTER1', 'FIGHTER2'
    elif outcome_code == 'L/W':
        # Second-listed fighter won
        winner_key, loser_key = 'FIGHTER2', 'FIGHTER1'
    else:
        # No winner recorded for this bout
        return pd.Series([None, None])

    return pd.Series([row[winner_key], row[loser_key]])

# Populate WINNER and LOSER by evaluating assign_winner_loser on every bout row
df[['WINNER', 'LOSER']] = df.apply(assign_winner_loser, axis='columns')

# Collapse the detailed METHOD text into a coarse category
def assign_method(row):
    """Map a bout's METHOD text to a coarse finish category.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'METHOD'.

    Returns
    -------
    'Decision', 'KO/TKO', 'Submission' or 'NC' when the METHOD text contains
    'Decision', 'TKO', 'Submission' or 'Overturned' respectively (checked in
    that order). Any other value is returned unchanged, including missing
    values (None/NaN), which previously raised TypeError on the ``in`` test.
    """
    method = row['METHOD']

    # Missing or non-text METHOD: nothing to classify, pass it through as is
    if not isinstance(method, str):
        return method

    if 'Decision' in method:
        return 'Decision'
    # 'TKO' alone also matches 'KO/TKO', so a single substring test suffices
    if 'TKO' in method:
        return 'KO/TKO'
    if 'Submission' in method:
        return 'Submission'
    if 'Overturned' in method:
        return 'NC'
    return method
    
# Overwrite METHOD with the coarse category computed by assign_method for each row
df['METHOD'] = df.apply(assign_method, axis='columns')

# Build a fighter-name -> AGE lookup from df2.
# A lookup applied with Series.map (instead of two left merges) keeps exactly
# one row per bout: a merge emits one output row per matching df2 row, so any
# name appearing more than once in df2 would silently duplicate fights.
fighter_ages = (
    df2[['FIGHTER', 'AGE']]
    .dropna(subset=['FIGHTER'])                     # nameless rows must not match bouts without a winner
    .drop_duplicates()                              # exact repeats of the same name/age are harmless
    .drop_duplicates(subset='FIGHTER', keep=False)  # same name, different ages: ambiguous, leave as NaN
    .set_index('FIGHTER')['AGE']
)

# Signed age difference: winner's age minus loser's age
# (negative means the winner is the younger fighter; this is NOT an absolute value).
# NaN when either age is unknown/ambiguous or the bout has no winner.
df['AGE.DIFF.WINNER'] = df['WINNER'].map(fighter_ages) - df['LOSER'].map(fighter_ages)

# Drop columns that are no longer needed
df = df.drop(columns=['OUTCOME', 'TIME FORMAT', 'REFEREE', 'URL', 'FIGHTER1', 'FIGHTER2'])

# Save the cleaned dataframe to a new CSV file
df.to_csv('ufc_fight_results_clean.csv', index=False)

# Display the first few rows of the updated dataframe
df.head(5)

Unnamed: 0,EVENT,BOUT,WEIGHTCLASS,METHOD,ROUND,TIME,DETAILS,WINNER,LOSER,AGE.DIFF.WINNER
0,UFC 322: Della Maddalena vs. Makhachev,Jack Della Maddalena vs. Islam Makhachev,UFC Welterweight Title Bout,Decision,5,5:00,Derek Cleary 45 - 50.Sal D'amato 45 - 50.Ron M...,Islam Makhachev,Jack Della Maddalena,4.876712
1,UFC 322: Della Maddalena vs. Makhachev,Valentina Shevchenko vs. Zhang Weili,UFC Women's Flyweight Title Bout,Decision,5,5:00,Chris Bolinski 45 - 50.Eric Colon 45 - 50.Chri...,Valentina Shevchenko,Zhang Weili,1.435616
2,UFC 322: Della Maddalena vs. Makhachev,Sean Brady vs. Michael Morales,Welterweight Bout,KO/TKO,1,3:27,Punches to Head At Distance,Michael Morales,Sean Brady,-6.586301
3,UFC 322: Della Maddalena vs. Makhachev,Leon Edwards vs. Carlos Prates,Welterweight Bout,KO/TKO,2,1:28,Punch to Head At Distance,Carlos Prates,Leon Edwards,-1.980822
4,UFC 322: Della Maddalena vs. Makhachev,Beneil Dariush vs. Benoit Saint Denis,Lightweight Bout,KO/TKO,1,0:16,Punch to Head At Distance,Benoit Saint Denis,Beneil Dariush,-6.621918
