# Import Packages and Data

In [32]:
import pandas as pd
import numpy as np

In [33]:
# Import the CSV to fighter and fight
fighter = pd.read_csv('../raw_data/raw_fighter_details.csv')
data = pd.read_csv('../raw_data/data.csv')

# View Data and Drop Uneccesary Cols

In [34]:
data.head(2)

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,Winner,title_bout,weight_class,B_avg_KD,B_avg_opp_KD,...,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,Adrian Yanez,Gustavo Lopez,Chris Tognoni,2021-03-20,"Las Vegas, Nevada, USA",Red,False,Bantamweight,0.0,0.0,...,0,1,0,0,Orthodox,170.18,177.8,135.0,31.0,27.0
1,Trevin Giles,Roman Dolidze,Herb Dean,2021-03-20,"Las Vegas, Nevada, USA",Red,False,Middleweight,0.5,0.0,...,0,3,0,0,Orthodox,182.88,187.96,185.0,32.0,28.0


In [35]:
# Drop uneccesary columns as per the pre-processing notebook
data.drop(columns=['Referee','date','location','Winner','title_bout','weight_class','B_age','R_age'], inplace=True)
data.shape

(6012, 136)

# Create Characteristics Dataframe

In [36]:
# Separate blue and red fighters
blue_fighter = data[[col for col in data.columns if col.startswith('B_')]]
red_fighter = data[[col for col in data.columns if col.startswith('R_')]]

# Remove R or B from column name
blue_fighter.columns = [col.replace('B_', '') for col in blue_fighter.columns]
red_fighter.columns = [col.replace('R_', '')  for col in red_fighter.columns]

# Ensure columns for the red_fighter and blue_fighter dataset are in same order for concat
columnsTitles = blue_fighter.columns
red_fighter = red_fighter.reindex(columns=columnsTitles)

# Create fighters dataframe
fighters = pd.concat([blue_fighter, red_fighter], axis=0, sort=False)

# Remove duplicates keeping first fighter only
fighters = fighters.drop_duplicates(subset='fighter', keep='first')

In [37]:
fighters

Unnamed: 0,fighter,avg_KD,avg_opp_KD,avg_SIG_STR_pct,avg_opp_SIG_STR_pct,avg_TD_pct,avg_opp_TD_pct,avg_SUATT,avg_opp_SUATT,avg_REV,...,win_by_Decision_Majority,win_by_Decision_Split,win_by_Decision_Unanimous,win_by_KO/TKO,win_by_Submission,win_by_TKO_Doctor_Stoppage,Stance,Height_cms,Reach_cms,Weight_lbs
0,Gustavo Lopez,0.000,0.0,0.420000,0.49500,0.330,0.36000,0.500,1.0000,0.00,...,0,0,0,0,1,0,Orthodox,165.10,170.18,135.0
1,Roman Dolidze,0.500,0.0,0.660000,0.30500,0.300,0.50000,1.500,0.0000,0.00,...,0,1,0,1,0,0,Orthodox,187.96,193.04,205.0
2,Harry Hunsucker,,,,,,,,,,...,0,0,0,0,0,0,Orthodox,187.96,190.50,241.0
3,Montserrat Conejo,,,,,,,,,,...,0,0,0,0,0,0,Southpaw,152.40,154.94,115.0
4,Macy Chiasson,0.125,0.0,0.535625,0.57875,0.185,0.16625,0.125,0.1875,0.25,...,0,0,1,2,1,0,Orthodox,180.34,182.88,135.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5939,Rafael Carino,,,,,,,,,,...,0,0,0,0,0,0,Orthodox,203.20,,245.0
5940,Mark Schultz,,,,,,,,,,...,0,0,0,0,0,0,Orthodox,175.26,,200.0
5964,Scott Bessac,,,,,,,,,,...,0,0,0,0,0,0,,193.04,,245.0
5980,Jon Hess,,,,,,,,,,...,0,0,0,0,0,0,,200.66,,295.0


# Create Current Age

In [38]:
# Create DataFrame with fighters current age. 
dob = fighter.copy()[['fighter_name', 'DOB']]
dob['DOB'] = pd.to_datetime(dob['DOB'])
dob['age'] = pd.to_datetime(pd.Timestamp("today").strftime("%Y-%m-%d")) - dob['DOB']

# Year has 365 days, account for leap years, add 0.25. Day has 24 * 60 min, min has 60s. Multiply to get seconds in year.
dob['age'] = dob['age'].apply(lambda x: x.total_seconds() / (365.25*24*60*60))

dob = dob.drop(columns=['DOB'])
dob.columns = ['fighter','age']

In [39]:
fighters = pd.merge(fighters, dob, how='left', on='fighter')

# Create Function to Select Fighters and Create DF

In [92]:
# Create function to select fighters and output dataframe for model

def select_fighters(red_name, blue_name):
    # Obtain rows from fighters dataset for each fighter name
    red_fighter = fighters[fighters['fighter'] == red_name]
    blue_fighter = fighters[fighters['fighter'] == blue_name]
    
    # Rename columns to include R_ or B_
    red_fighter.columns = [f"R_{col}" for col in red_fighter.columns]
    blue_fighter.columns = [f"B_{col}" for col in blue_fighter.columns]
    
    # Output dataframe for model
    df = pd.concat([red_fighter.reset_index(drop=True), blue_fighter.reset_index(drop=True)], axis=1)
    df.drop(columns=['R_fighter','B_fighter'], inplace=True)
    return df

In [93]:
# Example fighter selection - now what needs to be added by user is title_bout and weight_class and this will match
# the shape of the dataframe in the max_preprocessing_v3

model_input = select_fighters('Roman Dolidze','Gustavo Lopez')

In [94]:
model_input

Unnamed: 0,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUATT,R_avg_opp_SUATT,R_avg_REV,R_avg_opp_REV,...,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,B_age
0,0.5,0.0,0.66,0.305,0.3,0.5,1.5,0.0,0.0,0.0,...,0,0,0,1,0,Orthodox,165.1,170.18,135.0,34.724162
