# Import Packages and Data

In [108]:
import pandas as pd
import numpy as np

In [109]:
# Import the CSV to fighter and fight
data = pd.read_csv('../ufc_data_raw_main/data.csv')

# View Data and Drop Uneccesary Cols

In [110]:
data.head(2)

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,Winner,title_bout,weight_class,B_avg_KD,B_avg_opp_KD,...,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,Adrian Yanez,Gustavo Lopez,Chris Tognoni,2021-03-20,"Las Vegas, Nevada, USA",Red,False,Bantamweight,0.0,0.0,...,0,1,0,0,Orthodox,170.18,177.8,135.0,31.0,27.0
1,Trevin Giles,Roman Dolidze,Herb Dean,2021-03-20,"Las Vegas, Nevada, USA",Red,False,Middleweight,0.5,0.0,...,0,3,0,0,Orthodox,182.88,187.96,185.0,32.0,28.0


In [111]:
# Drop uneccesary columns as per the pre-processing notebook
data.drop(columns=['Referee','date','location','Winner','title_bout','weight_class'], inplace=True)
data.shape

(6012, 138)

# Create Characteristics Dataframe

In [148]:
# Separate blue and red fighters
blue_fighter = data[[col for col in data.columns if col.startswith('B_')]]
red_fighter = data[[col for col in data.columns if col.startswith('R_')]]

# Remove R or B from column name
blue_fighter.columns = [col.replace('B_', '') for col in blue_fighter.columns]
red_fighter.columns = [col.replace('R_', '')  for col in red_fighter.columns]

# Ensure columns for the red_fighter and blue_fighter dataset are in same order for concat
columnsTitles = blue_fighter.columns
red_fighter = red_fighter.reindex(columns=columnsTitles)

# Create fighters dataframe
fighters = pd.concat([blue_fighter, red_fighter], axis=0, sort=False)

# Remove duplicates keeping first fighter only
fighters = fighters.drop_duplicates(subset='fighter', keep='first')

In [151]:
fighters.head()

Unnamed: 0,fighter,avg_KD,avg_opp_KD,avg_SIG_STR_pct,avg_opp_SIG_STR_pct,avg_TD_pct,avg_opp_TD_pct,avg_SUATT,avg_opp_SUATT,avg_REV,...,win_by_Decision_Split,win_by_Decision_Unanimous,win_by_KO/TKO,win_by_Submission,win_by_TKO_Doctor_Stoppage,Stance,Height_cms,Reach_cms,Weight_lbs,age
0,Gustavo Lopez,0.0,0.0,0.42,0.495,0.33,0.36,0.5,1.0,0.0,...,0,0,0,1,0,Orthodox,165.1,170.18,135.0,31.0
1,Roman Dolidze,0.5,0.0,0.66,0.305,0.3,0.5,1.5,0.0,0.0,...,1,0,1,0,0,Orthodox,187.96,193.04,205.0,32.0
2,Harry Hunsucker,,,,,,,,,,...,0,0,0,0,0,Orthodox,187.96,190.5,241.0,32.0
3,Montserrat Conejo,,,,,,,,,,...,0,0,0,0,0,Southpaw,152.4,154.94,115.0,28.0
4,Macy Chiasson,0.125,0.0,0.535625,0.57875,0.185,0.16625,0.125,0.1875,0.25,...,0,1,2,1,0,Orthodox,180.34,182.88,135.0,29.0


# Create Function to Select Fighters and Create DF

In [152]:
# Create function to select fighters and output dataframe for model

def select_fighters(red_name, blue_name):
    # Obtain rows from fighters dataset for each fighter name
    red_fighter = fighters[fighters['fighter'] == red_name]
    blue_fighter = fighters[fighters['fighter'] == blue_name]
    
    # Rename columns to include R_ or B_
    red_fighter.columns = ["R_"+col for col in red_fighter.columns]
    blue_fighter.columns = ["B_"+col for col in blue_fighter.columns]
    
    # Output dataframe for model
    df = blue_fighter.join(red_fighter)
    df.drop(columns=['R_fighter','B_fighter'], inplace=True)
    return df

In [154]:
# Example fighter selection - now what needs to be added by user is title_bout and weight_class and this will match
# the shape of the dataframe in the max_preprocessing_v3

select_fighters('Gustavo Lopez','Roman Dolidze')

Unnamed: 0,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUATT,B_avg_opp_SUATT,B_avg_REV,B_avg_opp_REV,...,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,R_age
1,0.5,0.0,0.66,0.305,0.3,0.5,1.5,0.0,0.0,0.0,...,,,,,,,,,,
