In [None]:
# Import our dependencies
import pandas as pd
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import tensorflow as tf

In [None]:
# Load the raw data from the CSV that sits one directory above this notebook
data_df = pd.read_csv(filepath_or_buffer="../data.csv")
data_df

In [None]:
# Keep only the rows whose weight_class is "Welterweight"
welter_df = data_df.loc[data_df["weight_class"].eq("Welterweight")]
welter_df

In [None]:
# Remove the columns that the rest of this notebook does not use
welter_df = welter_df.drop(columns=["location", "Referee", "weight_class", "title_bout"])
welter_df

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the red corner's stance (one column per distinct R_Stance value).
# The encoder is left at its default sparse output and densified with .toarray(),
# and the column names are built from enc.categories_, so this cell does not rely on
# the `sparse=` argument or `get_feature_names`, both removed in newer scikit-learn.
enc = OneHotEncoder()

# Fit the encoder and produce the encoded DataFrame.
# index=welter_df.index keeps the row labels of the filtered frame; without it the
# result is labelled 0..n-1 and a later index-based merge pairs it with the wrong rows.
encode_df = pd.DataFrame(
    enc.fit_transform(welter_df.R_Stance.values.reshape(-1, 1)).toarray(),
    index=welter_df.index,
)

# Name the encoded columns "R_Stance_<category>"
encode_df.columns = ["R_Stance_" + str(category) for category in enc.categories_[0]]
encode_df

In [None]:
# One-hot encode the blue corner's stance (one column per distinct B_Stance value).
# Default sparse output + .toarray() and names from enc.categories_ avoid the
# `sparse=` argument and `get_feature_names`, both removed in newer scikit-learn.
enc = OneHotEncoder()

# Fit the encoder and produce the encoded DataFrame.
# index=welter_df.index keeps the row labels of the filtered frame; without it the
# result is labelled 0..n-1 and a later index-based merge pairs it with the wrong rows.
B_stance_df = pd.DataFrame(
    enc.fit_transform(welter_df.B_Stance.values.reshape(-1, 1)).toarray(),
    index=welter_df.index,
)

# Name the encoded columns "B_Stance_<category>"
B_stance_df.columns = ["B_Stance_" + str(category) for category in enc.categories_[0]]
B_stance_df

In [None]:
# One-hot encode the Winner column (one column per distinct Winner value).
# Default sparse output + .toarray() and names from enc.categories_ avoid the
# `sparse=` argument and `get_feature_names`, both removed in newer scikit-learn.
enc = OneHotEncoder()

# Fit the encoder and produce the encoded DataFrame.
# index=welter_df.index keeps the row labels of the filtered frame; without it the
# result is labelled 0..n-1 and a later index-based merge pairs it with the wrong rows.
winner_df = pd.DataFrame(
    enc.fit_transform(welter_df.Winner.values.reshape(-1, 1)).toarray(),
    index=welter_df.index,
)

# Name the encoded columns "Winner_<category>"
winner_df.columns = ["Winner_" + str(category) for category in enc.categories_[0]]
winner_df

In [None]:
# Attach the one-hot encoded stance / winner columns to the welterweight fights.
# welter_df is a filtered subset of data_df and still carries data_df's row labels,
# while each encoded frame has exactly one row per welter_df row, in the same order.
# Re-labelling the encoded frames with welter_df.index before the index merge makes
# every fight receive its own encoding (set_index raises if the lengths differ).
merged_df = pd.merge(
    welter_df, encode_df.set_index(welter_df.index),
    how="left", left_index=True, right_index=True,
)
merged_df2 = pd.merge(
    merged_df, B_stance_df.set_index(welter_df.index),
    how="left", left_index=True, right_index=True,
)
merged_df3 = pd.merge(
    merged_df2, winner_df.set_index(welter_df.index),
    how="left", left_index=True, right_index=True,
)
merged_df3

In [None]:
# Drop the original Winner / R_Stance / B_Stance columns (their encoded versions were merged in)
merged_df3 = merged_df3.drop(columns=["Winner", "R_Stance", "B_Stance"])
merged_df3

In [None]:
#Get rid of NaNs & drop columns that are not needed

# Names of the per-fight average statistics, without the corner / own-vs-opponent prefix.
single_stats = ["KD", "SIG_STR_pct", "TD_pct", "SUB_ATT", "REV"]
att_landed_stats = ["SIG_STR", "TOTAL_STR", "TD", "HEAD", "BODY", "LEG",
                    "DISTANCE", "CLINCH", "GROUND"]
stat_names = (
    single_stats
    + [stat + "_" + kind for stat in att_landed_stats for kind in ("att", "landed")]
    + ["CTRL_time(seconds)"]
)

# Expand to the 96 columns B_avg_*, B_avg_opp_*, R_avg_*, R_avg_opp_* that are dropped.
# Generating the names replaces a single very long literal whose last entries were
# split in the middle of a string.
avg_columns = [
    corner + "_" + scope + "_" + stat
    for corner in ("B", "R")
    for scope in ("avg", "avg_opp")
    for stat in stat_names
]

# Rows containing any NaN are removed first, while the average columns are still present
# (same order as before), then the average columns are dropped.
merged_df3 = merged_df3.dropna().drop(columns=avg_columns)
merged_df3.head()

In [None]:
# Convert the date column to datetime64.
# pd.to_datetime is applied to the whole column at once rather than element by element.
merged_df3['date'] = pd.to_datetime(merged_df3['date'])
merged_df3.head()

In [None]:
# Inspect the dtype of every column in merged_df3
merged_df3.dtypes

In [None]:
# Two orderings of the same rows: descending by each corner's total rounds fought
r_fighters_df = merged_df3.sort_values("R_total_rounds_fought", ascending=False)
b_fighters_df = merged_df3.sort_values("B_total_rounds_fought", ascending=False)
b_fighters_df

In [None]:
# Keep only the first row for each R_fighter / B_fighter value, in each frame's current order
r_fighters_df = r_fighters_df.drop_duplicates(subset=["R_fighter"], keep="first")
b_fighters_df = b_fighters_df.drop_duplicates(subset=["B_fighter"], keep="first")
b_fighters_df

In [None]:
# Stack the red-corner and blue-corner frames on top of each other (row-wise)
frames = [r_fighters_df, b_fighters_df]
fighters_df = pd.concat(frames, axis=0)
fighters_df

In [None]:
# Keep the first row for each distinct value of B_total_time_fought(seconds).
# NOTE(review): this uses a single statistic as the duplicate key, so two different
# fights that share the same value are collapsed into one row - confirm that is intended.
fighters_df = fighters_df.drop_duplicates(
    subset=['B_total_time_fought(seconds)'],
    keep='first',
)

In [None]:
# Write fighters_df (including its index) to a CSV file in the working directory
fighters_df.to_csv(path_or_buf="fighters_clean.csv")


In [None]:
# Keep only the rows whose weight_class is "Lightweight"
lightweight_df = data_df.loc[data_df["weight_class"].eq("Lightweight")]
lightweight_df

In [None]:
# Remove the columns that the rest of this notebook does not use
lightweight_df = lightweight_df.drop(columns=["location", "Referee", "weight_class", "title_bout"])
lightweight_df

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the red corner's stance for the lightweight fights.
# The encoder is left at its default sparse output and densified with .toarray(),
# and the column names are built from enc.categories_, so this cell does not rely on
# the `sparse=` argument or `get_feature_names`, both removed in newer scikit-learn.
enc = OneHotEncoder()

# Fit the encoder and produce the encoded DataFrame.
# index=lightweight_df.index keeps the row labels of the filtered frame; without it the
# result is labelled 0..n-1 and a later index-based merge pairs it with the wrong rows.
encode_df = pd.DataFrame(
    enc.fit_transform(lightweight_df.R_Stance.values.reshape(-1, 1)).toarray(),
    index=lightweight_df.index,
)

# Name the encoded columns "R_Stance_<category>"
encode_df.columns = ["R_Stance_" + str(category) for category in enc.categories_[0]]
encode_df

In [None]:
# One-hot encode the blue corner's stance for the lightweight fights.
# Default sparse output + .toarray() and names from enc.categories_ avoid the
# `sparse=` argument and `get_feature_names`, both removed in newer scikit-learn.
enc = OneHotEncoder()

# Fit the encoder and produce the encoded DataFrame.
# index=lightweight_df.index keeps the row labels of the filtered frame; without it the
# result is labelled 0..n-1 and a later index-based merge pairs it with the wrong rows.
B_stance_df = pd.DataFrame(
    enc.fit_transform(lightweight_df.B_Stance.values.reshape(-1, 1)).toarray(),
    index=lightweight_df.index,
)

# Name the encoded columns "B_Stance_<category>"
B_stance_df.columns = ["B_Stance_" + str(category) for category in enc.categories_[0]]
B_stance_df

In [None]:
# One-hot encode the Winner column for the lightweight fights.
# Default sparse output + .toarray() and names from enc.categories_ avoid the
# `sparse=` argument and `get_feature_names`, both removed in newer scikit-learn.
enc = OneHotEncoder()

# Fit the encoder and produce the encoded DataFrame.
# index=lightweight_df.index keeps the row labels of the filtered frame; without it the
# result is labelled 0..n-1 and a later index-based merge pairs it with the wrong rows.
lwwinner_df = pd.DataFrame(
    enc.fit_transform(lightweight_df.Winner.values.reshape(-1, 1)).toarray(),
    index=lightweight_df.index,
)

# Name the encoded columns "Winner_<category>"
lwwinner_df.columns = ["Winner_" + str(category) for category in enc.categories_[0]]
lwwinner_df

In [None]:
# Attach the one-hot encoded stance / winner columns to the lightweight fights.
# lightweight_df is a filtered subset of data_df and still carries data_df's row labels,
# while each encoded frame has exactly one row per lightweight_df row, in the same order.
# Re-labelling the encoded frames with lightweight_df.index before the index merge makes
# every fight receive its own encoding (set_index raises if the lengths differ).
lwmerged_df = pd.merge(
    lightweight_df, encode_df.set_index(lightweight_df.index),
    how="left", left_index=True, right_index=True,
)
lwmerged_df2 = pd.merge(
    lwmerged_df, B_stance_df.set_index(lightweight_df.index),
    how="left", left_index=True, right_index=True,
)
lwmerged_df3 = pd.merge(
    lwmerged_df2, lwwinner_df.set_index(lightweight_df.index),
    how="left", left_index=True, right_index=True,
)
lwmerged_df3

In [None]:
# Drop the original Winner / R_Stance / B_Stance columns (their encoded versions were merged in)
lwmerged_df3 = lwmerged_df3.drop(columns=["Winner", "R_Stance", "B_Stance"])
lwmerged_df3

In [None]:
#Get rid of NaNs and columns that are not needed

# Names of the per-fight average statistics, without the corner / own-vs-opponent prefix.
single_stats = ["KD", "SIG_STR_pct", "TD_pct", "SUB_ATT", "REV"]
att_landed_stats = ["SIG_STR", "TOTAL_STR", "TD", "HEAD", "BODY", "LEG",
                    "DISTANCE", "CLINCH", "GROUND"]
stat_names = (
    single_stats
    + [stat + "_" + kind for stat in att_landed_stats for kind in ("att", "landed")]
    + ["CTRL_time(seconds)"]
)

# Expand to the 96 columns B_avg_*, B_avg_opp_*, R_avg_*, R_avg_opp_* that are dropped.
# Generating the names replaces a single very long literal whose last entry was
# split in the middle of a string.
avg_columns = [
    corner + "_" + scope + "_" + stat
    for corner in ("B", "R")
    for scope in ("avg", "avg_opp")
    for stat in stat_names
]

# Rows containing any NaN are removed first, while the average columns are still present
# (same order as before), then the average columns are dropped.
lwmerged_df3 = lwmerged_df3.dropna().drop(columns=avg_columns)
lwmerged_df3

In [None]:
# Convert the date column to datetime64.
# pd.to_datetime is applied to the whole column at once rather than element by element.
lwmerged_df3['date'] = pd.to_datetime(lwmerged_df3['date'])
lwmerged_df3

In [None]:
# Inspect the dtype of every column in lwmerged_df3
lwmerged_df3.dtypes

In [None]:
# Two orderings of the same rows: descending by each corner's total rounds fought
lw_r_fighters_df = lwmerged_df3.sort_values("R_total_rounds_fought", ascending=False)
lw_b_fighters_df = lwmerged_df3.sort_values("B_total_rounds_fought", ascending=False)
lw_b_fighters_df

In [None]:
# Keep only the first row for each R_fighter / B_fighter value, in each frame's current order
lw_r_fighters_df = lw_r_fighters_df.drop_duplicates(subset=["R_fighter"], keep="first")
lw_b_fighters_df = lw_b_fighters_df.drop_duplicates(subset=["B_fighter"], keep="first")
lw_b_fighters_df

In [None]:
# Stack the red-corner and blue-corner lightweight frames on top of each other (row-wise)
frames = [lw_r_fighters_df, lw_b_fighters_df]
lw_fighters_df = pd.concat(frames, axis=0)
lw_fighters_df

In [None]:
# Write lwmerged_df3 (including its index) to a CSV file in the working directory.
# NOTE(review): the welterweight section exports the per-fighter frame (fighters_df),
# whereas this exports lwmerged_df3 and lw_fighters_df is not used in the cells shown -
# confirm which frame is meant to be saved.
lwmerged_df3.to_csv(path_or_buf="LW_clean_data.csv")
