# Pre-process fight data

In [2]:
import pandas as pd
# Load the raw fight table; the cells below split its 'R_' / 'B_' prefixed
# columns into one row per fighter.
df_fight = pd.read_csv(filepath_or_buffer='./data/data.csv')

### Separate red fighter data

In [29]:
# Red-corner view of every fight: keep all columns that are not blue-specific,
# plus the blue fighter's name, which becomes this row's opponent.
cols_red_fight = [
    col for col in df_fight.columns
    if not col.startswith('B_') or col == 'B_fighter'
]

# Explicit copy so the columns added below are written to an independent frame
# (avoids SettingWithCopyWarning and leaves df_fight untouched).
df_red_fight = df_fight.loc[:, cols_red_fight].copy()

# 'R_fighter' -> 'fighter', 'R_age' -> 'age', ...; unprefixed columns are unchanged.
df_red_fight.columns = df_red_fight.columns.str.removeprefix('R_')

# Exact-label rename instead of a substring replace over every column name.
df_red_fight = df_red_fight.rename(columns={'B_fighter': 'opponent'})

df_red_fight['fighter_color'] = 'Red'
# 1 only when the red corner won; any other 'Winner' value (e.g. 'Draw') gives 0.
df_red_fight['is_winner'] = (df_red_fight['Winner'] == 'Red').astype(int)

### Separate blue fighter data

In [30]:
# Blue-corner view of every fight: keep all columns that are not red-specific,
# plus the red fighter's name, which becomes this row's opponent.
cols_blue_fight = [
    col for col in df_fight.columns
    if not col.startswith('R_') or col == 'R_fighter'
]

# Explicit copy so the columns added below are written to an independent frame
# (avoids SettingWithCopyWarning and leaves df_fight untouched).
df_blue_fight = df_fight.loc[:, cols_blue_fight].copy()

# 'B_fighter' -> 'fighter', 'B_age' -> 'age', ...; unprefixed columns are unchanged.
df_blue_fight.columns = df_blue_fight.columns.str.removeprefix('B_')

# Exact-label rename instead of a substring replace over every column name.
df_blue_fight = df_blue_fight.rename(columns={'R_fighter': 'opponent'})

df_blue_fight['fighter_color'] = 'Blue'
# 1 only when the blue corner won; any other 'Winner' value (e.g. 'Draw') gives 0.
df_blue_fight['is_winner'] = (df_blue_fight['Winner'] == 'Blue').astype(int)

### Combine fighter data and write to file

In [35]:
# Stack the red- and blue-corner views into one frame with a row per
# (fighter, fight), ordered by fighter name and then by date.
# One sort on ['fighter', 'date'] defines the whole ordering, so the earlier
# extra sort on 'date' alone was redundant work.
# NOTE(review): 'date' is sorted exactly as read from the CSV (no date parsing
# happens in this notebook); that is chronological for "YYYY-MM-DD" strings —
# confirm the column keeps that format.
# NOTE(review): rows sharing the same fighter AND date may come out in a
# different relative order than with the previous double sort — confirm
# nothing downstream relies on that tie order.
df_fight_by_fighters = (
    pd.concat([df_red_fight, df_blue_fight], axis=0)
    .sort_values(by=['fighter', 'date'])
)
df_fight_by_fighters

Unnamed: 0,fighter,opponent,Referee,date,location,Winner,title_bout,weight_class,avg_KD,avg_opp_KD,...,win_by_KO/TKO,win_by_Submission,win_by_TKO_Doctor_Stoppage,Stance,Height_cms,Reach_cms,Weight_lbs,age,fighter_color,is_winner
510,Aalon Cruz,Spike Carlyle,Scott Howard,2020-02-29,"Norfolk, Virginia, USA",Blue,False,Featherweight,,,...,0,0,0,Switch,182.88,198.12,145.0,30.0,Red,0
34,Aalon Cruz,Uros Medic,Mark Smith,2021-03-06,"Las Vegas, Nevada, USA",Red,False,Lightweight,0.00,0.0,...,0,0,0,Switch,182.88,198.12,145.0,31.0,Blue,0
5779,Aaron Brink,Andrei Arlovski,John McCarthy,2000-11-17,"Atlantic City, New Jersey, USA",Red,False,Heavyweight,,,...,0,0,0,Orthodox,190.50,,205.0,26.0,Blue,0
3320,Aaron Phillips,Sam Sicilia,Jason Herzog,2014-05-24,"Las Vegas, Nevada, USA",Red,False,Featherweight,,,...,0,0,0,Southpaw,175.26,180.34,135.0,24.0,Blue,0
3168,Aaron Phillips,Matt Hobar,Kevin Nix,2014-08-23,"Tulsa, Oklahoma, USA",Blue,False,Bantamweight,0.00,0.0,...,0,0,0,Southpaw,175.26,180.34,135.0,25.0,Red,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2557,Zubaira Tukhugov,Phillipe Nover,Mark Smith,2015-12-10,"Las Vegas, Nevada, USA",Red,False,Featherweight,0.00,0.0,...,1,0,0,Orthodox,172.72,172.72,145.0,24.0,Red,1
2361,Zubaira Tukhugov,Renato Moicano,Eduardo Herdy,2016-05-14,"Curitiba, Parana, Brazil",Red,False,Featherweight,0.00,0.0,...,1,0,0,Orthodox,172.72,172.72,145.0,25.0,Blue,0
733,Zubaira Tukhugov,Lerone Murphy,Herb Dean,2019-09-07,"Abu Dhabi, Abu Dhabi, United Arab Emirates",Draw,False,Featherweight,0.00,0.0,...,1,0,0,Orthodox,172.72,172.72,145.0,28.0,Red,0
518,Zubaira Tukhugov,Kevin Aguilar,Marc Goddard,2020-02-22,"Auckland, New Zealand",Blue,False,Featherweight,0.50,0.0,...,1,0,0,Orthodox,172.72,172.72,145.0,29.0,Blue,1


In [34]:
# Persist the per-fighter fight rows. The index is written as the first,
# unnamed CSV column (pandas default, spelled out here).
df_fight_by_fighters.to_csv(
    path_or_buf='../src/data/fight_by_fighters.csv',
    index=True,
)

### Create index of fighters based on those in data set

In [36]:
# One row per fighter. df_fight_by_fighters is ordered by fighter and then
# date, so keep='last' retains each fighter's latest-dated row.
# drop_duplicates already returns a new DataFrame, so no prior .copy() is needed.
df_fighters_list = df_fight_by_fighters.drop_duplicates(subset='fighter', keep='last')
df_fighters_list

Unnamed: 0,fighter,opponent,Referee,date,location,Winner,title_bout,weight_class,avg_KD,avg_opp_KD,...,win_by_KO/TKO,win_by_Submission,win_by_TKO_Doctor_Stoppage,Stance,Height_cms,Reach_cms,Weight_lbs,age,fighter_color,is_winner
34,Aalon Cruz,Uros Medic,Mark Smith,2021-03-06,"Las Vegas, Nevada, USA",Red,False,Lightweight,0.00,0.000000,...,0,0,0,Switch,182.88,198.12,145.0,31.0,Blue,0
5779,Aaron Brink,Andrei Arlovski,John McCarthy,2000-11-17,"Atlantic City, New Jersey, USA",Red,False,Heavyweight,,,...,0,0,0,Orthodox,190.50,,205.0,26.0,Blue,0
375,Aaron Phillips,Jack Shore,Herb Dean,2020-07-15,"Abu Dhabi, Abu Dhabi, United Arab Emirates",Red,False,Bantamweight,0.00,0.000000,...,0,0,0,Southpaw,175.26,180.34,135.0,30.0,Blue,0
3686,Aaron Riley,Justin Salas,Randy Corley,2013-07-27,"Seattle, Washington, USA",Blue,False,Lightweight,0.00,0.039062,...,0,0,0,Southpaw,172.72,175.26,155.0,32.0,Red,0
4148,Aaron Rosa,James Te Huna,Leon Roberts,2012-03-02,"Sydney, New South Wales, Australia",Red,False,LightHeavyweight,0.00,0.000000,...,0,0,0,Orthodox,193.04,195.58,205.0,28.0,Blue,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,Zhalgas Zhumagulov,Amir Albazi,Jason Herzog,2021-01-23,"Abu Dhabi, Abu Dhabi, United Arab Emirates",Red,False,Flyweight,0.00,0.000000,...,0,0,0,Switch,162.56,167.64,125.0,32.0,Blue,0
2818,Zhang Lipeng,Kajan Johnson,Greg Kleynjans,2015-05-16,"Manila, Philippines",Blue,False,Lightweight,0.00,0.000000,...,0,0,0,Southpaw,180.34,180.34,155.0,25.0,Red,0
3936,Zhang Tiequan,Jon Tuck,Marc Goddard,2012-11-10,"Macau, China",Blue,False,Lightweight,0.00,1.000000,...,0,1,0,Orthodox,172.72,175.26,155.0,34.0,Red,0
493,Zhang Weili,Joanna Jedrzejczyk,Keith Peterson,2020-03-07,"Las Vegas, Nevada, USA",Red,True,WomenStrawweight,0.50,0.000000,...,1,1,0,Switch,162.56,160.02,115.0,30.0,Red,1


In [37]:
# Persist the one-row-per-fighter index. The DataFrame index is written as the
# first, unnamed CSV column (pandas default, spelled out here).
df_fighters_list.to_csv(
    path_or_buf='../src/data/fighters_list.csv',
    index=True,
)