# Pre-process fight data

In [14]:
import pandas as pd
import numpy as np
import datetime as dt

# Fight table that the per-fighter split below is built from
df_fight = pd.read_csv(
    './data/data.csv',
)
# Raw fight totals (semicolon-delimited); source of the strike columns parsed later
df_fight_raw = pd.read_csv(
    './data/raw_total_fight_data.csv',
    sep=";",
)

## Split fight data so that each row describes a single fighter

### Separate red/blue fighter data

In [15]:
def create_single_fight_df(pfx, opp_pfx, color, df):
    """Return the fights in ``df`` as seen from one corner.

    Args:
        pfx: Column prefix of the fighter being kept (the calls in this
            notebook pass ``'R_'`` or ``'B_'``); it is stripped from the
            column names.
        opp_pfx: Column prefix of the opponent. Every column starting with
            it is dropped except ``f'{opp_pfx}fighter'``, which is kept and
            renamed to ``'opponent'``.
        color: Label stored in ``fighter_color`` and compared against the
            ``Winner`` column to derive ``outcome``.
        df: Fight table with prefixed per-corner columns and a ``Winner``
            column. It is not modified.

    Returns:
        A new DataFrame holding the kept columns plus ``fighter_color`` and
        ``outcome`` ("Won" / "Lost").
    """
    opp_name_col = f'{opp_pfx}fighter'
    # startswith() instead of a fixed [:2] slice so the prefix length is not hard-coded
    cols_single_fight = [
        k for k in df.columns
        if not k.startswith(opp_pfx) or k == opp_name_col
    ]
    # Explicit copy: columns are added below, and writing into a selection of
    # `df` would otherwise raise pandas' chained-assignment warning.
    df_single = df.loc[:, cols_single_fight].copy()
    df_single.columns = df_single.columns.str.removeprefix(pfx)
    # regex=False: the column name is a literal, not a pattern (the default
    # for `regex` differs between pandas versions).
    df_single.columns = df_single.columns.str.replace(opp_name_col, 'opponent', regex=False)
    df_single['fighter_color'] = color
    # NOTE(review): any Winner value other than `color` (e.g. a draw, if the
    # data contains one) is labelled "Lost" - confirm this is intended.
    df_single['outcome'] = np.where(df_single['Winner'] == color, "Won", "Lost")
    return df_single

In [16]:
# One frame per corner: each call names its own prefix, the opponent's prefix and its colour label
df_red_fight = create_single_fight_df(pfx='R_', opp_pfx='B_', color='Red', df=df_fight)
df_blue_fight = create_single_fight_df(pfx='B_', opp_pfx='R_', color='Blue', df=df_fight)

### Combine fighter data and write to file

In [17]:
# Stack the red and blue perspectives row-wise, then order by fighter name and date
df_fight_by_fighters = (
    pd.concat([df_red_fight, df_blue_fight], axis=0)
    .sort_values(by=['fighter', 'date'])
)

In [18]:
# Persist the per-fighter fight rows; the row index is not written
df_fight_by_fighters.to_csv(path_or_buf='../src/data/fight_by_fighters.csv', index=False)

### Create index of fighters based on those in data set

In [19]:
# One row per fighter. The frame is sorted by fighter then date, so keep="last"
# retains each fighter's final row in that ordering. drop_duplicates already
# returns a new frame, so no prior .copy() of the whole table is needed.
df_fighters_list = df_fight_by_fighters.drop_duplicates(subset='fighter', keep="last")

In [20]:
# Persist the one-row-per-fighter index; the row index is not written
df_fighters_list.to_csv(path_or_buf='../src/data/fighters_list.csv', index=False)

## Parse significant strikes from raw fight data

### Separate each fight into rows for each fighter

In [21]:
# Post-processed metadata columns to keep
cols_meta = [
    'fighter',
    'opponent_fighter',
    'date'
]

# Post-processed strike columns to keep
cols_str = [
    'sig_str',
    'opponent_sig_str',
    'total_str',
    'opponent_total_str',
    'td',
    'opponent_td',
    'head',
    'opponent_head',
    'body',
    'opponent_body',
    'leg',
    'opponent_leg',
    'distance',
    'opponent_distance',
    'clinch',
    'opponent_clinch',
    'ground',
    'opponent_ground'
]

def create_fight_str_df(pfx, opp_pfx, color, df):
    """Return the strike columns of ``df`` as seen from one corner.

    Column names are normalised (trailing ``'.'`` removed, ``pfx`` stripped,
    ``opp_pfx`` replaced by ``'opponent_'``, lower-cased) and the result is
    restricted to ``cols_meta`` followed by ``cols_str``.

    Args:
        pfx: Column prefix of the fighter being kept (``'R_'`` or ``'B_'``
            in this notebook's calls).
        opp_pfx: Column prefix of the opponent.
        color: Accepted for signature parity with the callers; the returned
            frame carries no colour column because only ``cols_meta`` and
            ``cols_str`` are selected.
        df: Raw fight table. It is not modified.

    Returns:
        A new DataFrame with exactly the columns ``[*cols_meta, *cols_str]``.

    Raises:
        KeyError: If any expected column is missing after renaming.
    """
    def _normalise(name):
        # Drop a single trailing period (e.g. 'R_SIG_STR.' -> 'R_SIG_STR')
        if name.endswith('.'):
            name = name[:-1]
        # Prefixes are matched at the START of the name only. A plain substring
        # replace would also rewrite names that merely contain the prefix text,
        # e.g. 'SIG_STR_pct' contains 'R_' and 'SUB_ATT' contains 'B_'.
        if name.startswith(pfx):
            name = name[len(pfx):]
        elif name.startswith(opp_pfx):
            name = 'opponent_' + name[len(opp_pfx):]
        return name.lower()

    # rename() returns a new frame, so the caller's DataFrame is left untouched
    df_fight = df.rename(columns=_normalise)
    return df_fight.loc[:, [*cols_meta, *cols_str]]

In [22]:
# Build the red and blue perspectives of every raw fight, stack them, and order by fighter/date.
# NOTE: 'date' still holds the raw text at this point (it is parsed in a later cell),
# so the ordering within a fighter is lexicographic on that text.
df_red_fight_str = create_fight_str_df(pfx='R_', opp_pfx='B_', color='Red', df=df_fight_raw)
df_blue_fight_str = create_fight_str_df(pfx='B_', opp_pfx='R_', color='Blue', df=df_fight_raw)

df_fight_str = (
    pd.concat([df_red_fight_str, df_blue_fight_str], axis=0)
    .sort_values(by=['fighter', 'date'])
)

### Parse date

In [23]:
# Convert the "Month DD, YYYY" strings to datetimes in one vectorized call.
df_fight_str['date'] = pd.to_datetime(df_fight_str['date'], format='%B %d, %Y')
# Re-sort now that 'date' is a real datetime: the frame was ordered while the
# column was still text, which ranks a fighter's bouts by month name
# (April < August < December ...) rather than chronologically.
df_fight_str = df_fight_str.sort_values(by=['fighter', 'date'])

### Parse strike columns into successful, unsuccessful, and attempts

In [24]:
# Every strike cell reads "<landed> of <attempted>". For each strike column,
# replace the text with the landed count and add `_attempts` / `_misses` columns.
cols_str_with_attempts = []
for col in cols_str:
    misses_col = f'{col}_misses'
    attempts_col = f'{col}_attempts'

    raw_cells = df_fight_str[col]
    attempted = raw_cells.map(lambda cell: int(cell.split(' of ')[1]))
    landed = raw_cells.map(lambda cell: int(cell.split(' of ')[0]))

    df_fight_str[attempts_col] = attempted
    df_fight_str[col] = landed
    df_fight_str[misses_col] = attempted - landed

    # Output order per statistic: landed, misses, attempts
    cols_str_with_attempts += [col, misses_col, attempts_col]

# Keep only the metadata columns followed by the expanded strike columns
df_fight_str = df_fight_str.loc[:, [*cols_meta, *cols_str_with_attempts]]

In [25]:
# Persist the parsed strike statistics; the row index is not written
df_fight_str.to_csv(path_or_buf='../src/data/fight_str.csv', index=False)

## Split fight data so that each row describes a single fighter, for the latest fighter stats

### Separate red/blue fighter data

In [None]:
def create_single_fight_df(pfx, opp_pfx, color, df):
    """Return the fights in ``df`` as seen from one corner.

    NOTE: this re-declares the ``create_single_fight_df`` defined earlier in
    this notebook.

    Args:
        pfx: Column prefix of the fighter being kept (the calls in this
            notebook pass ``'R_'`` or ``'B_'``); it is stripped from the
            column names.
        opp_pfx: Column prefix of the opponent. Every column starting with
            it is dropped except ``f'{opp_pfx}fighter'``, which is kept and
            renamed to ``'opponent'``.
        color: Label stored in ``fighter_color`` and compared against the
            ``Winner`` column to derive ``outcome``.
        df: Fight table with prefixed per-corner columns and a ``Winner``
            column. It is not modified.

    Returns:
        A new DataFrame holding the kept columns plus ``fighter_color`` and
        ``outcome`` ("Won" / "Lost").
    """
    opp_name_col = f'{opp_pfx}fighter'
    # startswith() instead of a fixed [:2] slice so the prefix length is not hard-coded
    cols_single_fight = [
        k for k in df.columns
        if not k.startswith(opp_pfx) or k == opp_name_col
    ]
    # Explicit copy: columns are added below, and writing into a selection of
    # `df` would otherwise raise pandas' chained-assignment warning.
    df_single = df.loc[:, cols_single_fight].copy()
    df_single.columns = df_single.columns.str.removeprefix(pfx)
    # regex=False: the column name is a literal, not a pattern (the default
    # for `regex` differs between pandas versions).
    df_single.columns = df_single.columns.str.replace(opp_name_col, 'opponent', regex=False)
    df_single['fighter_color'] = color
    # NOTE(review): any Winner value other than `color` (e.g. a draw, if the
    # data contains one) is labelled "Lost" - confirm this is intended.
    df_single['outcome'] = np.where(df_single['Winner'] == color, "Won", "Lost")
    return df_single

In [None]:
# One frame per corner: each call names its own prefix, the opponent's prefix and its colour label
df_red_fight = create_single_fight_df(pfx='R_', opp_pfx='B_', color='Red', df=df_fight)
df_blue_fight = create_single_fight_df(pfx='B_', opp_pfx='R_', color='Blue', df=df_fight)

### Combine fighter data and write to file

In [None]:
# Stack the red and blue perspectives row-wise, then order by fighter name and date
df_fight_by_fighters = (
    pd.concat([df_red_fight, df_blue_fight], axis=0)
    .sort_values(by=['fighter', 'date'])
)

In [None]:
# Persist the per-fighter fight rows (same path as the earlier export in this notebook); no row index
df_fight_by_fighters.to_csv(path_or_buf='../src/data/fight_by_fighters.csv', index=False)

### Create index of fighters based on those in data set

In [None]:
# One row per fighter. The frame is sorted by fighter then date, so keep="last"
# retains each fighter's final row in that ordering. drop_duplicates already
# returns a new frame, so no prior .copy() of the whole table is needed.
df_fighters_list = df_fight_by_fighters.drop_duplicates(subset='fighter', keep="last")

In [None]:
# Persist each fighter's last row as the latest-stats table; the row index is not written
df_fighters_list.to_csv(path_or_buf='../src/data/latest_fighter_stats.csv', index=False)