In [20]:
import pandas as pd
import numpy as np

import json

In [87]:
# Load the data in its current (10/25) layout.
# The goal of this notebook is to convert that data to a cleaner format.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling, so only
# point these calls at pickle files that come from a trusted source.
stat_data = pd.read_pickle("Pickled big stats.pkl")
stat_data = stat_data.set_index(keys='Fight')
fight_data = pd.read_pickle('pickled fights.pkl')
fighter_data = pd.read_pickle('pickled fighter stats.pkl')

In [88]:
# first step is to have the data set up in an ideal format
def setup_dataframes(stat_data, fight_data):
    """Clean both raw frames and tag every stat row with its fight id.

    Runs setup_stat_data and setup_fight_data on their respective inputs,
    then uses join_stat_fight_id to swap the 'fight_name' column of the
    stat frame for the matching 'fight_id'.

    Returns a (stat frame, fight frame) tuple.
    """
    cleaned_stats = setup_stat_data(stat_data)
    cleaned_fights = setup_fight_data(fight_data)
    stats_with_ids = join_stat_fight_id(
        cleaned_stats, cleaned_fights, 'fight_name', 'fight_id'
    )
    return stats_with_ids, cleaned_fights

def setup_stat_data(stat_data):
    """Return a cleaned copy of the stat frame; the input is left untouched.

    The index is moved back into the columns, a 'Fight' column is renamed
    to 'fight_name', and 1 is subtracted from every 'Fighter' value.
    """
    flattened = stat_data.reset_index().rename(columns={'Fight': 'fight_name'})
    return flattened.assign(Fighter=flattened['Fighter'] - 1)

def setup_fight_data(fight_data):
    """Return a cleaned copy of the fight frame; the input is left untouched.

    Steps, in order:
      * move the index back into the columns,
      * give the positional columns '0'..'4', 'Fight' and 'Date' descriptive
        snake_case names,
      * drop the 'ELO1' and 'ELO2' columns,
      * subtract 1 from every 'winner' value,
      * add a 'fight_id' column equal to the new row index.
    """
    column_names = {
        '0': 'fighter_0',
        '1': 'fighter_1',
        '2': 'winner',
        '3': 'method',
        '4': 'round_end',
        'Fight': 'fight_name',
        'Date': 'date',
    }
    return (
        fight_data
        .reset_index()
        .rename(columns=column_names)
        .drop(columns=['ELO1', 'ELO2'])
        .assign(winner=lambda frame: frame['winner'] - 1)
        .assign(fight_id=lambda frame: frame.index)
    )

def join_stat_fight_id(stat_data, fight_data, fight_name_col, fight_id_col):
    """Replace the fight-name column of stat_data with the fight-id column.

    Only fight_name_col and fight_id_col are taken from fight_data. The two
    frames are inner-joined on fight_name_col, so stat rows whose fight name
    has no match in fight_data are dropped. fight_name_col is then removed
    from the result. Neither input frame is modified.
    """
    id_lookup = fight_data.loc[:, [fight_name_col, fight_id_col]]
    joined = stat_data.merge(id_lookup, how='inner', on=fight_name_col)
    return joined.drop(columns=[fight_name_col])

# Build the cleaned stat frame (now keyed by fight_id) and the cleaned fight frame.
df_stat_data, df_fight_data = setup_dataframes(stat_data=stat_data,
                                               fight_data=fight_data)

In [89]:
# here, instead of storing fighter data in a dataframe, we store it in a dictionary/JSON
def gen_fighter_history_dict(df_stat_data, df_fight_data, df_fighter_data):
    """Build a JSON-serialisable history record for every fighter.

    Parameters
    ----------
    df_stat_data
        Not used by this function; kept so existing call sites keep working.
    df_fight_data
        Frame with 'fight_name' and 'fight_id' columns.
    df_fighter_data
        Frame that, after ``reset_index()``, has 'Fighter', 'Alternate' and
        'Fights' columns, where each 'Fights' entry is an iterable of fight
        names. The frame is not modified.

    Returns
    -------
    dict
        ``{row position: {'fighter_names': [...], 'fight_ids': [...]}}``.
        'fighter_names' holds the 'Fighter' value followed by the 'Alternate'
        value when the two differ; 'fight_ids' holds the ids of the fighter's
        fights in the order they appear in 'Fights'.

    Raises
    ------
    KeyError
        If a name in 'Fights' does not occur in df_fight_data['fight_name'].
    """
    # int() turns each id into a plain Python int so json.dump accepts it.
    fight_to_id = {
        fight_name: int(fight_id)
        for fight_name, fight_id in zip(df_fight_data['fight_name'],
                                        df_fight_data['fight_id'])
    }

    fighters = df_fighter_data.reset_index()
    rows = zip(fighters['Fighter'], fighters['Alternate'], fighters['Fights'])

    history = {}
    for fighter_id, (primary_name, alternate_name, fights) in enumerate(rows):
        history[fighter_id] = {
            # dict.fromkeys removes the duplicate when both names are equal
            # and, unlike set(), keeps a stable order (primary name first),
            # so the generated JSON is identical from run to run.
            'fighter_names': list(dict.fromkeys([primary_name, alternate_name])),
            'fight_ids': [fight_to_id[fight] for fight in fights],
        }
    return history

# One history record per fighter, ready to be written out as JSON.
fighter_dict = gen_fighter_history_dict(
    df_stat_data=df_stat_data,
    df_fight_data=df_fight_data,
    df_fighter_data=fighter_data,
)

In [90]:
def df_fight_data_replace_fighter_names_with_ids(df_fight_data, fighter_dict):
    """Swap fighter names for fighter ids in 'fighter_0' and 'fighter_1'.

    A name -> id lookup is built from every entry of each record's
    'fighter_names' list in fighter_dict; if a name appears under several
    ids, the last one seen wins. The lookup is applied to a copy of
    df_fight_data, so the input frame is not modified.

    Returns a (converted frame, name -> id dict) tuple.
    Raises KeyError if either column contains a name missing from the lookup.
    """
    name_dict = {
        alias: fighter_id
        for fighter_id, record in fighter_dict.items()
        for alias in record['fighter_names']
    }

    def to_fighter_id(name):
        return name_dict[name]

    returned = df_fight_data.copy()
    for corner in ('fighter_0', 'fighter_1'):
        returned[corner] = returned[corner].apply(to_fighter_id)
    return returned, name_dict

# NOTE: this rebinds df_fight_data, so the cell is not re-runnable on its own:
# a second run would look up the already-converted ids in a dict keyed by names.
# Re-run the setup_dataframes cell first if this cell has to be executed again.
df_fight_data, name_dict = df_fight_data_replace_fighter_names_with_ids(
    df_fight_data=df_fight_data,
    fighter_dict=fighter_dict,
)

In [92]:
# Persist the cleaned tables as CSV (without the row index) and the two
# fighter lookups as JSON.
df_stat_data.to_csv(path_or_buf='data/df_stat_data_10_25.csv', index=False)
df_fight_data.to_csv(path_or_buf='data/df_fight_data_10_25.csv', index=False)

with open("data/fighter_hist.json", mode="w") as f:
    f.write(json.dumps(fighter_dict))

with open("data/fighter_name_to_id.json", mode="w") as f:
    f.write(json.dumps(name_dict))

In [93]:
# read data from csv/JSON
df_stat_data = pd.read_csv('data/df_stat_data_10_25.csv')
df_fight_data = pd.read_csv('data/df_fight_data_10_25.csv')

# JSON object keys are always strings, so the integer fighter ids that key
# fighter_dict come back as '0', '1', ... after the round trip. Convert them
# back to int so they match the integer ids stored in df_fight_data and in the
# values of name_dict.
with open('data/fighter_hist.json') as json_file:
    fighter_dict = {int(fighter_id): history
                    for fighter_id, history in json.load(json_file).items()}

# name_dict is keyed by fighter name (already a string), so it loads unchanged.
with open('data/fighter_name_to_id.json') as json_file:
    name_dict = json.load(json_file)

In [85]:
# Preview the stat table keyed by fight_id. set_index returns a new frame,
# so df_stat_data itself is left unchanged by this cell.
df_stat_data.set_index(keys='fight_id')

Unnamed: 0_level_0,Fighter,Landed,Out Of,Round,Seconds,Stat
fight_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5291,0,0,-1,1,300,kd
5291,1,0,-1,1,300,kd
5291,0,15,29,1,300,sig str
5291,1,13,18,1,300,sig str
5291,0,15,29,1,300,total str
5291,1,16,21,1,300,total str
5291,0,0,0,1,300,td
5291,1,0,1,1,300,td
5291,0,0,-1,1,300,sub att
5291,1,0,-1,1,300,sub att
