### 공통

In [2]:
import pickle
import pandas as pd
import numpy as np
# Show up to 1000 rows when displaying a frame.
pd.options.display.max_rows = 1000
# Never truncate cell text. None is the supported "no limit" value; the old
# sentinel -1 has been deprecated since pandas 1.0.
pd.options.display.max_colwidth = None

from collections import Counter

In [3]:
# Regular expressions used to parse the `event_contents` text.
import re

# Raw strings keep the backslash escapes (\s, \[, \() intact for the regex
# engine; in a normal string literal they are invalid escape sequences.

# Optional "Target: <name> [<id>]" prefix followed by "Location: (x, y...)".
# Groups: target name, target id, integer part of x, integer part of y.
cmp = re.compile(r'(?:Target:\s([A-Za-z]*)\s\[([0-9A-Z]*)\][\s;]*)?Location:\s\(([0-9]*)[.0-9]*,\s([0-9]*)[.0-9]*.*?\)')
# "<name> [<id>]" pairs. Groups: unit name (may be empty), unit id.
units = re.compile(r'([0-9A-Za-z]*)\s\[([0-9A-Z]*)\]')

### 학습용

In [314]:
# Load the training event log. For a quick partial run, pass nrows=1000000
# to read_csv as well.
df = pd.read_csv(filepath_or_buffer='./data/train.csv')

In [315]:
# Keep only the 'Selection' events.
selection_rows = df[df['event'].isin(['Selection'])]
# Parse each event_contents string into a list of (unit name, unit id) tuples.
arr_units = selection_rows['event_contents'].map(units.findall)
# Swap the raw text column for the parsed one; assign() appends it as the
# last column, which is where the original concat placed it.
df = selection_rows.drop(columns='event_contents').assign(event_contents=arr_units)

In [316]:
# unit_dict[game_id] is a two-element list, one dict per player (index 0 / 1),
# mapping unit id -> most recently seen unit name for that id.
unit_dict = {}
# species_units[species][unit name] counts how many times a unit with that name
# was recorded for a player of that species.
species_units = {'Z': {}, 'P': {}, 'T': {}}

# Walk the needed columns in lockstep. df.iterrows() would build a Series
# object for every row, which is very slow on a frame of this size.
for idx, game_id, player, species, selected in zip(
        df.index, df['game_id'], df['player'], df['species'], df['event_contents']):
    if game_id not in unit_dict:
        unit_dict[game_id] = [{}, {}]
    per_player = unit_dict[game_id]
    for unit_name, unit_id in selected:
        if not unit_name:
            # The regex allows an empty name in front of "[id]"; report and skip it.
            print(f"{idx}: [gid {game_id}] Invalid item name {unit_name} ({unit_id})")
            continue
        # Ignore ids that are already recorded as belonging to the other player.
        if unit_id in per_player[(player + 1) % 2]:
            continue
        per_player[player][unit_id] = unit_name
        # Tally unit names per species.
        name_counts = species_units[species]
        name_counts[unit_name] = name_counts.get(unit_name, 0) + 1

In [487]:
# feature 생성 시, Selection Action 상위 100개까지만 취해서 feature로 만듬
columns100_dict = {}
for sp_key in ['Z', 'P', 'T']:
    #print('-------------------------------', sp_key)
    columns100_dict[sp_key] = pd.Series(species_units[sp_key]).sort_values(ascending=False)[:100].to_dict()
    #print(columns100_dict[sp_key])

In [320]:
# (game_id, player) -> species, taking the first non-null 'species' value
# among the rows of each (game, player) pair in df.
species_by_player = df.groupby(['game_id', 'player'])['species'].first()
species_dict = species_by_player.to_dict()

In [321]:
# game_id -> winner, taking the first non-null 'winner' value among the rows
# of each game in df.
winner_by_game = df.groupby(['game_id'])['winner'].first()
winner_dict = winner_by_game.to_dict()

In [318]:
# Persist the lookup tables built from the training data. Each file is opened
# in a `with` block so the handle is flushed and closed right after the dump.
for payload, pickle_path in (
        (unit_dict, './data/unit_dict.pkl'),
        (species_units, './data/species_units_dict.pkl'),
        (species_dict, './data/species_dict.pkl'),
        (winner_dict, './data/winner_dict.pkl'),
        (columns100_dict, './data/columns100_dict.pkl'),
):
    with open(pickle_path, 'wb') as fh:
        pickle.dump(payload, fh)

In [407]:
# One column per (player, rank) slot: p0_u0..p0_u99 followed by p1_u0..p1_u99.
# Slot k of a player holds the count for the k-th unit name of that player's
# species in columns100_dict.
col_names = [f'p{pid}_u{uid}' for pid in range(2) for uid in range(100)]
slots_per_player = len(col_names) // 2

# Open the output once and stream every row through the same handle;
# re-opening the file in append mode for each game is needlessly slow.
with open('./data/ftr_top100_unit_counts.csv', 'w', encoding='utf-8') as out:
    out.write(','.join(['game_id'] + col_names) + '\n')
    for GID in range(df['game_id'].max() + 1):
        # A game id with no entry in unit_dict gets an all-zero row instead of
        # aborting the export with a KeyError.
        units_by_player = unit_dict.get(GID, ({}, {}))
        row = [GID]
        for pid in range(2):
            # Players missing from species_dict fall back to the 'T' column set.
            species = species_dict.get((GID, pid), 'T')
            top_names = list(columns100_dict[species])[:slots_per_player]
            # How many of this player's unit ids currently carry each name.
            name_counts = Counter(units_by_player[pid].values())
            row.extend(name_counts[name] for name in top_names)
            # Pad with zeros so the row always matches the header width, even
            # when a species has fewer names than slots.
            row.extend([0] * (slots_per_player - len(top_names)))
        out.write(','.join(map(str, row)) + '\n')

### 예측용

In [4]:
# Load the prediction (test) event log. For a quick partial run, pass
# nrows=1000000 to read_csv as well.
df = pd.read_csv(filepath_or_buffer='./data/test.csv')
# Notebook display: the frame's shape together with the largest game id.
(df.shape, df['game_id'].max())

(28714849, 6)

In [6]:
# Keep only the 'Selection' events.
selection_rows = df[df['event'].isin(['Selection'])]
# Parse each event_contents string into a list of (unit name, unit id) tuples.
arr_units = selection_rows['event_contents'].map(units.findall)
# Swap the raw text column for the parsed one; assign() appends it as the
# last column, which is where the original concat placed it.
df = selection_rows.drop(columns='event_contents').assign(event_contents=arr_units)

In [7]:
# unit_dict[game_id] is a two-element list, one dict per player (index 0 / 1),
# mapping unit id -> most recently seen unit name for that id.
unit_dict = {}
# species_units[species][unit name] counts how many times a unit with that name
# was recorded for a player of that species.
species_units = {'Z': {}, 'P': {}, 'T': {}}

# Walk the needed columns in lockstep. df.iterrows() would build a Series
# object for every row, which is very slow on a frame of this size.
for idx, game_id, player, species, selected in zip(
        df.index, df['game_id'], df['player'], df['species'], df['event_contents']):
    if game_id not in unit_dict:
        unit_dict[game_id] = [{}, {}]
    per_player = unit_dict[game_id]
    for unit_name, unit_id in selected:
        if not unit_name:
            # The regex allows an empty name in front of "[id]"; report and skip it.
            print(f"{idx}: [gid {game_id}] Invalid item name {unit_name} ({unit_id})")
            continue
        # Ignore ids that are already recorded as belonging to the other player.
        if unit_id in per_player[(player + 1) % 2]:
            continue
        per_player[player][unit_id] = unit_name
        # Tally unit names per species.
        name_counts = species_units[species]
        name_counts[unit_name] = name_counts.get(unit_name, 0) + 1

In [15]:
# Persist the lookup tables built from the test data. Each file is opened in a
# `with` block so the handle is flushed and closed right after the dump.
for payload, pickle_path in (
        (unit_dict, './data/unit_dict_test.pkl'),
        (species_units, './data/species_units_dict_test.pkl'),
):
    with open(pickle_path, 'wb') as fh:
        pickle.dump(payload, fh)

In [8]:
# Species per (game_id, player) has to come from the test events themselves.
# './data/species_dict.pkl' was built from train.csv, so looking test games up
# in it either misses (and silently falls back to 'T') or returns the species
# of an unrelated training game.
species_dict = df.groupby(by=['game_id', 'player'])['species'].first().to_dict()

# The feature columns must match the ones used for training, so reuse the
# top-100 unit names saved by the training section.
# NOTE: pickle.load is only safe on trusted files; this one is written by the
# training section of this notebook.
with open('./data/columns100_dict.pkl', 'rb') as fh:
    columns100_dict = pickle.load(fh)

In [14]:
# One column per (player, rank) slot: p0_u0..p0_u99 followed by p1_u0..p1_u99.
# Slot k of a player holds the count for the k-th unit name of that player's
# species in columns100_dict.
col_names = [f'p{pid}_u{uid}' for pid in range(2) for uid in range(100)]
slots_per_player = len(col_names) // 2

# Open the output once and stream every row through the same handle;
# re-opening the file in append mode for each game is needlessly slow.
with open('./data/ftr_top100_unit_counts_test.csv', 'w', encoding='utf-8') as out:
    out.write(','.join(['game_id'] + col_names) + '\n')
    # Only game ids present in df are written; sorting makes the row order
    # well defined instead of depending on set iteration order.
    for GID in sorted(set(df['game_id'])):
        units_by_player = unit_dict[GID]
        row = [GID]
        for pid in range(2):
            # Players missing from species_dict fall back to the 'T' column set.
            species = species_dict.get((GID, pid), 'T')
            top_names = list(columns100_dict[species])[:slots_per_player]
            # How many of this player's unit ids currently carry each name.
            name_counts = Counter(units_by_player[pid].values())
            row.extend(name_counts[name] for name in top_names)
            # Pad with zeros so the row always matches the header width, even
            # when a species has fewer names than slots.
            row.extend([0] * (slots_per_player - len(top_names)))
        out.write(','.join(map(str, row)) + '\n')