## unit ftrs : selected units to attack

##### Taehwan Jeong (xikizima@hotmail.com)

### 공통 (Common)

In [16]:
import pickle, datetime
import pandas as pd
import numpy as np
pd.options.display.max_rows=1000
pd.options.display.max_columns=1000
pd.options.display.max_colwidth = -1

from collections import Counter

from matplotlib import pyplot as plt

In [2]:
# 정규식
import re
cmp = re.compile('(?:Target:\s([A-Za-z]*)\s\[([0-9A-Z]*)\][\s;]*)?Location:\s\(([0-9]*)[.0-9]*,\s([0-9]*)[.0-9]*.*?\)')
coors = re.compile('(?:Location:|at)\s\(([0-9]*)[.0-9]*,\s([0-9]*)[.0-9]*.*?\)')
units = re.compile('([0-9A-Za-z]*)\s\[([0-9A-Z]*)\]')

### 학습용 (Train)

In [3]:
unit_info = pd.read_csv('./data/unit_info_data.csv')
whole_unit_set = set(unit_info['unit'])
unit_set = unit_info.groupby('species')['unit'].apply(set).to_dict()

unit_supply_dict = unit_info.set_index('unit')['supply'].to_dict()

del unit_supply_dict['Probe']
del unit_supply_dict['SCV']
del unit_supply_dict['Drone']

In [36]:
def build_attack_units_cnt_ftr(df, idx):
    df = df[(df['game_id'] < (idx*1000)+1000) & (df['game_id'] >= (idx*1000))]

    df.loc[:, 'slice'] = (df['time'] // 1).astype(int)
    df = df[df['slice'] < 12]

    df = df[(df['event'].isin(['Selection'])) | ((df['event'].isin(['Ability'])) & (df['event_contents'].str.contains('Attack')))]
    df['units'] = df[(df['event'].isin(['Selection']))]['event_contents'].map(units.findall).map(lambda row: [x[0] for x in row])
#    df['attack_units'] = df[(df['event'].isin(['Selection']))].apply(lambda row: [x for x in row['units'] if x in unit_dict[row['game_id']][row['player']]], axis=1)
    df['attack_units'] = df[(df['event'].isin(['Selection']))].apply(lambda row: [unit_supply_dict.get(x) for x in row['units'] if x in unit_supply_dict], axis=1)
    _df = df[(~df['attack_units'].isna())]
    df = df[(df['event'].isin(['Ability'])) | (_df['attack_units'].map(len) > 0)]
    df['selected_units'] = df.groupby(by=['game_id', 'player'])['attack_units'].shift()
    df = df[(df['event'] == 'Ability') & (~df['selected_units'].isna())]
    df['attack_unit_cnts'] = df['selected_units'].map(sum)

    _total = df.groupby(by=['game_id', 'player'])['attack_unit_cnts'].sum().unstack().fillna(0.0)
    _total.columns = ['p0_attack_units', 'p1_attack_units']

    _per_slice = df.groupby(by=['game_id', 'player', 'slice'])['attack_unit_cnts'].sum().unstack(level=1).fillna(0.0).unstack().fillna(0.0)
    _per_slice.columns = [f'attack_units_p{x[0]}_s{x[1]}' for x in _per_slice.columns]
    ret = pd.merge(_total,_per_slice, left_index=True, right_index=True).reset_index()
    
    cols = ['game_id','p0_attack_units','p1_attack_units'] + [f'attack_units_p{uidx}_s{sidx}' for uidx in range(2) for sidx in range(5,12)]
    for col in cols:
        if not col in ret.columns:
            ret[col] = 0.0
    return ret[cols]

### 학습용 (Train)

In [37]:
df = pd.read_csv('./data/train.csv') #, nrows=4000000)
for idx in range(39):
    t1 = datetime.datetime.now()
    ret = build_attack_units_cnt_ftr(df, idx)
    ret.to_csv('./data/attack_units_cnt_train.csv', mode='a' if idx > 0 else 'w', index=False, header=None if idx > 0 else True)
    print(f'[{idx}] {(datetime.datetime.now() - t1).seconds:.1f} secs')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


[0] 11.0 secs
[1] 8.0 secs
[2] 8.0 secs
[3] 8.0 secs
[4] 8.0 secs
[5] 8.0 secs
[6] 8.0 secs
[7] 8.0 secs
[8] 8.0 secs
[9] 8.0 secs
[10] 8.0 secs
[11] 8.0 secs
[12] 8.0 secs
[13] 8.0 secs
[14] 8.0 secs
[15] 8.0 secs
[16] 9.0 secs
[17] 8.0 secs
[18] 8.0 secs
[19] 8.0 secs
[20] 8.0 secs
[21] 8.0 secs
[22] 8.0 secs
[23] 8.0 secs
[24] 8.0 secs
[25] 8.0 secs
[26] 8.0 secs
[27] 8.0 secs
[28] 8.0 secs
[29] 8.0 secs
[30] 8.0 secs
[31] 8.0 secs
[32] 8.0 secs
[33] 8.0 secs
[34] 8.0 secs
[35] 8.0 secs
[36] 8.0 secs
[37] 8.0 secs
[38] 7.0 secs


### 검증용 (Test)

In [38]:
#unit_dict = pickle.load(open('./data/unit_dict_test.pkl', 'rb'))
df = pd.read_csv('./data/test.csv')
START_IDX = 38
for idx in range(START_IDX, 56):
    t1 = datetime.datetime.now()
    ret = build_attack_units_cnt_ftr(df, idx)
    ret.to_csv('./data/attack_units_cnt_test.csv', mode='a' if idx > START_IDX else 'w', index=False, header=None if idx > START_IDX else True)
    print(f'[{idx}] {(datetime.datetime.now() - t1).seconds:.1f} secs')

[38] 4.0 secs
[39] 7.0 secs
[40] 7.0 secs
[41] 7.0 secs
[42] 7.0 secs
[43] 8.0 secs
[44] 7.0 secs
[45] 8.0 secs
[46] 8.0 secs
[47] 8.0 secs
[48] 7.0 secs
[49] 7.0 secs
[50] 7.0 secs
[51] 8.0 secs
[52] 7.0 secs
[53] 7.0 secs
[54] 7.0 secs
[55] 5.0 secs
