## Camera features: camera moving range per minute

#### Taehwan jeong (xikizima@hotmail.com)

### 공통 (Common)

In [1]:
import pickle, datetime
import pandas as pd
import numpy as np
# Notebook display settings: show up to 1000 rows and never truncate cell text.
pd.options.display.max_rows = 1000
# None means "no width limit"; the legacy value -1 is deprecated since
# pandas 1.0 and rejected by newer pandas versions.
pd.options.display.max_colwidth = None

from collections import Counter

from matplotlib import pyplot as plt

In [2]:
# Regular expressions used to parse the `event_contents` text.
# Raw strings are used so that `\s`, `\[`, `\(` reach the regex engine verbatim
# instead of being treated as (invalid) Python string escapes.
import re

# Optional "Target: <name> [<id>]" prefix followed by "Location: (x, y, ...)".
# Groups: target name, target id, integer part of x, integer part of y.
cmp = re.compile(r'(?:Target:\s([A-Za-z]*)\s\[([0-9A-Z]*)\][\s;]*)?Location:\s\(([0-9]*)[.0-9]*,\s([0-9]*)[.0-9]*.*?\)')
# "Location: (x, y, ...)" or "at (x, y, ...)".
# Groups: integer part of x, integer part of y.
coors = re.compile(r'(?:Location:|at)\s\(([0-9]*)[.0-9]*,\s([0-9]*)[.0-9]*.*?\)')
# "<name> [<id>]" pairs. Groups: name, id.
units = re.compile(r'([0-9A-Za-z]*)\s\[([0-9A-Z]*)\]')

### 학습용 (Train)

In [59]:
def build_camera_moving_stats_ftr(df, idx, *, chunk_size=1000, max_slices=10):
    """Build per-game camera/right-click movement-distance features for one chunk.

    Only rows whose ``game_id`` lies in
    ``[idx * chunk_size, (idx + 1) * chunk_size)`` and whose ``event`` is
    ``'Camera'`` or ``'Right Click'`` are used. The integer parts of the first
    coordinate pair found in ``event_contents`` (via the module-level ``coors``
    regex) become ``x``/``y``. For each ``(game_id, player)`` the Euclidean
    distance between consecutive events (in row order) is computed, then
    summarised as mean/std overall and per time slice.

    Parameters
    ----------
    df : pandas.DataFrame
        Event log with at least the columns ``game_id``, ``player``, ``event``,
        ``event_contents`` and ``time``.
    idx : int
        Zero-based chunk number selecting the ``game_id`` range.
    chunk_size : int, keyword-only
        Number of consecutive ``game_id`` values per chunk (default 1000).
    max_slices : int, keyword-only
        Only events with ``floor(time) < max_slices`` are kept (default 10).
        NOTE(review): ``time`` is presumably in minutes, per the notebook
        title -- confirm against the data description.

    Returns
    -------
    pandas.DataFrame
        One row per ``game_id`` with the overall columns
        ``p0_moving_mean, p1_moving_mean, p0_moving_std, p1_moving_std`` and
        the per-slice columns ``cam_moving_p{player}_{mean|std}{slice}``.

    Raises
    ------
    IndexError
        If a selected row's ``event_contents`` holds no coordinate pair.
    """
    first_game = idx * chunk_size
    in_chunk = (df['game_id'] >= first_game) & (df['game_id'] < first_game + chunk_size)
    is_pointer_event = df['event'].isin(['Camera', 'Right Click'])
    # Copy so the column assignments below never write into a view of the
    # caller's frame (avoids SettingWithCopy ambiguity).
    df = df[in_chunk & is_pointer_event].copy()

    # First coordinate pair of every row. Building the frame from a list of
    # tuples avoids the very slow per-row `.apply(pd.Series)` expansion.
    first_pairs = [coors.findall(text)[0] for text in df['event_contents']]
    arr_coors = pd.DataFrame(first_pairs, index=df.index, columns=['x', 'y']).astype('int')
    df = pd.concat([df, arr_coors], axis=1)

    # Integer time bucket; keep only the first `max_slices` buckets.
    df['slice'] = (df['time'] // 1).astype(int)
    df = df[df['slice'] < max_slices].copy()

    # Previous position of the same player in the same game.
    prev = df.groupby(by=['game_id', 'player'])[['x', 'y']].shift()
    df['x_prev'] = prev['x']
    df['y_prev'] = prev['y']
    # The first event of each (game_id, player) has no predecessor: drop it.
    df = df[df['x_prev'].notna() & df['y_prev'].notna()].copy()
    df['dist_each'] = np.sqrt((df['x_prev'] - df['x']) ** 2 + (df['y_prev'] - df['y']) ** 2)

    # Overall mean/std per game, players spread across columns.
    movings_std = df.groupby(by=['game_id', 'player'])['dist_each'].agg(['mean', 'std']).unstack()
    movings_std.columns = ['p0_moving_mean', 'p1_moving_mean', 'p0_moving_std', 'p1_moving_std']
    movings_std = movings_std.reset_index()

    # Mean/std per game and slice; after both unstacks the column tuples are
    # (stat, player, slice).
    movings_per_slice = (
        df.groupby(by=['game_id', 'player', 'slice'])['dist_each']
        .agg(['mean', 'std'])
        .unstack(level=1)
        .unstack()
    )
    # Fixed: the original read the columns of an undefined name (`movings_`).
    movings_per_slice.columns = [
        f'cam_moving_p{x[1]}_{x[0]}{x[2]}' for x in movings_per_slice.columns
    ]
    movings_per_slice = movings_per_slice.reset_index()

    return pd.merge(movings_std, movings_per_slice, on='game_id')

In [60]:
# Build the camera-movement features for the training set one game_id chunk
# at a time and append every chunk to a single CSV file.
df = pd.read_csv('./data/train.csv')  # add nrows=4000000 for a quick partial run
for idx in range(39):
    t1 = datetime.datetime.now()
    ret = build_camera_moving_stats_ftr(df, idx)
    # The first chunk creates the file and writes the header; later chunks
    # append rows only.
    is_first = idx == 0
    ret.to_csv('./data/camera_moving_stats_train.csv', mode='w' if is_first else 'a', index=False, header=is_first)
    # total_seconds() keeps fractions and does not wrap like `.seconds` does.
    print(f'[{idx}] {(datetime.datetime.now() - t1).total_seconds():.1f} secs')

[0] 255.0 secs
[1] 242.0 secs
[2] 253.0 secs
[3] 243.0 secs
[4] 244.0 secs
[5] 242.0 secs
[6] 249.0 secs
[7] 250.0 secs
[8] 254.0 secs
[9] 250.0 secs
[10] 252.0 secs
[11] 249.0 secs
[12] 249.0 secs
[13] 248.0 secs
[14] 250.0 secs
[15] 244.0 secs
[16] 249.0 secs
[17] 249.0 secs
[18] 240.0 secs
[19] 245.0 secs
[20] 252.0 secs
[21] 267.0 secs
[22] 251.0 secs
[23] 249.0 secs
[24] 252.0 secs
[25] 246.0 secs
[26] 245.0 secs
[27] 240.0 secs
[28] 248.0 secs
[29] 250.0 secs
[30] 248.0 secs
[31] 251.0 secs
[32] 248.0 secs
[33] 249.0 secs
[34] 260.0 secs
[35] 245.0 secs
[36] 248.0 secs
[37] 254.0 secs
[38] 218.0 secs


### 예측용 (Test)

In [61]:
# Build the camera-movement features for the test set one game_id chunk at a
# time and append every chunk to a single CSV file.
df = pd.read_csv('./data/test.csv')
START_IDX = 38  # first chunk index processed for the test file
for idx in range(START_IDX, 56):
    t1 = datetime.datetime.now()
    ret = build_camera_moving_stats_ftr(df, idx)
    # The first chunk creates the file and writes the header; later chunks
    # append rows only.
    is_first = idx == START_IDX
    ret.to_csv('./data/camera_moving_stats_test.csv', mode='w' if is_first else 'a', index=False, header=is_first)
    # total_seconds() keeps fractions and does not wrap like `.seconds` does.
    print(f'[{idx}] {(datetime.datetime.now() - t1).total_seconds():.1f} secs')

[38] 33.0 secs
[39] 243.0 secs
[40] 237.0 secs
[41] 235.0 secs
[42] 232.0 secs
[43] 236.0 secs
[44] 234.0 secs
[45] 235.0 secs
[46] 238.0 secs
[47] 243.0 secs
[48] 232.0 secs
[49] 229.0 secs
[50] 237.0 secs
[51] 239.0 secs
[52] 241.0 secs
[53] 234.0 secs
[54] 240.0 secs
[55] 159.0 secs
