# [운동 동작 분류 AI - Private 8위](https://dacon.io/codeshare/2393)

[catboost](https://dailyheumsi.tistory.com/136) : Category + Boosting     
범주(category)가 많을 때 유용한 알고리즘으로 높은 정확도를 보임.    
학습시간이 다른 모델에 비해 오래 걸리나 예측시간을 매우 빠름.    



[eli5](https://hong-yp-ml-records.tistory.com/51)    
Permutation Importance는 모델을 학습시킨 뒤, 특정 feature의 데이터를 shuffle 했을 때, 검증 데이터 셋에 대한 예측성능을 확인하고 feature importance를 계산한다.



In [15]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)


from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
from sklearn.cluster import KMeans

import eli5
from eli5.sklearn import PermutationImportance

from scipy.stats import skew
from scipy.stats import kurtosis

import math

In [5]:
train = pd.read_csv('../workout/train_features.csv')
test = pd.read_csv('../workout/test_features.csv')
y = pd.read_csv('../workout/train_labels.csv')
submission = pd.read_csv('../workout/sample_submission.csv')

## Custom function 

In [6]:
def range_func(x):
    return np.max(x) - np.min(x)


def iqr_func2(x) :
    q3, q1 = np.percentile(x, [20, 80])
    return q3-q1

def iqr_func3(x) :
    q3, q1 = np.percentile(x, [40, 60])
    return q3-q1

def iqr_func4(x) :
    q3, q1 = np.percentile(x, [15, 95])
    return q3-q1

def premad(x):
    return np.median(np.absolute(x-np.median(x, axis = 0)), axis = 0 )

def preskew(x):
    return skew(x)

def prekurt(x):
    return kurtosis(x, fisher = True ) # 정규 분포 

## Create change Data 

시간 변동에 따른 수치 변화 데이터    

해당 코드는 중복 반복문으로 인해 시간이 다소 소용되기 때문에 Times 간의 변화량을 이후 `groupby().diff()`을 활용하여 따로 처리 

In [None]:
train_change = pd.DataFrame()
change_train = train.set_index('id').drop('time', axis = 1)

for j in train['id'].unique() :
    temp = change_train[change_train.index == j ]
    
    for i in range(599):
        change = temp.iloc[i+1, : ] - temp.iloc[i,:]
        train_change = train_change.append(change) 

In [None]:
test_change =  pd.DataFrame()
change_test = test.set_index('id').drop('time', axis = 1)

for j in test['id'].unique():
    temp = change_test[change_test.index == j]
    
    for i in range(599):
        change = temp.iloc[i+1, :] - temp.iloc[i, :]
        test_change = test_change.append(change)

## Feature Engineering 

In [11]:
# 에너지 계산 
train['acc_vector']   = np.sqrt( train['acc_x'] ** 2 + train['acc_y'] ** 2 + train['acc_z'] ** 2 ) 
train['acc_XYvector'] = np.sqrt( train['acc_x'] ** 2 + train['acc_y'] ** 2 )
train['acc_XZvector'] = np.sqrt( train['acc_x'] ** 2 + train['acc_z'] ** 2 )
train['acc_YZvector'] = np.sqrt( train['acc_y'] ** 2 + train['acc_z'] ** 2 ) 

train['gy_vector']    = np.sqrt( train['gy_x'] ** 2 + train['gy_y'] ** 2 + train['gy_z'] ** 2 )
train['gy_XYvector']  = np.sqrt( train['gy_x'] ** 2 + train['gy_y'] ** 2 )
train['gy_XZvector']  = np.sqrt( train['gy_x'] ** 2 + train['gy_z'] ** 2 )
train['gy_YZctor']    = np.sqrt( train['gy_y'] ** 2 + train['gy_z'] ** 2 )

In [12]:
test['acc_vector']   = np.sqrt( test['acc_x'] ** 2 + test['acc_y'] ** 2 + test['acc_z'] ** 2 ) 
test['acc_XYvector'] = np.sqrt( test['acc_x'] ** 2 + test['acc_y'] ** 2 )
test['acc_XZvector'] = np.sqrt( test['acc_x'] ** 2 + test['acc_z'] ** 2 )
test['acc_YZvector'] = np.sqrt( test['acc_y'] ** 2 + test['acc_z'] ** 2 ) 

test['gy_vector']    = np.sqrt( test['gy_x'] ** 2 + test['gy_y'] ** 2 + test['gy_z'] ** 2 )
test['gy_XYvector']  = np.sqrt( test['gy_x'] ** 2 + test['gy_y'] ** 2 )
test['gy_XZvector']  = np.sqrt( test['gy_x'] ** 2 + test['gy_z'] ** 2 )
test['gy_YZctor']    = np.sqrt( test['gy_y'] ** 2 + test['gy_z'] ** 2 )

In [13]:
# 자이로스코프 무게 중심 
train['gy_Centerofgravity'] = (train['gy_x'] + train['gy_y'] + train['gy_z']) / 3
test['gy_Centerofgravity'] = (test['gy_x'] + test['gy_y'] + test['gy_z']) / 3

### [Roll & Pitch](https://now0930.pe.kr/wordpress/?p=1226)


물체가 이동하면서 어떠한 외부요인에 의해 이동하려는 목표 대비 오차(error)가 발생하게 됨.   
이러한 오차를 발생시키는 원인은 yaw, roll, pitch .    
그 중 가속도 센서로 알 수 있는 건 Roll과 Pitch 값.  

<img src = 'RollPitch계산.jpg'> 

In [16]:
pi = math.pi
# Roll & Pitch 
train['roll']          = np.arctan(train['acc_y'] / np.sqrt(train['acc_x']**2 + train['acc_z']**2))
train['pitch']         = np.arctan(train['acc_x'] / np.sqrt(train['acc_y']**2 + train['acc_z']**2))
train['math_roll']     = np.arctan( - train['acc_x'] / np.sqrt(train['acc_y']**2 + train['acc_z']**2)) * (180/pi)
train['math_pitch']    = np.arctan( train['acc_y'] / np.sqrt(train['acc_x']**2 + train['acc_z']**2)) * (180/pi)
train['gy_roll']       = np.arctan(train['gy_y']/np.sqrt(train['gy_x']**2 + train['gy_z']**2))
train['gy_pitch']      = np.arctan(train['gy_x']/np.sqrt(train['gy_y']**2 + train['gy_z']**2))
train['gy_math_roll']  = np.arctan(-train['gy_x'] / np.sqrt(train['gy_y']**2 + train['gy_z']**2)) * (180/pi)
train['gy_math_pitch'] = np.arctan(train['gy_y'] / np.sqrt(train['gy_x']**2 + train['gy_z']**2)) * (180/pi)

In [17]:
test['roll']           = np.arctan(test['acc_y'] / np.sqrt(test['acc_x']**2 + test['acc_z']**2))
test['pitch']          = np.arctan(test['acc_x'] / np.sqrt(test['acc_y']**2 + test['acc_z']**2))
test['math_roll']      = np.arctan( - test['acc_x'] / np.sqrt(test['acc_y']**2 + test['acc_z']**2)) * (180/pi)
test['math_pitch']     = np.arctan( test['acc_y'] / np.sqrt(test['acc_x']**2 + test['acc_z']**2)) * (180/pi)
test['gy_roll']        = np.arctan(test['gy_y']/np.sqrt(test['gy_x']**2 + test['gy_z']**2))
test['gy_pitch']       = np.arctan(test['gy_x']/np.sqrt(test['gy_y']**2 + test['gy_z']**2))
test['gy_math_roll']   = np.arctan(-test['gy_x'] / np.sqrt(test['gy_y']**2 + test['gy_z']**2)) * (180/pi)
test['gy_math_pitch']  = np.arctan(test['gy_y'] / np.sqrt(test['gy_x']**2 + test['gy_z']**2)) * (180/pi)

In [18]:
train.head(3)

Unnamed: 0,id,time,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z,acc_vector,acc_XYvector,acc_XZvector,acc_YZvector,gy_vector,gy_XYvector,gy_XZvector,gy_YZctor,gy_Centerofgravity,roll,pitch,math_roll,math_pitch,gy_roll,gy_pitch,gy_math_roll,gy_math_pitch
0,0,0,1.206087,-0.179371,-0.148447,-0.591608,-30.54901,-31.676112,1.228355,1.219352,1.215188,0.232832,44.010999,30.554737,31.681636,44.007023,-20.93891,-0.14655,1.380095,-79.073624,-8.396683,-0.7672,-0.013443,0.770209,-43.957305
1,0,1,1.287696,-0.198974,-0.182444,0.3031,-39.139103,-24.927216,1.315689,1.302978,1.300557,0.269956,46.403958,39.140277,24.929059,46.402968,-21.254406,-0.151814,1.364146,-78.159835,-8.698302,-1.003661,0.006532,-0.374246,-57.505519
2,0,2,1.304609,-0.195114,-0.253382,-3.617278,-44.122565,-25.019629,1.343234,1.319119,1.328987,0.3198,50.851423,44.270593,25.279765,50.722604,-24.253157,-0.145773,1.330405,-76.226605,-8.352177,-1.050508,-0.071194,4.079137,-60.189701


In [23]:
train.columns[:17]

Index(['id', 'time', 'acc_x', 'acc_y', 'acc_z', 'gy_x', 'gy_y', 'gy_z',
       'acc_vector', 'acc_XYvector', 'acc_XZvector', 'acc_YZvector',
       'gy_vector', 'gy_XYvector', 'gy_XZvector', 'gy_YZctor',
       'gy_Centerofgravity'],
      dtype='object')

In [40]:
temp_train = train.iloc[:,:17].drop('time', axis = 1).groupby('id').agg(['max','min','mean','std','median'])

In [41]:
train_processing = train.drop('time', axis = 1).groupby('id').agg([range_func, iqr_func2, iqr_func3, iqr_func4,premad, preskew, prekurt])

In [151]:
train_process = pd.concat([temp_train, train_processing], axis = 1)

In [152]:
temp_test = test.iloc[:,:17].drop('time',axis = 1).groupby('id').agg(['max','min','mean','std','median'])
test_processing = test.drop('time', axis = 1).groupby('id').agg([range_func, iqr_func2, iqr_func3, iqr_func4, premad, preskew, prekurt])

In [153]:
test_process = pd.concat([temp_test, test_processing], axis = 1)
print(train_process.shape, test_process.shape)

(3125, 236) (782, 236)


In [154]:
train_process.columns = [i[0] +'_'+ i[1] for i in train_process.columns]
test_process.columns = [i[0] + '_'+ i[1] for i in test_process.columns]

In [155]:
train_process.columns

Index(['acc_x_max', 'acc_x_min', 'acc_x_mean', 'acc_x_std', 'acc_x_median',
       'acc_y_max', 'acc_y_min', 'acc_y_mean', 'acc_y_std', 'acc_y_median',
       ...
       'gy_math_roll_premad', 'gy_math_roll_preskew', 'gy_math_roll_prekurt',
       'gy_math_pitch_range_func', 'gy_math_pitch_iqr_func2',
       'gy_math_pitch_iqr_func3', 'gy_math_pitch_iqr_func4',
       'gy_math_pitch_premad', 'gy_math_pitch_preskew',
       'gy_math_pitch_prekurt'],
      dtype='object', length=236)

In [156]:
# 표준 편차 평균 
train_process['acc_std_mean'] = ( train_process['acc_x_std'] + train_process['acc_y_std'] + train_process['acc_z_std']) /3
train_process['gy_std_mean'] = ( train_process['gy_x_std'] + train_process['gy_y_std'] + train_process['gy_z_std']) /3


test_process['acc_std_mean'] = ( test_process['acc_x_std'] + test_process['acc_y_std'] + test_process['acc_z_std']) /3
test_process['gy_std_mean'] = ( test_process['gy_x_std'] + test_process['gy_y_std'] + test_process['gy_z_std']) /3


In [157]:
# 가속도계 첫번째 데이터 

train_acc_h1 = train.iloc[:,:5].drop_duplicates(['id'], keep ='first').drop('time', axis = 1)
train_acc_h1.columns = ['id','first_acc_x','first_acc_y','first_acc_z']
train_acc_h1.set_index('id', inplace =True)
train_acc_h1.head(2)

Unnamed: 0_level_0,first_acc_x,first_acc_y,first_acc_z
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1.206087,-0.179371,-0.148447
1,-0.211795,-0.07876,0.854627


In [158]:
test_acc_h1 = test.iloc[:,:5].drop_duplicates(['id'], keep ='first').drop('time', axis = 1)
test_acc_h1.columns = ['id','first_acc_x','first_acc_y','first_acc_z']
test_acc_h1.set_index('id', inplace =True)
test_acc_h1.head(2)

Unnamed: 0_level_0,first_acc_x,first_acc_y,first_acc_z
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3125,-0.6281,-0.160155,0.151487
3126,0.304222,1.529324,-0.338265


In [159]:
# 가속도계 첫 3초

train.iloc[:,:5][train['time']<150] # 0.02초 * 150 Times = 3초 

Unnamed: 0,id,time,acc_x,acc_y,acc_z
0,0,0,1.206087,-0.179371,-0.148447
1,0,1,1.287696,-0.198974,-0.182444
2,0,2,1.304609,-0.195114,-0.253382
3,0,3,1.293095,-0.230366,-0.215210
4,0,4,1.300887,-0.187757,-0.222523
...,...,...,...,...,...
1874545,3124,145,-0.807254,-0.555749,0.263630
1874546,3124,146,-0.773207,-0.591142,0.283675
1874547,3124,147,-0.747712,-0.602606,0.305241
1874548,3124,148,-0.715429,-0.609984,0.338578


In [160]:
train_acc_3 = train.iloc[:,:5][train['time']<150].drop('time',axis = 1).groupby('id').mean()  
train_acc_3.columns = ['head_acc_x','head_acc_y','head_acc_z']
train_acc_3.head(3)

Unnamed: 0_level_0,head_acc_x,head_acc_y,head_acc_z
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.930401,-0.204424,-0.357682
1,-0.313634,-0.567014,0.471395
2,0.005221,-0.1131,0.600223


In [161]:
test_acc_3 = test.iloc[:,:5][test['time']<150].drop('time',axis = 1).groupby('id').mean()  
test_acc_3.columns = ['head_acc_x','head_acc_y','head_acc_z']
test_acc_3.head(3)

Unnamed: 0_level_0,head_acc_x,head_acc_y,head_acc_z
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3125,-0.959972,-0.027414,-0.263373
3126,-0.455545,0.862888,-0.468532
3127,0.469283,0.979939,-0.211281


In [162]:
train_preprocess = pd.concat([train_process, train_acc_h1, train_acc_3], axis= 1)
test_preprocess = pd.concat([test_process, test_acc_h1, test_acc_3], axis = 1)

In [163]:
print(train_preprocess.shape, test_preprocess.shape)

(3125, 244) (782, 244)


In [164]:
# 자이로스코프 첫 3초 

gy = ['id', 'gy_x','gy_y','gy_z']
train_gy_3 = train[gy][train['time'] < 150].groupby('id').mean()
train_gy_3.columns = ['head_gy_x','head_gy_y','head_gy_z']

In [165]:
test_gy_3 = test[gy][test['time'] <150].groupby('id').mean()
test_gy_3.columns = ['head_gy_x','head_gy_y','head_gy_z']

In [166]:
train_preprocess = pd.concat([train_preprocess, train_gy_3], axis = 1)
test_preprocess = pd.concat([test_preprocess, test_gy_3], axis = 1)

In [167]:
print(train_preprocess.shape, test_preprocess.shape)

(3125, 247) (782, 247)


In [168]:
# 센서 변화량 측정  
train.groupby(['id'])['acc_x', 'acc_y','acc_z','gy_x','gy_y','gy_z'].diff() 

Unnamed: 0,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z
0,,,,,,
1,0.081610,-0.019602,-0.033997,0.894708,-8.590094,6.748896
2,0.016913,0.003859,-0.070939,-3.920378,-4.983461,-0.092413
3,-0.011514,-0.035252,0.038173,6.330264,-9.475278,-2.434384
4,0.007792,0.042609,-0.007313,1.573721,-4.308718,-0.507222
...,...,...,...,...,...,...
1874995,0.029693,0.006073,0.016001,-3.012893,-0.692606,-0.167602
1874996,0.029493,-0.000109,0.035515,-0.781232,2.216855,-0.334650
1874997,0.018306,-0.008159,0.034891,2.275994,3.020737,-2.740038
1874998,0.034197,-0.015940,0.009582,4.236545,-0.363423,-0.894353


In [169]:
temp = pd.concat([train['id'], train.groupby(['id'])['acc_x', 'acc_y','acc_z','gy_x','gy_y','gy_z'].diff()], axis= 1 ).dropna()

In [170]:
temp['Croll'] = np.arctan(temp['acc_y']/ np.sqrt(temp['acc_x']**2 + temp['acc_z']**2))
temp['Cpitch'] = np.arctan(temp['acc_x']/np.sqrt(temp['acc_y']**2 + temp['acc_z']**2))
temp['Cmath_roll'] = np.arctan(-temp['acc_x']/ np.sqrt(temp['acc_y']**2 + temp['acc_z']**2)) * (180/pi)
temp['Cmath_pitch'] = np.arctan(temp['acc_y']/ np.sqrt(temp['acc_x']**2 + temp['acc_z']**2)) * (180/pi)

temp['Cgyroll'] = np.arctan(temp['gy_y']/np.sqrt(temp['gy_x']**2 + temp['gy_z']**2))
temp['Cgypitch'] = np.arctan(temp['gy_x']/np.sqrt(temp['gy_y']**2 + temp['gy_z']**2))
temp['Cgymath_roll'] = np.arctan(-temp['gy_x']/np.sqrt(temp['gy_y']**2 + temp['gy_z']**2)) *(180/pi)
temp['Cgymath_pitch'] = np.arctan(temp['gy_y']/np.sqrt(temp['gy_x']**2 + temp['gy_z']**2)) *(180/pi)

In [171]:
ca_list = ['id','Cacc_x','Cacc_y','Cacc_z','Cgy_x','Cgy_y','Cgy_z']
temp.columns = ca_list + list(temp.iloc[:,7:].columns)

In [172]:
tmp = temp.groupby('id').agg([range_func, iqr_func2, iqr_func3, iqr_func4, premad, preskew, prekurt])
tmp.head(3)

Unnamed: 0_level_0,Cacc_x,Cacc_x,Cacc_x,Cacc_x,Cacc_x,Cacc_x,Cacc_x,Cacc_y,Cacc_y,Cacc_y,Cacc_y,Cacc_y,Cacc_y,Cacc_y,Cacc_z,Cacc_z,Cacc_z,Cacc_z,Cacc_z,Cacc_z,Cacc_z,Cgy_x,Cgy_x,Cgy_x,Cgy_x,Cgy_x,Cgy_x,Cgy_x,Cgy_y,Cgy_y,Cgy_y,Cgy_y,Cgy_y,Cgy_y,Cgy_y,Cgy_z,Cgy_z,Cgy_z,Cgy_z,Cgy_z,Cgy_z,Cgy_z,Croll,Croll,Croll,Croll,Croll,Croll,Croll,Cpitch,Cpitch,Cpitch,Cpitch,Cpitch,Cpitch,Cpitch,Cmath_roll,Cmath_roll,Cmath_roll,Cmath_roll,Cmath_roll,Cmath_roll,Cmath_roll,Cmath_pitch,Cmath_pitch,Cmath_pitch,Cmath_pitch,Cmath_pitch,Cmath_pitch,Cmath_pitch,Cgyroll,Cgyroll,Cgyroll,Cgyroll,Cgyroll,Cgyroll,Cgyroll,Cgypitch,Cgypitch,Cgypitch,Cgypitch,Cgypitch,Cgypitch,Cgypitch,Cgymath_roll,Cgymath_roll,Cgymath_roll,Cgymath_roll,Cgymath_roll,Cgymath_roll,Cgymath_roll,Cgymath_pitch,Cgymath_pitch,Cgymath_pitch,Cgymath_pitch,Cgymath_pitch,Cgymath_pitch,Cgymath_pitch
Unnamed: 0_level_1,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt,range_func,iqr_func2,iqr_func3,iqr_func4,premad,preskew,prekurt
id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2,Unnamed: 92_level_2,Unnamed: 93_level_2,Unnamed: 94_level_2,Unnamed: 95_level_2,Unnamed: 96_level_2,Unnamed: 97_level_2,Unnamed: 98_level_2
0,0.350553,-0.086247,-0.027968,-0.14036,0.034023,0.201069,0.184567,0.510419,-0.1088,-0.028574,-0.180126,0.041689,-0.224045,0.79973,0.500255,-0.105577,-0.028014,-0.188058,0.038816,0.166563,1.211711,65.48026,-13.889571,-3.869127,-23.585541,5.419533,0.041505,0.868294,37.129643,-8.924823,-2.690859,-14.098464,3.699058,0.359877,0.502568,34.13491,-7.172572,-2.040204,-12.030779,2.833154,0.20025,0.693328,3.034129,-1.458924,-0.420265,-2.101978,0.598439,-0.004368,-0.93404,2.871435,-1.060137,-0.331797,-1.652963,0.415447,0.051005,-0.513209,164.521122,-60.741397,-19.010576,-93.186134,23.803348,-0.051005,-0.513209,173.842806,-83.590203,-24.079415,-120.434447,34.288042,-0.004368,-0.93404,2.96486,-1.117733,-0.333715,-1.703317,0.447856,-0.003416,-0.584324,3.071607,-1.855611,-0.657876,-2.326526,0.774576,0.09075,-1.277056,175.990145,-106.31865,-37.693509,-133.518563,44.379921,-0.09075,-1.277056,169.873979,-64.041361,-19.120451,-97.59285,25.660234,-0.003416,-0.584324
1,1.834542,-0.116587,-0.021564,-0.259661,0.038897,0.309264,15.576594,0.804631,-0.111154,-0.023733,-0.223176,0.040081,0.127286,3.494794,0.851648,-0.085792,-0.020637,-0.186626,0.033583,0.902761,5.991293,273.314699,-21.121511,-4.788293,-60.485939,7.30574,0.256503,5.516639,254.7305,-18.228214,-3.362948,-40.85991,6.293965,0.320409,9.681797,131.191643,-11.465256,-2.487675,-33.794652,3.947651,0.068497,3.872017,2.949186,-1.355521,-0.467312,-1.940425,0.577064,-0.012845,-0.94599,3.069593,-1.448206,-0.439248,-2.033844,0.578359,0.00044,-0.955137,175.874715,-82.976106,-25.167077,-118.545346,33.137543,-0.00044,-0.955137,168.9759,-77.665614,-26.774979,-111.178168,33.063357,-0.012845,-0.94599,3.001347,-1.227055,-0.369492,-1.899532,0.489228,-0.073283,-0.620949,3.078396,-1.67191,-0.498409,-2.271373,0.713602,0.044136,-1.145265,176.3791,-95.793405,-28.556755,-129.088877,40.886396,-0.044136,-1.145265,171.964513,-70.305098,-21.170336,-108.835181,28.030691,-0.073283,-0.620949
2,1.125718,-0.12428,-0.028638,-0.323444,0.043256,-0.002835,3.275095,1.907342,-0.141848,-0.029962,-0.39471,0.051895,-0.053779,5.570731,0.952028,-0.110865,-0.031669,-0.204149,0.04154,0.075537,4.722602,221.74182,-19.978814,-4.692393,-52.271026,7.473219,0.205375,4.997718,130.866485,-24.397771,-7.384581,-43.581835,9.835206,0.172387,1.098132,63.751136,-9.539324,-2.311907,-19.274785,3.842362,0.351238,2.38941,3.006554,-1.518542,-0.411151,-2.178463,0.621968,-0.003137,-0.991472,2.991025,-1.265548,-0.327568,-1.908854,0.516071,-0.009967,-0.731086,171.373104,-72.51054,-18.768239,-108.600091,29.568697,0.009967,-0.731086,172.262829,-87.006066,-23.557219,-124.81675,35.636118,-0.003137,-0.991472,3.10835,-1.818905,-0.642374,-2.336374,0.732678,-0.047991,-1.184095,3.060012,-1.479478,-0.40642,-2.15676,0.58199,0.035938,-0.89736,175.325789,-84.767847,-23.286136,-122.187845,33.345552,-0.035938,-0.89736,178.095309,-104.215556,-36.805336,-133.864373,41.979341,-0.047991,-1.184095


In [173]:
train_preprocess = pd.concat([train_preprocess, tmp], axis = 1)

In [174]:
temp = pd.concat([test['id'], test.groupby(['id'])['acc_x', 'acc_y','acc_z','gy_x','gy_y','gy_z'].diff()], axis= 1 ).dropna()
temp['Croll'] = np.arctan(temp['acc_y']/ np.sqrt(temp['acc_x']**2 + temp['acc_z']**2))
temp['Cpitch'] = np.arctan(temp['acc_x']/np.sqrt(temp['acc_y']**2 + temp['acc_z']**2))
temp['Cmath_roll'] = np.arctan(-temp['acc_x']/ np.sqrt(temp['acc_y']**2 + temp['acc_z']**2)) * (180/pi)
temp['Cmath_pitch'] = np.arctan(temp['acc_y']/ np.sqrt(temp['acc_x']**2 + temp['acc_z']**2)) * (180/pi)

temp['Cgyroll'] = np.arctan(temp['gy_y']/np.sqrt(temp['gy_x']**2 + temp['gy_z']**2))
temp['Cgypitch'] = np.arctan(temp['gy_x']/np.sqrt(temp['gy_y']**2 + temp['gy_z']**2))
temp['Cgymath_roll'] = np.arctan(-temp['gy_x']/np.sqrt(temp['gy_y']**2 + temp['gy_z']**2)) *(180/pi)
temp['Cgymath_pitch'] = np.arctan(temp['gy_y']/np.sqrt(temp['gy_x']**2 + temp['gy_z']**2)) *(180/pi)

temp.columns = ca_list + list(temp.iloc[:,7:].columns)
tmp = temp.groupby('id').agg([range_func, iqr_func2, iqr_func3, iqr_func4, premad, preskew, prekurt])
test_preprocess = pd.concat([test_preprocess, tmp], axis = 1)

In [175]:
print(train_preprocess.shape, test_preprocess.shape)

(3125, 345) (782, 345)


## Clustering Kmeans
5개의 군집을 새로운 파생변수로 사용. 

In [176]:
model = KMeans(n_clusters = 5, random_state = 20)
model.fit(train_preprocess)
train_predict = model.predict(train_preprocess)

In [177]:
train_preprocess['cluster'] = train_predict

In [178]:
test_predict = model.predict(test_preprocess)
test_preprocess['cluster'] = test_predict

In [179]:
train_preprocess.iloc[:,247:-1].columns

Index([       ('Cacc_x', 'range_func'),         ('Cacc_x', 'iqr_func2'),
               ('Cacc_x', 'iqr_func3'),         ('Cacc_x', 'iqr_func4'),
                  ('Cacc_x', 'premad'),           ('Cacc_x', 'preskew'),
                 ('Cacc_x', 'prekurt'),        ('Cacc_y', 'range_func'),
               ('Cacc_y', 'iqr_func2'),         ('Cacc_y', 'iqr_func3'),
               ('Cacc_y', 'iqr_func4'),            ('Cacc_y', 'premad'),
                 ('Cacc_y', 'preskew'),           ('Cacc_y', 'prekurt'),
              ('Cacc_z', 'range_func'),         ('Cacc_z', 'iqr_func2'),
               ('Cacc_z', 'iqr_func3'),         ('Cacc_z', 'iqr_func4'),
                  ('Cacc_z', 'premad'),           ('Cacc_z', 'preskew'),
                 ('Cacc_z', 'prekurt'),         ('Cgy_x', 'range_func'),
                ('Cgy_x', 'iqr_func2'),          ('Cgy_x', 'iqr_func3'),
                ('Cgy_x', 'iqr_func4'),             ('Cgy_x', 'premad'),
                  ('Cgy_x', 'preskew'),            

In [180]:
train_preprocess.iloc[:,-1:].columns

Index(['cluster'], dtype='object')

In [181]:
columns_names = list(train_preprocess.iloc[:,:247].columns)
columns_names.extend([ i[0]+'_'+i[1] for i in train_preprocess.iloc[:,247:-1].columns ] )
columns_names.extend(list(train_preprocess.iloc[:,-1:].columns))

In [182]:
len(columns_names)

346

In [183]:
train_preprocess.columns = columns_names
test_preprocess.columns = columns_names

In [184]:
train_preprocess.head(3)

Unnamed: 0_level_0,acc_x_max,acc_x_min,acc_x_mean,acc_x_std,acc_x_median,acc_y_max,acc_y_min,acc_y_mean,acc_y_std,acc_y_median,acc_z_max,acc_z_min,acc_z_mean,acc_z_std,acc_z_median,gy_x_max,gy_x_min,gy_x_mean,gy_x_std,gy_x_median,gy_y_max,gy_y_min,gy_y_mean,gy_y_std,gy_y_median,gy_z_max,gy_z_min,gy_z_mean,gy_z_std,gy_z_median,acc_vector_max,acc_vector_min,acc_vector_mean,acc_vector_std,acc_vector_median,acc_XYvector_max,acc_XYvector_min,acc_XYvector_mean,acc_XYvector_std,acc_XYvector_median,acc_XZvector_max,acc_XZvector_min,acc_XZvector_mean,acc_XZvector_std,acc_XZvector_median,acc_YZvector_max,acc_YZvector_min,acc_YZvector_mean,acc_YZvector_std,acc_YZvector_median,gy_vector_max,gy_vector_min,gy_vector_mean,gy_vector_std,gy_vector_median,gy_XYvector_max,gy_XYvector_min,gy_XYvector_mean,gy_XYvector_std,gy_XYvector_median,gy_XZvector_max,gy_XZvector_min,gy_XZvector_mean,gy_XZvector_std,gy_XZvector_median,gy_YZctor_max,gy_YZctor_min,gy_YZctor_mean,gy_YZctor_std,gy_YZctor_median,gy_Centerofgravity_max,gy_Centerofgravity_min,gy_Centerofgravity_mean,gy_Centerofgravity_std,gy_Centerofgravity_median,acc_x_range_func,acc_x_iqr_func2,acc_x_iqr_func3,acc_x_iqr_func4,acc_x_premad,acc_x_preskew,acc_x_prekurt,acc_y_range_func,acc_y_iqr_func2,acc_y_iqr_func3,acc_y_iqr_func4,acc_y_premad,acc_y_preskew,acc_y_prekurt,acc_z_range_func,acc_z_iqr_func2,acc_z_iqr_func3,acc_z_iqr_func4,acc_z_premad,acc_z_preskew,acc_z_prekurt,gy_x_range_func,gy_x_iqr_func2,gy_x_iqr_func3,gy_x_iqr_func4,gy_x_premad,gy_x_preskew,gy_x_prekurt,gy_y_range_func,gy_y_iqr_func2,gy_y_iqr_func3,gy_y_iqr_func4,gy_y_premad,gy_y_preskew,gy_y_prekurt,gy_z_range_func,gy_z_iqr_func2,gy_z_iqr_func3,gy_z_iqr_func4,gy_z_premad,gy_z_preskew,gy_z_prekurt,acc_vector_range_func,acc_vector_iqr_func2,acc_vector_iqr_func3,acc_vector_iqr_func4,acc_vector_premad,acc_vector_preskew,acc_vector_prekurt,acc_XYvector_range_func,acc_XYvector_iqr_func2,acc_XYvector_iqr_func3,acc_XYvector_iqr_func4,acc_XYvector_premad,acc_XYvector_preskew,acc_XYvector_prekurt,acc_XZvector_range_func,acc_XZvector_iqr_func2,acc_XZvector_iqr_func3,acc_XZvector_iqr_func4,acc_XZvector_premad,acc_XZvector_preskew,acc_XZvector_prekurt,acc_YZvector_range_func,acc_YZvector_iqr_func2,acc_YZvector_iqr_func3,acc_YZvector_iqr_func4,acc_YZvector_premad,acc_YZvector_preskew,acc_YZvector_prekurt,gy_vector_range_func,gy_vector_iqr_func2,gy_vector_iqr_func3,gy_vector_iqr_func4,gy_vector_premad,gy_vector_preskew,gy_vector_prekurt,gy_XYvector_range_func,gy_XYvector_iqr_func2,gy_XYvector_iqr_func3,gy_XYvector_iqr_func4,gy_XYvector_premad,gy_XYvector_preskew,gy_XYvector_prekurt,gy_XZvector_range_func,gy_XZvector_iqr_func2,gy_XZvector_iqr_func3,gy_XZvector_iqr_func4,gy_XZvector_premad,gy_XZvector_preskew,gy_XZvector_prekurt,gy_YZctor_range_func,gy_YZctor_iqr_func2,gy_YZctor_iqr_func3,gy_YZctor_iqr_func4,gy_YZctor_premad,gy_YZctor_preskew,gy_YZctor_prekurt,gy_Centerofgravity_range_func,gy_Centerofgravity_iqr_func2,gy_Centerofgravity_iqr_func3,gy_Centerofgravity_iqr_func4,gy_Centerofgravity_premad,gy_Centerofgravity_preskew,gy_Centerofgravity_prekurt,roll_range_func,roll_iqr_func2,roll_iqr_func3,roll_iqr_func4,roll_premad,roll_preskew,roll_prekurt,pitch_range_func,pitch_iqr_func2,pitch_iqr_func3,pitch_iqr_func4,pitch_premad,pitch_preskew,pitch_prekurt,math_roll_range_func,math_roll_iqr_func2,math_roll_iqr_func3,math_roll_iqr_func4,math_roll_premad,math_roll_preskew,math_roll_prekurt,math_pitch_range_func,math_pitch_iqr_func2,math_pitch_iqr_func3,math_pitch_iqr_func4,math_pitch_premad,math_pitch_preskew,math_pitch_prekurt,gy_roll_range_func,gy_roll_iqr_func2,gy_roll_iqr_func3,gy_roll_iqr_func4,gy_roll_premad,gy_roll_preskew,gy_roll_prekurt,gy_pitch_range_func,gy_pitch_iqr_func2,gy_pitch_iqr_func3,gy_pitch_iqr_func4,gy_pitch_premad,gy_pitch_preskew,gy_pitch_prekurt,gy_math_roll_range_func,gy_math_roll_iqr_func2,gy_math_roll_iqr_func3,gy_math_roll_iqr_func4,gy_math_roll_premad,gy_math_roll_preskew,gy_math_roll_prekurt,gy_math_pitch_range_func,gy_math_pitch_iqr_func2,gy_math_pitch_iqr_func3,gy_math_pitch_iqr_func4,gy_math_pitch_premad,gy_math_pitch_preskew,gy_math_pitch_prekurt,acc_std_mean,gy_std_mean,first_acc_x,first_acc_y,first_acc_z,head_acc_x,head_acc_y,head_acc_z,head_gy_x,head_gy_y,head_gy_z,Cacc_x_range_func,Cacc_x_iqr_func2,Cacc_x_iqr_func3,Cacc_x_iqr_func4,Cacc_x_premad,Cacc_x_preskew,Cacc_x_prekurt,Cacc_y_range_func,Cacc_y_iqr_func2,Cacc_y_iqr_func3,Cacc_y_iqr_func4,Cacc_y_premad,Cacc_y_preskew,Cacc_y_prekurt,Cacc_z_range_func,Cacc_z_iqr_func2,Cacc_z_iqr_func3,Cacc_z_iqr_func4,Cacc_z_premad,Cacc_z_preskew,Cacc_z_prekurt,Cgy_x_range_func,Cgy_x_iqr_func2,Cgy_x_iqr_func3,Cgy_x_iqr_func4,Cgy_x_premad,Cgy_x_preskew,Cgy_x_prekurt,Cgy_y_range_func,Cgy_y_iqr_func2,Cgy_y_iqr_func3,Cgy_y_iqr_func4,Cgy_y_premad,Cgy_y_preskew,Cgy_y_prekurt,Cgy_z_range_func,Cgy_z_iqr_func2,Cgy_z_iqr_func3,Cgy_z_iqr_func4,Cgy_z_premad,Cgy_z_preskew,Cgy_z_prekurt,Croll_range_func,Croll_iqr_func2,Croll_iqr_func3,Croll_iqr_func4,Croll_premad,Croll_preskew,Croll_prekurt,Cpitch_range_func,Cpitch_iqr_func2,Cpitch_iqr_func3,Cpitch_iqr_func4,Cpitch_premad,Cpitch_preskew,Cpitch_prekurt,Cmath_roll_range_func,Cmath_roll_iqr_func2,Cmath_roll_iqr_func3,Cmath_roll_iqr_func4,Cmath_roll_premad,Cmath_roll_preskew,Cmath_roll_prekurt,Cmath_pitch_range_func,Cmath_pitch_iqr_func2,Cmath_pitch_iqr_func3,Cmath_pitch_iqr_func4,Cmath_pitch_premad,Cmath_pitch_preskew,Cmath_pitch_prekurt,Cgyroll_range_func,Cgyroll_iqr_func2,Cgyroll_iqr_func3,Cgyroll_iqr_func4,Cgyroll_premad,Cgyroll_preskew,Cgyroll_prekurt,Cgypitch_range_func,Cgypitch_iqr_func2,Cgypitch_iqr_func3,Cgypitch_iqr_func4,Cgypitch_premad,Cgypitch_preskew,Cgypitch_prekurt,Cgymath_roll_range_func,Cgymath_roll_iqr_func2,Cgymath_roll_iqr_func3,Cgymath_roll_iqr_func4,Cgymath_roll_premad,Cgymath_roll_preskew,Cgymath_roll_prekurt,Cgymath_pitch_range_func,Cgymath_pitch_iqr_func2,Cgymath_pitch_iqr_func3,Cgymath_pitch_iqr_func4,Cgymath_pitch_premad,Cgymath_pitch_preskew,Cgymath_pitch_prekurt,cluster
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1
0,1.344268,0.59194,0.931329,0.191479,0.956149,0.176871,-0.624113,-0.218471,0.177131,-0.240638,-0.054876,-0.786336,-0.370422,0.135131,-0.346749,31.644123,-46.254836,-1.865269,13.284216,-1.273569,69.847244,-85.887677,-3.359506,24.300479,-2.36223,55.953827,-79.930029,1.182107,25.275185,1.913286,1.464711,0.700301,1.053002,0.172356,1.052954,1.390464,0.595115,0.967736,0.215859,1.006181,1.42093,0.699634,1.018122,0.151276,1.013342,0.794231,0.232832,0.473279,0.102563,0.467988,96.204555,1.978401,32.118575,19.715056,27.491634,88.634207,0.930562,23.387205,15.293768,19.912021,82.06914,0.473673,24.576975,14.668129,21.917642,94.548967,1.281162,28.110417,21.225888,21.458241,43.994765,-45.377153,-1.347556,16.556301,-1.259386,0.752327,-0.369662,-0.172673,-0.544985,0.164385,0.106076,-1.161667,0.800985,-0.339302,-0.12396,-0.468257,0.138511,0.054884,-0.881409,0.731459,-0.230312,-0.060193,-0.353804,0.082231,-0.595532,-0.148805,77.898959,-21.73399,-6.42314,-32.746229,8.773819,-0.398648,0.211136,155.734921,-32.537213,-8.711901,-62.134828,13.181243,-0.204916,0.732569,135.883856,-38.84421,-9.41338,-63.741066,14.828431,-0.577464,0.366905,0.764409,-0.328811,-0.130663,-0.491498,0.140697,0.178378,-0.981278,0.795349,-0.423951,-0.205698,-0.597643,0.196965,0.051286,-1.299702,0.721296,-0.27887,-0.091615,-0.45108,0.117718,0.250073,-0.730244,0.5614,-0.178997,-0.052528,-0.284987,0.072405,0.243839,-0.333986,94.226154,-32.373991,-10.771451,-59.674426,12.317919,0.923424,0.319576,87.703645,-24.420074,-6.823188,-44.473889,8.61002,1.096306,1.105979,81.595467,-23.451244,-7.117599,-40.513998,9.512299,0.899471,0.85873,93.267805,-35.619047,-11.004654,-62.935609,12.799203,0.96537,0.17889,89.371917,-26.895298,-8.555599,-43.74559,10.716499,-0.081081,-0.204728,0.724141,-0.283079,-0.087735,-0.416993,0.103789,0.322907,-0.706524,0.708736,-0.209801,-0.04876,-0.323442,0.069249,-0.775604,0.025097,40.607573,-12.020709,-2.793739,-21.861214,3.96765,0.775604,0.025097,41.490237,-16.219237,-5.026859,-23.891958,5.946675,0.322907,-0.706524,2.833836,-1.305992,-0.490935,-1.791598,0.533758,0.094874,-1.064726,2.977117,-0.982953,-0.247226,-1.925501,0.351814,0.12856,-0.14317,170.576232,-56.319063,-14.164984,-92.578624,20.157464,-0.12856,-0.14317,162.366819,-74.827804,-28.128525,-102.651024,30.582057,0.094874,-1.064726,0.167914,20.953293,1.206087,-0.179371,-0.148447,0.930401,-0.204424,-0.357682,-2.033206,-7.620945,-1.012112,0.350553,-0.086247,-0.027968,-0.14036,0.034023,0.201069,0.184567,0.510419,-0.1088,-0.028574,-0.180126,0.041689,-0.224045,0.79973,0.500255,-0.105577,-0.028014,-0.188058,0.038816,0.166563,1.211711,65.48026,-13.889571,-3.869127,-23.585541,5.419533,0.041505,0.868294,37.129643,-8.924823,-2.690859,-14.098464,3.699058,0.359877,0.502568,34.13491,-7.172572,-2.040204,-12.030779,2.833154,0.20025,0.693328,3.034129,-1.458924,-0.420265,-2.101978,0.598439,-0.004368,-0.93404,2.871435,-1.060137,-0.331797,-1.652963,0.415447,0.051005,-0.513209,164.521122,-60.741397,-19.010576,-93.186134,23.803348,-0.051005,-0.513209,173.842806,-83.590203,-24.079415,-120.434447,34.288042,-0.004368,-0.93404,2.96486,-1.117733,-0.333715,-1.703317,0.447856,-0.003416,-0.584324,3.071607,-1.855611,-0.657876,-2.326526,0.774576,0.09075,-1.277056,175.990145,-106.31865,-37.693509,-133.518563,44.379921,-0.09075,-1.277056,169.873979,-64.041361,-19.120451,-97.59285,25.660234,-0.003416,-0.584324,0
1,1.23402,-2.156208,-0.76658,0.495528,-0.805767,0.700065,-1.295598,-0.317258,0.336415,-0.228905,0.888661,-1.019531,-0.004223,0.499395,-0.034583,286.624363,-325.328531,11.0716,79.244561,3.81065,389.60806,-315.096003,1.740475,96.005289,8.043707,340.170199,-270.980823,1.393294,75.545343,-0.655819,2.196166,0.267319,1.096231,0.306356,1.051493,2.159331,0.148458,0.942026,0.399093,0.916721,2.193095,0.258524,0.974214,0.364569,0.951912,1.344783,0.017934,0.614497,0.291843,0.657428,536.214788,0.993903,111.047766,94.759619,85.896843,455.1807,0.61796,92.919778,83.510694,69.823901,403.100923,0.771792,82.785383,72.433885,64.964235,460.382431,0.353606,86.654114,86.067543,61.04299,307.888702,-282.173222,4.735123,65.497872,7.167186,3.390228,-0.89232,-0.259957,-1.126772,0.321762,-0.202959,0.229016,1.995663,-0.62478,-0.261024,-0.888156,0.232913,-0.145735,-0.589686,1.908192,-0.911407,-0.190953,-1.412183,0.38246,-0.137812,-0.764662,611.952894,-113.384251,-20.81652,-192.176608,37.677737,-0.345188,2.101102,704.704062,-81.603851,-20.981261,-193.633032,27.991032,-0.486011,2.828346,611.151022,-87.20543,-15.41556,-165.339161,34.252342,0.013679,3.906192,1.928846,-0.342386,-0.104838,-0.906596,0.130239,1.127573,1.962747,2.010872,-0.545287,-0.160073,-1.188818,0.198182,0.485998,0.525718,1.934571,-0.574976,-0.18191,-1.128214,0.212615,0.782876,0.624354,1.326849,-0.59837,-0.185355,-0.74988,0.238548,-0.285172,-0.998284,535.220885,-139.487688,-37.597367,-260.125009,49.018489,1.610304,2.953581,454.56274,-121.940758,-29.32638,-243.763534,43.957704,1.653298,3.086353,402.329132,-110.01277,-35.747911,-206.412478,43.126617,1.537488,2.929244,460.028825,-113.475881,-30.858755,-256.10974,39.488449,1.756634,3.057427,590.061924,-50.7789,-12.641309,-133.801698,18.503918,-0.391348,5.02585,1.73944,-0.718567,-0.272615,-1.012047,0.228442,-0.389179,-0.938336,2.299613,-1.032235,-0.214801,-1.240473,0.386134,0.41678,-0.721744,131.758129,-59.142701,-12.307163,-72.968196,22.123866,-0.41678,-0.721744,99.662551,-41.170872,-15.619687,-57.986042,13.088737,-0.389179,-0.938336,3.082769,-1.296626,-0.379454,-1.989773,0.51729,-0.150883,-0.683533,2.866306,-1.475187,-0.395792,-1.982914,0.623829,-0.100541,-1.006218,164.227251,-84.522017,-22.677214,-119.037459,35.742781,0.100541,-1.006218,176.629662,-74.291169,-21.741119,-114.005603,29.638538,-0.150883,-0.683533,0.443779,83.598398,-0.211795,-0.07876,0.854627,-0.313634,-0.567014,0.471395,-26.182937,8.268198,10.465675,1.834542,-0.116587,-0.021564,-0.259661,0.038897,0.309264,15.576594,0.804631,-0.111154,-0.023733,-0.223176,0.040081,0.127286,3.494794,0.851648,-0.085792,-0.020637,-0.186626,0.033583,0.902761,5.991293,273.314699,-21.121511,-4.788293,-60.485939,7.30574,0.256503,5.516639,254.7305,-18.228214,-3.362948,-40.85991,6.293965,0.320409,9.681797,131.191643,-11.465256,-2.487675,-33.794652,3.947651,0.068497,3.872017,2.949186,-1.355521,-0.467312,-1.940425,0.577064,-0.012845,-0.94599,3.069593,-1.448206,-0.439248,-2.033844,0.578359,0.00044,-0.955137,175.874715,-82.976106,-25.167077,-118.545346,33.137543,-0.00044,-0.955137,168.9759,-77.665614,-26.774979,-111.178168,33.063357,-0.012845,-0.94599,3.001347,-1.227055,-0.369492,-1.899532,0.489228,-0.073283,-0.620949,3.078396,-1.67191,-0.498409,-2.271373,0.713602,0.044136,-1.145265,176.3791,-95.793405,-28.556755,-129.088877,40.886396,-0.044136,-1.145265,171.964513,-70.305098,-21.170336,-108.835181,28.030691,-0.073283,-0.620949,2
2,1.219836,-1.142847,0.039836,0.711972,0.140667,0.650645,-0.69099,-0.082403,0.147127,-0.062598,1.332992,0.073846,0.626012,0.248807,0.634781,73.525082,-164.779067,-8.472951,25.422926,-8.112557,297.320834,-249.953944,0.597877,118.956646,19.306132,55.642836,-44.192071,3.053291,13.920337,3.568888,1.356397,0.545285,0.986015,0.132446,0.986377,1.248613,0.030381,0.68823,0.249982,0.736,1.334471,0.531541,0.971349,0.133597,0.971316,1.354941,0.169603,0.64631,0.253935,0.654628,303.774023,7.665503,106.133464,61.55708,107.395295,303.460469,6.253668,105.003429,61.847803,106.603665,165.242501,1.125361,24.209893,18.282571,19.663983,298.416928,1.966408,102.705315,61.548063,105.309953,93.892087,-121.384848,-1.607261,42.165838,5.234838,2.362683,-1.561197,-0.761086,-1.807555,0.644637,-0.180743,-1.531553,1.341634,-0.173898,-0.042867,-0.321375,0.061567,-0.409084,3.478554,1.259146,-0.496052,-0.195792,-0.665546,0.214759,0.091623,-1.033791,238.304149,-31.155218,-6.983032,-59.465709,11.236095,-0.705518,4.062259,547.274778,-237.066769,-56.850625,-321.139048,97.114425,-0.067697,-0.958142,99.834907,-19.704158,-5.895389,-35.17932,7.657415,0.030235,1.170665,0.811112,-0.211174,-0.065271,-0.385126,0.08679,0.1475,0.185855,1.218232,-0.458906,-0.107314,-0.639844,0.170085,-0.512079,-0.470855,0.80293,-0.206935,-0.064582,-0.37655,0.084862,0.062588,0.43152,1.185339,-0.502285,-0.203769,-0.685855,0.224287,0.191839,-0.997537,296.10852,-116.745841,-31.9643,-181.873577,45.857947,0.337706,-0.367387,297.206802,-116.151936,-31.74621,-181.668923,46.148778,0.321647,-0.378275,164.11714,-25.422615,-6.973994,-48.654972,8.994213,2.220369,8.807326,296.45052,-116.635221,-32.172745,-177.235637,45.95356,0.297342,-0.402259,215.276935,-79.53546,-17.652234,-114.830708,31.714706,-0.226748,-0.675779,1.62037,-0.186108,-0.046686,-0.320813,0.065303,0.129968,4.873513,2.738339,-1.908773,-0.837929,-2.178667,0.829992,-0.128812,-1.467895,156.895258,-109.364629,-48.009806,-134.702718,47.55505,0.128812,-1.467895,92.840362,-10.663225,-2.674937,-18.381245,3.741563,0.129968,4.873513,3.116418,-2.775179,-1.682675,-2.920526,0.5727,-0.27381,-1.780833,2.681002,-0.387218,-0.109436,-0.757699,0.152014,-0.271634,2.87682,153.610097,-22.185931,-6.270247,-48.884523,8.709785,0.271634,2.87682,178.557595,-159.006021,-96.410151,-167.333819,32.813304,-0.27381,-1.780833,0.369302,52.766636,0.241388,-0.437876,0.797198,0.005221,-0.1131,0.600223,-4.955283,-8.169446,2.694235,1.125718,-0.12428,-0.028638,-0.323444,0.043256,-0.002835,3.275095,1.907342,-0.141848,-0.029962,-0.39471,0.051895,-0.053779,5.570731,0.952028,-0.110865,-0.031669,-0.204149,0.04154,0.075537,4.722602,221.74182,-19.978814,-4.692393,-52.271026,7.473219,0.205375,4.997718,130.866485,-24.397771,-7.384581,-43.581835,9.835206,0.172387,1.098132,63.751136,-9.539324,-2.311907,-19.274785,3.842362,0.351238,2.38941,3.006554,-1.518542,-0.411151,-2.178463,0.621968,-0.003137,-0.991472,2.991025,-1.265548,-0.327568,-1.908854,0.516071,-0.009967,-0.731086,171.373104,-72.51054,-18.768239,-108.600091,29.568697,0.009967,-0.731086,172.262829,-87.006066,-23.557219,-124.81675,35.636118,-0.003137,-0.991472,3.10835,-1.818905,-0.642374,-2.336374,0.732678,-0.047991,-1.184095,3.060012,-1.479478,-0.40642,-2.15676,0.58199,0.035938,-0.89736,175.325789,-84.767847,-23.286136,-122.187845,33.345552,-0.035938,-0.89736,178.095309,-104.215556,-36.805336,-133.864373,41.979341,-0.047991,-1.184095,3


In [185]:
train_preprocess.columns.nunique()

346

In [187]:
drop_columns = ['gy_roll_range_func','gy_XZvector_iqr_func3','gy_XZvector_median','gy_Centerofgravity_prekurt']
train_preprocess[drop_columns]

Unnamed: 0_level_0,gy_roll_range_func,gy_XZvector_iqr_func3,gy_XZvector_median,gy_Centerofgravity_prekurt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2.833836,-7.117599,21.917642,-0.204728
1,3.082769,-35.747911,64.964235,5.025850
2,3.116418,-6.973994,19.663983,-0.675779
3,2.849506,-11.490834,31.247075,3.489938
4,2.853791,-15.268916,15.534423,19.205443
...,...,...,...,...
3120,2.806288,-11.746323,11.602243,9.594686
3121,2.847665,-10.352061,13.284098,2.198706
3122,2.839952,-60.795614,175.134085,-0.990394
3123,3.071614,-19.495711,59.402955,2.168144


## Catboost Model 
depth = [3,4,5]

In [188]:
y = y['label']
train_df = train_preprocess.drop(drop_columns, axis = 1) 

In [193]:
# depth = 3
cat_depth3 = CatBoostClassifier(random_state = 20,
                               iterations= 26000, task_type = 'GPU',
                               depth = 3, bootstrap_type ='Bernoulli',
                               learning_rate = 0.007932)

# depth = 4 
cat_depth4 = CatBoostClassifier(random_state = 20,
                               iterations= 22000, task_type = 'GPU',
                               depth = 4, bootstrap_type ='Bernoulli',
                               learning_rate = 0.007932)

# depth = 5 
cat_depth5 = CatBoostClassifier(random_state = 20,
                               iterations= 13000, task_type = 'GPU',
                               depth = 5, bootstrap_type ='Bernoulli',
                               learning_rate = 0.007932)

In [192]:
import time 
import datetime 

In [None]:
start = time.time()
cat_depth3.fit(train_df, y)
cat_depth4.fit(train_df, y)
cat_depth5.fit(train_df, y)

sec = time.time()- start

In [None]:
print('3개 모델 학습 소요시간 : ', str(datetime.timedelta(seconds = sec)).split('.'))

In [None]:
test_df = test_preprocess.drop(drop_columns, axis =1 )

predict_depth3 = cat_depth3.predict_proba(test_df)
predict_depth4 = cat_depth4.predict_proba(test_df)
predict_depth5 = cat_depth5.predict_proba(test_df)

#### Soft Voting 

In [None]:
predict = predict_depth3 * 0.45 + predict_depth4 * 0.35 + predict_depth5 * 0.2