In [1]:
# 구글 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# 기본 directory 설정
import os
os.chdir('/content/drive/MyDrive/Monthly_Workout')

In [3]:
# 모듈 불러오기
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import CubicSpline
from scipy.integrate import cumtrapz
from tqdm import tqdm
import matplotlib
import tensorflow as tf
from sklearn import metrics

In [4]:
# 데이터 불러오기
path = './' # 기본 directory 경로에 추가 할 경로

train = pd.read_csv(path + 'train_features.csv')
train_labels = pd.read_csv(path + 'train_labels.csv')
test = pd.read_csv(path + 'test_features.csv')
submission = pd.read_csv(path + 'sample_submission.csv')

In [5]:
act_list=train.iloc[:,2:].columns
acc_list=['acc_x','acc_y','acc_z']
gy_list=['gy_x','gy_y','gy_z']
act_list

Index(['acc_x', 'acc_y', 'acc_z', 'gy_x', 'gy_y', 'gy_z'], dtype='object')

In [6]:
from sklearn.preprocessing import StandardScaler
# Scaling
scaler=StandardScaler()
train[act_list]=scaler.fit_transform(train[act_list])
test[act_list]=scaler.transform(test[act_list])
train

Unnamed: 0,id,time,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z
0,0,0,2.307314,-0.029939,-0.432104,0.011776,-0.410859,-0.461007
1,0,1,2.421086,-0.063321,-0.504058,0.026484,-0.528052,-0.365459
2,0,2,2.444664,-0.056749,-0.654199,-0.037962,-0.596041,-0.366767
3,0,3,2.428612,-0.116782,-0.573407,0.066099,-0.725311,-0.401232
4,0,4,2.439475,-0.044220,-0.588886,0.091969,-0.784094,-0.408413
...,...,...,...,...,...,...,...,...
1874995,3124,595,-0.367432,-0.845648,0.503707,-0.461266,-1.413128,-1.092639
1874996,3124,596,-0.326315,-0.845833,0.578875,-0.474109,-1.382884,-1.097377
1874997,3124,597,-0.300794,-0.859728,0.652722,-0.436694,-1.341672,-1.136169
1874998,3124,598,-0.253120,-0.886873,0.673002,-0.367051,-1.346630,-1.148831


In [7]:
# acc 데이터와 gy 데이터로 분할
def sensor_split(data):
    X_acc = []
    X_gy = []

    for i in tqdm(data['id'].unique()):
        temp_acc = np.array(data[data['id'] == i].loc[:,acc_list])
        temp_gy = np.array(data[data['id'] == i].loc[:,gy_list])
        X_acc.append(temp_acc)
        X_gy.append(temp_gy)
      
    X_acc = np.array(X_acc).reshape(-1,600,3)
    X_gy = np.array(X_gy).reshape(-1,600,3)

    return X_acc, X_gy

In [8]:
# 26(Non-exercise) data, 이 외 label data 분할
X_train_mod=pd.merge(train,train_labels,how='left',on='id')
X_train_26=X_train_mod[X_train_mod['label']==26]
X_train_n26=X_train_mod[X_train_mod['label']!=26]

X_train_acc_26, X_train_gy_26= sensor_split(X_train_26)
X_train_acc_n26, X_train_gy_n26= sensor_split(X_train_n26)

100%|██████████| 1518/1518 [00:05<00:00, 271.78it/s]
100%|██████████| 1607/1607 [00:05<00:00, 267.93it/s]


In [9]:
# 데이터 증강

def Permutation(X, nPerm=4, minSegLength=10):
    X_new = np.zeros(X.shape)
    idx = np.random.permutation(nPerm)
    bWhile = True
    while bWhile == True:
        segs = np.zeros(nPerm+1, dtype=int)
        segs[1:-1] = np.sort(np.random.randint(minSegLength, X.shape[0]-minSegLength, nPerm-1))
        segs[-1] = X.shape[0]
        if np.min(segs[1:]-segs[0:-1]) > minSegLength:
            bWhile = False
    pp = 0
    for ii in range(nPerm):
        x_temp = X[segs[idx[ii]]:segs[idx[ii]+1],:]
        X_new[pp:pp+len(x_temp),:] = x_temp
        pp += len(x_temp)
    return (X_new)

def aug(data, uid, shift):
    shift_data = np.roll(data[uid], shift, axis=0)
    return shift_data
def Rolling(data):
    aug_data=[]
    for i in range(data.shape[0]):
        temp=list((aug(data,i,int(random.random()*600))))
        aug_data.append(temp)
    return np.array(aug_data)

In [10]:
# label 데이터도 26과 not 26 으로 분리
y_train_26=train_labels[train_labels['label']==26]['label']
y_train_n26=train_labels[train_labels['label']!=26]['label']

In [11]:
# 데이터 증강 (반복하고 싶은 만큼 조정)
X_train_acc_n26_temp = X_train_acc_n26.copy()
X_train_gy_n26_temp = X_train_gy_n26.copy()
y_train_total = np.append(y_train_n26, y_train_n26, axis=0)

rep = 5
for i in range(rep):
    X_train_acc_n26_roll = Rolling(X_train_acc_n26_temp)
    X_train_acc_n26_rp = Permutation(Rolling(X_train_acc_n26_temp)) # rolling + permutation

    X_train_gy_n26_roll = Rolling(X_train_gy_n26_temp)
    X_train_gy_n26_rp = Permutation(Rolling(X_train_gy_n26_temp)) # rolling + permutation

    # 증강시킨 데이터 원래 데이터에 추가
    X_train_acc_n26 = np.append(X_train_acc_n26, X_train_acc_n26_roll, axis=0)
    X_train_acc_n26 = np.append(X_train_acc_n26, X_train_acc_n26_rp, axis=0)

    X_train_gy_n26 = np.append(X_train_gy_n26, X_train_gy_n26_roll, axis=0)
    X_train_gy_n26 = np.append(X_train_gy_n26, X_train_gy_n26_rp, axis=0)

    y_train_total = np.append(y_train_total, y_train_n26, axis=0)
    if i != (rep-1): # 마지막 한 번 제외
        y_train_total = np.append(y_train_total, y_train_n26, axis=0)

In [12]:
X_train_acc_n26.shape, X_train_gy_n26.shape, y_train_total.shape

((17677, 600, 3), (17677, 600, 3), (17677,))

In [13]:
# non-exercise 값 추가
X_train_acc = np.append(X_train_acc_n26, X_train_acc_26, axis=0)
X_train_gy = np.append(X_train_gy_n26, X_train_gy_26, axis=0)
y_train = np.append(y_train_total, y_train_26,axis=0)

X_train_acc.shape, X_train_gy.shape

((19195, 600, 3), (19195, 600, 3))

In [14]:
# Shuffle
# s = np.arange(X_train_acc.shape[0])
# np.random.shuffle(s)

# X_train_acc = X_train_acc[s]
# X_train_gy = X_train_gy[s]

# y_train = y_train[s]

In [15]:
# 변화량 feature 추가
X_train_acc_grad = np.gradient(X_train_acc, axis=0)
X_train_gy_grad = np.gradient(X_train_gy, axis=0)
X_train_acc_grad.shape, X_train_gy_grad.shape

((19195, 600, 3), (19195, 600, 3))

In [16]:
X_train_acc = np.append(X_train_acc, X_train_acc_grad, axis=2)
X_train_gy = np.append(X_train_gy,X_train_gy_grad, axis=2)

X_train_acc.shape, X_train_gy.shape

((19195, 600, 6), (19195, 600, 6))

In [17]:
# np array 형태를 dataframe 으로 변환
acc = [e for sl in X_train_acc for e in sl]

df_report = np.stack(acc, axis = 0)
df_acc = pd.DataFrame(df_report, columns= ['acc_x', 'acc_y', 'acc_z', 
                                           'grad_acc_x', 'grad_acc_y', 'grad_acc_z']) 

df_acc

Unnamed: 0,acc_x,acc_y,acc_z,grad_acc_x,grad_acc_y,grad_acc_z
0,2.307314,-0.029939,-0.432104,-1.344887,-0.440231,2.001439
1,2.421086,-0.063321,-0.504058,-1.453668,-0.157143,2.104838
2,2.444664,-0.056749,-0.654199,-1.427295,0.273694,2.185050
3,2.428612,-0.116782,-0.573407,-1.148158,0.188647,2.254954
4,2.439475,-0.044220,-0.588886,-1.157610,-0.215429,2.243544
...,...,...,...,...,...,...
11516995,-0.399599,0.952252,-1.027387,0.335067,0.927293,-0.742572
11516996,-0.477111,0.847710,-0.947752,0.274603,0.861387,-0.659743
11516997,-0.524610,0.772725,-0.879577,0.229243,0.839027,-0.563316
11516998,-0.569191,0.739435,-0.838705,0.176291,0.820222,-0.490561


In [18]:
# gy

gy = [e for sl in X_train_gy for e in sl]

df_report = np.stack(gy, axis = 0)
df_gy = pd.DataFrame(df_report, columns= ['gy_x', 'gy_y', 'gy_z', 
                                          'grad_gy_x', 'grad_gy_y', 'grad_gy_z'])

df_gy

Unnamed: 0,gy_x,gy_y,gy_z,grad_gy_x,grad_gy_y,grad_gy_z
0,0.011776,-0.410859,-0.461007,-0.221724,-2.170136,0.129669
1,0.026484,-0.528052,-0.365459,0.334289,-1.926540,0.085520
2,-0.037962,-0.596041,-0.366767,0.692404,-1.877451,0.190191
3,0.066099,-0.725311,-0.401232,0.145629,-1.608100,0.288185
4,0.091969,-0.784094,-0.408413,0.182812,-1.419158,0.137255
...,...,...,...,...,...,...
11516995,-0.037306,-0.245940,-0.966742,-0.120704,-0.234979,-0.908909
11516996,-0.149804,-0.337640,-0.866939,-0.143862,-0.338580,-0.804554
11516997,-0.116532,-0.380954,-0.749852,-0.067699,-0.382125,-0.681524
11516998,0.023460,-0.395129,-0.650403,0.043948,-0.395534,-0.583507


In [19]:
# acc, gy 데이터프레임 병합
df_aug_result = pd.concat([df_acc, df_gy], axis = 1)

val_id = []
n = int(len(df_aug_result))

# 600번씩 반복되도록 임의로 배열 생성
for i in range(n//600):
    for j in range(600):
        val_id.append(i)

# val_time = [k for k in range(600)]

In [20]:
df_aug_result.insert(0, 'id', val_id) # 리스트값 id 열에 붙여넣기
df_aug_result.head(603)

Unnamed: 0,id,acc_x,acc_y,acc_z,grad_acc_x,grad_acc_y,grad_acc_z,gy_x,gy_y,gy_z,grad_gy_x,grad_gy_y,grad_gy_z
0,0,2.307314,-0.029939,-0.432104,-1.344887,-0.440231,2.001439,0.011776,-0.410859,-0.461007,-0.221724,-2.170136,0.129669
1,0,2.421086,-0.063321,-0.504058,-1.453668,-0.157143,2.104838,0.026484,-0.528052,-0.365459,0.334289,-1.926540,0.085520
2,0,2.444664,-0.056749,-0.654199,-1.427295,0.273694,2.185050,-0.037962,-0.596041,-0.366767,0.692404,-1.877451,0.190191
3,0,2.428612,-0.116782,-0.573407,-1.148158,0.188647,2.254954,0.066099,-0.725311,-0.401232,0.145629,-1.608100,0.288185
4,0,2.439475,-0.044220,-0.588886,-1.157610,-0.215429,2.243544,0.091969,-0.784094,-0.408413,0.182812,-1.419158,0.137255
...,...,...,...,...,...,...,...,...,...,...,...,...,...
598,0,2.063996,-0.384998,-0.705998,-1.628753,-0.257567,2.574156,-0.138130,0.397437,0.279544,0.540829,1.540323,0.062994
599,0,2.212616,-0.451386,-1.028563,-2.008500,0.681964,2.740679,-0.239725,0.301640,0.442235,0.937135,1.534408,-0.287082
600,1,0.962427,-0.470170,1.569335,-0.837869,0.225410,1.072514,-0.209948,-2.580994,-0.331338,-0.009659,0.212756,0.201011
601,1,0.967418,-0.220464,1.600780,-0.931031,0.251971,1.085820,0.360773,-2.454592,-0.279939,0.016762,0.326620,0.156826


In [21]:
train = df_aug_result
train

Unnamed: 0,id,acc_x,acc_y,acc_z,grad_acc_x,grad_acc_y,grad_acc_z,gy_x,gy_y,gy_z,grad_gy_x,grad_gy_y,grad_gy_z
0,0,2.307314,-0.029939,-0.432104,-1.344887,-0.440231,2.001439,0.011776,-0.410859,-0.461007,-0.221724,-2.170136,0.129669
1,0,2.421086,-0.063321,-0.504058,-1.453668,-0.157143,2.104838,0.026484,-0.528052,-0.365459,0.334289,-1.926540,0.085520
2,0,2.444664,-0.056749,-0.654199,-1.427295,0.273694,2.185050,-0.037962,-0.596041,-0.366767,0.692404,-1.877451,0.190191
3,0,2.428612,-0.116782,-0.573407,-1.148158,0.188647,2.254954,0.066099,-0.725311,-0.401232,0.145629,-1.608100,0.288185
4,0,2.439475,-0.044220,-0.588886,-1.157610,-0.215429,2.243544,0.091969,-0.784094,-0.408413,0.182812,-1.419158,0.137255
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11516995,19194,-0.399599,0.952252,-1.027387,0.335067,0.927293,-0.742572,-0.037306,-0.245940,-0.966742,-0.120704,-0.234979,-0.908909
11516996,19194,-0.477111,0.847710,-0.947752,0.274603,0.861387,-0.659743,-0.149804,-0.337640,-0.866939,-0.143862,-0.338580,-0.804554
11516997,19194,-0.524610,0.772725,-0.879577,0.229243,0.839027,-0.563316,-0.116532,-0.380954,-0.749852,-0.067699,-0.382125,-0.681524
11516998,19194,-0.569191,0.739435,-0.838705,0.176291,0.820222,-0.490561,0.023460,-0.395129,-0.650403,0.043948,-0.395534,-0.583507


In [22]:
# id 별로 미분한 값
feature_names = ['acc_x','acc_y','acc_z','gy_x','gy_y','gy_z']

grad_cols=[]
for col in feature_names:
    grad_cols.append(f"grad_{col}")

total_feature_names = feature_names + grad_cols

In [23]:
# for uid in tqdm(train['id'].unique()):
#     temp = train.loc[train['id']==uid, feature_names]
#     grad = np.gradient(temp, axis=0)
#     train.loc[train['id']==uid, grad_cols] = grad
    
for uid in tqdm(test['id'].unique()):
    temp = test.loc[test['id']==uid, feature_names]
    grad = np.gradient(temp, axis=0)
    test.loc[test['id']==uid, grad_cols] = grad
    
test

100%|██████████| 782/782 [00:12<00:00, 63.87it/s]


Unnamed: 0,id,time,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z,grad_acc_x,grad_acc_y,grad_acc_z,grad_gy_x,grad_gy_y,grad_gy_z
0,3125,0,-0.249727,0.002786,0.202699,0.837933,1.212439,0.179961,0.230796,0.293966,-0.434328,0.119800,0.105724,0.037954
1,3125,1,-0.018931,0.296752,-0.231628,0.957733,1.318163,0.217914,0.184453,0.058214,-0.297885,-0.165274,0.036839,-0.001902
2,3125,2,0.119178,0.119214,-0.393070,0.507385,1.286118,0.176158,0.077232,-0.214859,-0.147442,-0.273431,-0.051683,-0.020024
3,3125,3,0.135533,-0.132967,-0.526513,0.410870,1.214796,0.177867,0.035234,-0.027225,-0.199602,-0.075185,-0.058042,0.003935
4,3125,4,0.189646,0.064763,-0.792275,0.357015,1.170034,0.184028,0.053188,0.241527,-0.240765,-0.127280,-0.123611,-0.019546
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469195,3906,595,0.771160,-1.061284,1.235599,-0.150699,0.198219,-2.707576,-0.047492,0.083140,-0.025072,-0.249168,-0.270190,0.069318
469196,3906,596,0.769913,-1.016963,1.185170,-0.395387,-0.110361,-2.566486,0.016801,0.030305,-0.056435,-0.143365,-0.254314,0.196329
469197,3906,597,0.804762,-1.000674,1.122728,-0.437428,-0.310409,-2.314917,0.000581,0.056388,-0.044753,-0.020447,-0.149284,0.294702
469198,3906,598,0.771076,-0.904187,1.095665,-0.436282,-0.408929,-1.977082,-0.048093,0.115720,-0.031116,-0.016299,-0.091407,0.327043


In [24]:
train['acc_t'] = (train['acc_x'] ** 2) + (train['acc_y'] ** 2) + (train['acc_z'] ** 2) ** (1/3)
test['acc_t'] = (test['acc_x'] ** 2) + (test['acc_y'] ** 2) + (test['acc_z'] ** 2) ** (1/3)

train['gy_t'] = (train['gy_x'] ** 2) + (train['gy_y'] ** 2) + (train['gy_z'] ** 2) ** (1/3)
test['gy_t'] = (test['gy_x'] ** 2) + (test['gy_y'] ** 2) + (test['gy_z'] ** 2) ** (1/3)

# Signal 극대화 (peak 캐치 유용)
train['acc_mag'] = (train['acc_x'] ** 2) + (train['acc_y'] ** 2) + (train['acc_z'] ** 2)
test['acc_mag'] = (test['acc_x'] ** 2) + (test['acc_y'] ** 2) + (test['acc_z'] ** 2)

train['gy_mag'] = (train['gy_x'] ** 2) + (train['gy_y'] ** 2) + (train['gy_z'] ** 2)
test['gy_mag'] = (test['gy_x'] ** 2) + (test['gy_y'] ** 2) + (test['gy_z'] ** 2)

In [25]:
# vector
train['acc_vector'] = np.sqrt((train['acc_x'] ** 2) +(train['acc_y'] ** 2)+(train['acc_z'] ** 2))
test['acc_vector'] = np.sqrt((test['acc_x'] ** 2) +(test['acc_y'] ** 2)+(test['acc_z'] ** 2))

train['gy_vector'] = np.sqrt((train['gy_x'] ** 2) +(train['gy_y'] ** 2)+(train['gy_z'] ** 2))
test['gy_vector'] = np.sqrt((test['gy_x'] ** 2) +(test['gy_y'] ** 2)+(test['gy_z'] ** 2))

train['acc_YZvector'] = np.sqrt((train['acc_y'] ** 2) +(train['acc_z'] ** 2))
test['acc_YZvector'] = np.sqrt((test['acc_y'] ** 2) +(test['acc_z'] ** 2))

train['gy_YZvector'] = np.sqrt((train['gy_y'] ** 2) +(train['gy_z'] ** 2))
test['gy_YZvector'] = np.sqrt((test['gy_y'] ** 2) +(test['gy_z'] ** 2))

train['acc_XYvector'] = np.sqrt((train['acc_x'] ** 2) +(train['acc_y'] ** 2))
test['acc_XYvector'] = np.sqrt((test['acc_x'] ** 2) +(test['acc_y'] ** 2))

train['gy_XYvector'] = np.sqrt((train['gy_x'] ** 2) +(train['gy_y'] ** 2))
test['gy_XYvector'] = np.sqrt((test['gy_x'] ** 2) +(test['gy_y'] ** 2))

train['acc_XZvector'] = np.sqrt((train['acc_x'] ** 2) +(train['acc_z'] ** 2))
test['acc_XZvector'] = np.sqrt((test['acc_x'] ** 2) +(test['acc_z'] ** 2))

train['gy_XZvector'] = np.sqrt((train['gy_x'] ** 2) +(train['gy_z'] ** 2))
test['gy_XZvector'] = np.sqrt((test['gy_x'] ** 2) +(test['gy_z'] ** 2))

# 자이로스코프 무게중심
train['gy_Centerofgravity'] = (train['gy_x']+train['gy_y']+train['gy_z'])/3
test['gy_Centerofgravity'] = (test['gy_x']+test['gy_y']+test['gy_z'])/3

# roll & pitch
train['roll'] = np.arctan(train['acc_y']/np.sqrt(train['acc_x'] ** 2 + train['acc_z'] ** 2))
test['roll'] = np.arctan(test['acc_y']/np.sqrt(test['acc_x'] ** 2 + test['acc_z'] ** 2))

train['pitch'] = np.arctan(train['acc_x']/np.sqrt(train['acc_y'] ** 2 + train['acc_z'] ** 2))
test['pitch'] = np.arctan(test['acc_x']/np.sqrt(test['acc_y'] ** 2 + test['acc_z'] ** 2))

In [26]:
from math import pi

train['math_roll'] = np.arctan(- train['acc_x']/np.sqrt(train['acc_y'] ** 2 + train['acc_z'] ** 2)) * (180/pi)
test['math_roll'] = np.arctan(- test['acc_x']/np.sqrt(test['acc_y'] ** 2 + test['acc_z'] ** 2)) * (180/pi)

train['math_pitch'] = np.arctan(train['acc_y']/np.sqrt(train['acc_x'] ** 2 + train['acc_z'] ** 2)) * (180/pi)
test['math_pitch'] = np.arctan(test['acc_y']/np.sqrt(test['acc_x'] ** 2 + test['acc_z'] ** 2)) * (180/pi)

train['gy_roll'] = np.arctan(train['gy_y']/np.sqrt(train['gy_x'] ** 2 + train['gy_z'] ** 2))
test['gy_roll'] = np.arctan(test['gy_y']/np.sqrt(test['gy_x'] ** 2 + test['gy_z'] ** 2))

train['gy_pitch'] = np.arctan(train['gy_x']/np.sqrt(train['gy_y'] ** 2 + train['gy_z'] ** 2))
test['gy_pitch'] = np.arctan(test['gy_x']/np.sqrt(test['gy_y'] ** 2 + test['gy_z'] ** 2))

train['gy_math_roll'] = np.arctan(- train['gy_x']/np.sqrt(train['gy_y'] ** 2 + train['gy_z'] ** 2)) * (180/pi)
test['gy_math_roll'] = np.arctan(- test['gy_x']/np.sqrt(test['gy_y'] ** 2 + test['gy_z'] ** 2)) * (180/pi)

train['gy_math_pitch'] = np.arctan(train['gy_y']/np.sqrt(train['gy_x'] ** 2 + train['gy_z'] ** 2)) * (180/pi)
test['gy_math_pitch'] = np.arctan(test['gy_y']/np.sqrt(test['gy_x'] ** 2 + test['gy_z'] ** 2)) * (180/pi)

print(train.shape)
train

(11517000, 34)


Unnamed: 0,id,acc_x,acc_y,acc_z,grad_acc_x,grad_acc_y,grad_acc_z,gy_x,gy_y,gy_z,grad_gy_x,grad_gy_y,grad_gy_z,acc_t,gy_t,acc_mag,gy_mag,acc_vector,gy_vector,acc_YZvector,gy_YZvector,acc_XYvector,gy_XYvector,acc_XZvector,gy_XZvector,gy_Centerofgravity,roll,pitch,math_roll,math_pitch,gy_roll,gy_pitch,gy_math_roll,gy_math_pitch
0,0,2.307314,-0.029939,-0.432104,-1.344887,-0.440231,2.001439,0.011776,-0.410859,-0.461007,-0.221724,-2.170136,0.129669,5.896150,0.765711,5.511308,0.381471,2.347617,0.617633,0.433140,0.617521,2.307508,0.411027,2.347427,0.461158,-0.286697,-0.012753,1.385231,-79.367910,-0.730704,-0.727781,0.019068,-1.092507,-41.698765
1,0,2.421086,-0.063321,-0.504058,-1.453668,-0.157143,2.104838,0.026484,-0.528052,-0.365459,0.334289,-1.926540,0.085520,6.499033,0.790703,6.119743,0.413101,2.473812,0.642729,0.508020,0.642183,2.421914,0.528716,2.473001,0.366417,-0.289009,-0.025599,1.363966,-78.149478,-1.466742,-0.964174,0.041217,-2.361573,-55.243076
2,0,2.444664,-0.056749,-0.654199,-1.427295,0.273694,2.185050,-0.037962,-0.596041,-0.366767,0.692404,-1.877451,0.190191,6.733202,0.869088,6.407580,0.491224,2.531320,0.700874,0.656655,0.699845,2.445323,0.597249,2.530684,0.368727,-0.333590,-0.022421,1.308383,-74.964812,-1.284609,-1.016793,-0.054190,3.104851,-58.257969
3,0,2.428612,-0.116782,-0.573407,-1.148158,0.188647,2.254954,0.066099,-0.725311,-0.401232,0.145629,-1.608100,0.288185,6.601997,1.074443,6.240592,0.691433,2.498118,0.831524,0.585178,0.828893,2.431419,0.728317,2.495386,0.406641,-0.353481,-0.046765,1.334352,-76.452715,-2.679451,-1.059819,0.079576,-4.559348,-60.723136
4,0,2.439475,-0.044220,-0.588886,-1.157610,-0.215429,2.243544,0.091969,-0.784094,-0.408413,0.182812,-1.419158,0.137255,6.655560,1.173732,6.299780,0.790064,2.509936,0.888855,0.590543,0.884085,2.439876,0.789470,2.509547,0.418640,-0.366846,-0.017619,1.333287,-76.391740,-1.009478,-1.080386,0.103655,-5.938975,-61.901532
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11516995,19194,-0.399599,0.952252,-1.027387,0.335067,0.927293,-0.742572,-0.037306,-0.245940,-0.966742,-0.120704,-0.234979,-0.908909,2.084639,1.039582,2.121987,0.996469,1.456704,0.998233,1.400824,0.997535,1.032697,0.248754,1.102362,0.967462,-0.416663,0.712468,-0.277879,15.921321,40.821387,-0.248939,-0.037381,2.141751,-14.263154
11516996,19194,-0.477111,0.847710,-0.947752,0.274603,0.861387,-0.659743,-0.149804,-0.337640,-0.866939,-0.143862,-0.338580,-0.804554,1.911105,1.045641,1.844482,0.888026,1.358117,0.942351,1.271553,0.930368,0.972752,0.369380,1.061070,0.879787,-0.451461,0.674082,-0.358963,20.567061,38.622051,-0.366441,-0.159646,9.147024,-20.995527
11516997,19194,-0.524610,0.772725,-0.879577,0.229243,0.839027,-0.563316,-0.116532,-0.380954,-0.749852,-0.067699,-0.382125,-0.681524,1.790333,0.984079,1.645975,0.720983,1.282956,0.849107,1.170794,0.841073,0.933981,0.398379,1.024144,0.758853,-0.415779,0.646380,-0.421256,24.136214,37.034857,-0.465257,-0.137675,7.888222,-26.657237
11516998,19194,-0.569191,0.739435,-0.838705,0.176291,0.820222,-0.490561,0.023460,-0.395129,-0.650403,0.043948,-0.395534,-0.583507,1.760093,0.907358,1.574169,0.579701,1.254659,0.761381,1.118119,0.761020,0.933136,0.395825,1.013610,0.650826,-0.340691,0.630256,-0.470871,26.978899,36.111005,-0.545637,0.030817,-1.765705,-31.262726


In [27]:
# 모델 생성
from keras.models import Sequential
from keras.layers import Dense, LSTM, Flatten, Dropout, TimeDistributed, ConvLSTM2D
from tensorflow.keras.utils import to_categorical

In [28]:
# memory crash 로 인한 분할 작업
# train_1 = train.iloc[:, :11]
# train_2 = train.iloc[:, 11:22]
# train_3 = train.iloc[:, 22:]

# len_features_1 = train_1.shape[1] - 1# feature 갯수
# X_1=tf.reshape(np.array(train_1.iloc[:,1:]),[-1, 600, len_features_1])

# len_features_2 = train_2.shape[1] # feature 갯수
# X_2=tf.reshape(np.array(train_2.iloc[:,0:]),[-1, 600, len_features_2])

# len_features_3 = train_3.shape[1] # feature 갯수
# X_3=tf.reshape(np.array(train_3.iloc[:,0:]),[-1, 600, len_features_3])

# X_1.shape, X_2.shape, X_3.shape

In [29]:
# X_1 = X_1.numpy()
# X_2 = X_2.numpy()
# X_3 = X_3.numpy()

# X_1.shape, X_2.shape, X_3.shape

In [30]:
# X = np.append(X_1, X_2, axis=2)
# X = np.append(X, X_3, axis=2)

In [31]:
len_features = train.shape[1] - 1 # feature 갯수
X=tf.reshape(np.array(train.iloc[:,1:]),[-1, 600, len_features])
X = X.numpy()
X.shape

(19195, 600, 33)

In [33]:
trainX = X
trainX.shape

(19195, 600, 33)

In [34]:
trainy = to_categorical(y_train) 
trainy.shape

(19195, 61)

In [35]:
# 모델 정의
epochs, batch_size = 100, 128
n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
# reshape into subsequences (samples, time steps, rows, cols, channels)
n_steps, n_length = 4, 150
trainX = trainX.reshape((trainX.shape[0], n_steps, 1, n_length, n_features))

In [36]:
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, n_features)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [37]:
# fit network
model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f10301b1590>

In [38]:
test_X=tf.reshape(np.array(test.iloc[:,2:]),[-1, 600, len_features])
test_X = test_X.numpy()
test_X = test_X.reshape((test_X.shape[0], n_steps, 1, n_length, n_features))
test_X.shape

(782, 4, 1, 150, 33)

In [39]:
prediction=model.predict(test_X)

In [40]:
prediction

array([[1.69879698e-07, 1.45599541e-08, 1.09822040e-10, ...,
        3.91776703e-04, 1.56439253e-08, 5.57847943e-06],
       [2.85176299e-07, 2.33695174e-09, 5.34366640e-09, ...,
        2.83130534e-13, 8.74528505e-10, 2.30497812e-06],
       [1.99400514e-08, 4.89290942e-05, 2.20877254e-07, ...,
        1.71209482e-08, 1.66553116e-06, 1.08285356e-07],
       ...,
       [2.16740337e-09, 2.85203555e-10, 3.00647162e-14, ...,
        2.88881349e-15, 2.34404889e-13, 2.37010749e-08],
       [1.63469167e-07, 5.29996905e-05, 2.29323928e-06, ...,
        6.46374428e-06, 1.18076522e-02, 4.56050085e-03],
       [1.79722705e-14, 1.59103611e-12, 4.53277664e-16, ...,
        2.80337785e-14, 1.32041603e-14, 4.78952238e-08]], dtype=float32)

In [41]:
prediction.shape

(782, 61)

In [42]:
submission

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60
0,3125,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,3126,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,3127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,3129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,3902,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
778,3903,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
779,3904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
780,3905,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [43]:
submission.iloc[:,1:]=prediction

In [44]:
submission

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60
0,3125,1.698797e-07,1.455995e-08,1.098220e-10,1.999307e-10,5.338525e-07,3.186212e-10,6.200220e-07,1.429612e-10,1.990501e-08,9.062046e-01,3.187522e-08,4.843842e-03,2.594850e-08,2.661000e-04,5.088761e-08,9.885465e-10,9.759511e-10,1.190313e-09,1.663555e-07,1.485783e-07,4.732752e-13,6.076229e-14,1.985962e-11,2.106121e-11,1.577028e-02,4.249685e-09,0.000001,1.066868e-08,3.501262e-08,3.339620e-09,7.184680e-02,6.619267e-04,5.251936e-09,3.584181e-09,1.514790e-08,2.141354e-10,5.076363e-08,9.322926e-08,5.033229e-11,6.124161e-08,8.968190e-10,7.319741e-08,4.568787e-13,1.194778e-08,1.001463e-09,3.618119e-09,1.448535e-09,2.921730e-07,2.668689e-09,1.289621e-08,1.593942e-07,4.482278e-07,3.828370e-06,8.399167e-09,8.047206e-07,6.131214e-10,3.189195e-11,1.660872e-08,3.917767e-04,1.564393e-08,5.578479e-06
1,3126,2.851763e-07,2.336952e-09,5.343666e-09,8.322422e-06,9.257754e-08,1.902672e-10,1.242152e-10,1.069366e-11,1.310907e-07,2.788171e-09,2.485132e-11,1.122564e-11,1.575293e-10,5.949091e-09,3.304727e-08,8.678623e-09,2.352618e-09,2.472019e-07,7.638168e-10,3.436462e-13,4.311527e-11,3.105234e-05,1.826633e-05,4.790427e-08,3.332639e-10,1.774767e-11,0.999780,1.485586e-10,1.775831e-07,1.015897e-11,1.625716e-08,4.097431e-15,1.501654e-04,1.814657e-09,1.349816e-06,1.136709e-08,7.539967e-07,1.344683e-10,1.060378e-08,2.290264e-10,2.016218e-11,3.624190e-13,1.065772e-07,4.043979e-09,2.747277e-08,2.072721e-12,1.125960e-10,1.299655e-10,8.144311e-07,3.654687e-08,5.139917e-06,1.106856e-13,1.274126e-13,4.452118e-14,3.616753e-17,8.967239e-09,2.863779e-09,1.291315e-10,2.831305e-13,8.745285e-10,2.304978e-06
2,3127,1.994005e-08,4.892909e-05,2.208773e-07,3.319666e-04,6.458889e-08,3.168855e-07,3.317090e-06,2.595208e-06,1.317173e-04,9.246644e-07,5.603833e-09,1.222317e-08,2.332974e-06,9.226001e-10,1.728079e-04,1.237608e-05,7.956861e-06,2.677476e-06,4.882277e-07,7.366214e-05,4.762000e-10,2.078239e-07,1.880636e-05,7.432686e-09,3.957748e-04,4.031014e-08,0.000032,2.761546e-04,1.156589e-05,1.236466e-07,3.544643e-09,8.357723e-08,6.572438e-04,2.781803e-05,9.323784e-08,2.029669e-07,5.051429e-07,7.261589e-05,4.544539e-06,6.268376e-07,1.163557e-07,1.393618e-06,5.519494e-06,2.569678e-06,2.179679e-03,9.939777e-01,8.089489e-06,1.128229e-03,7.413453e-05,3.080819e-04,8.307591e-07,8.397377e-09,5.188945e-07,5.125651e-09,1.634667e-05,3.600413e-06,9.993920e-08,3.039754e-07,1.712095e-08,1.665531e-06,1.082854e-07
3,3128,4.908253e-09,3.935657e-09,1.373443e-05,2.433528e-09,3.071778e-08,1.666927e-12,6.938114e-08,1.032794e-07,7.945304e-07,1.045766e-08,3.386727e-06,4.626649e-06,1.539226e-11,2.318480e-03,4.390427e-12,2.832342e-09,1.690186e-06,1.283085e-07,1.664769e-08,4.280375e-10,1.815667e-13,1.116964e-08,8.094654e-10,7.750471e-06,1.634860e-04,6.407713e-13,0.714757,5.798913e-12,6.097240e-07,3.809685e-06,1.725927e-08,9.616489e-10,4.047337e-04,1.083697e-04,1.884557e-08,1.756117e-09,6.597510e-06,1.131571e-07,2.321304e-04,1.537090e-11,3.036481e-09,1.200507e-09,4.508274e-03,7.883534e-05,1.002691e-10,6.551582e-08,6.173691e-11,2.029640e-04,1.375712e-05,2.771416e-01,4.273323e-08,2.673627e-08,3.395525e-13,2.368812e-10,1.423608e-07,2.209542e-05,3.262017e-15,2.382642e-06,2.899493e-07,9.609968e-09,1.467087e-06
4,3129,1.172540e-07,1.322302e-11,1.638089e-12,5.245980e-07,1.200464e-07,1.424183e-07,3.964810e-10,2.950378e-10,1.157803e-09,7.110632e-06,2.839502e-09,5.314078e-11,2.923367e-10,7.932977e-15,3.466057e-08,1.772950e-09,5.322651e-10,2.017517e-14,7.638062e-09,4.178936e-14,3.575068e-10,1.092752e-08,5.860893e-07,3.446003e-09,7.315507e-06,1.811273e-14,0.998966,3.905287e-13,1.020924e-08,2.489271e-11,1.433496e-10,4.141211e-12,1.945714e-06,3.620928e-08,2.291055e-08,4.645610e-04,1.040705e-04,2.466938e-11,5.032794e-11,1.299423e-14,8.725923e-12,1.584892e-16,2.136110e-15,5.222640e-10,1.354268e-12,5.216024e-16,1.722922e-13,3.018759e-11,1.434351e-04,5.110904e-05,2.519115e-04,5.487138e-07,5.798872e-13,2.771271e-13,1.655948e-16,2.232618e-09,1.425323e-09,1.108008e-13,6.406597e-09,6.601686e-13,9.717372e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777,3902,7.137185e-09,2.885935e-12,1.087015e-15,2.167532e-10,1.108222e-08,1.464722e-14,2.096866e-13,8.712484e-12,2.626630e-09,3.513776e-08,1.950651e-12,1.049797e-14,2.314748e-18,1.166257e-19,2.447788e-10,2.252002e-11,1.574554e-15,9.010796e-12,5.092701e-11,7.489504e-19,5.943729e-17,2.657019e-13,7.030470e-14,5.784982e-10,1.594157e-04,1.624922e-13,0.840637,1.120174e-11,1.427591e-11,9.098292e-11,1.746864e-11,3.620674e-14,1.578096e-01,1.675955e-07,3.727283e-12,1.367446e-11,5.820623e-13,3.337376e-16,1.794643e-09,1.298848e-18,1.744927e-16,1.204238e-18,1.822683e-12,4.080407e-08,4.440604e-14,1.844305e-16,1.124494e-15,1.479625e-12,3.877489e-06,1.389598e-03,1.442703e-08,2.199820e-13,3.075582e-14,2.308881e-13,6.172609e-24,5.195379e-13,1.441048e-11,1.633689e-16,4.266450e-17,1.611974e-15,9.764473e-10
778,3903,6.395057e-17,5.449055e-16,3.454640e-14,7.801951e-10,4.206030e-13,3.792102e-11,2.828709e-18,2.465587e-19,7.425439e-11,1.006375e-16,3.547669e-19,1.147249e-13,1.713254e-13,1.467869e-16,8.575019e-16,1.110910e-07,2.046035e-17,5.002103e-14,8.597510e-11,9.369115e-13,4.594426e-07,6.248514e-09,3.343364e-09,1.023533e-08,7.518584e-12,3.785629e-14,0.999886,2.117412e-16,2.466370e-13,3.496897e-17,5.737286e-16,1.626543e-17,1.079219e-14,3.201230e-18,2.572683e-06,5.542382e-06,6.940278e-08,2.480090e-15,8.247546e-14,8.622005e-20,6.035518e-14,1.049542e-17,8.168677e-13,7.421233e-15,2.769151e-13,1.040745e-21,1.606195e-14,4.005424e-16,7.973581e-12,1.460671e-10,7.784737e-10,4.485035e-11,3.883243e-15,3.968764e-11,2.130425e-22,1.050088e-04,1.726396e-11,2.836254e-16,1.297131e-15,2.426839e-15,2.297828e-07
779,3904,2.167403e-09,2.852036e-10,3.006472e-14,1.338418e-11,8.829715e-12,1.851339e-11,6.373066e-19,3.605248e-14,1.075134e-09,1.942639e-10,3.122624e-14,1.106472e-12,6.647843e-11,1.460800e-15,1.906081e-12,5.729938e-14,1.584595e-14,1.213234e-12,1.296841e-11,1.881530e-15,1.037166e-16,1.987979e-10,1.222804e-11,8.586421e-08,2.741637e-07,2.541735e-16,0.999883,4.312369e-13,6.851361e-08,4.753422e-14,6.928398e-11,1.329018e-12,1.447751e-12,2.771758e-10,2.370851e-06,7.786472e-13,4.242688e-09,3.342079e-13,6.940111e-16,8.642781e-15,3.433410e-12,1.521259e-15,1.723027e-10,2.623984e-13,4.292528e-16,2.006780e-14,4.432254e-15,1.487803e-11,8.259778e-12,1.294403e-07,4.259742e-08,4.541756e-13,9.680985e-12,4.596069e-15,2.068533e-20,1.744935e-09,5.070949e-13,1.138475e-04,2.888813e-15,2.344049e-13,2.370107e-08
780,3905,1.634692e-07,5.299969e-05,2.293239e-06,3.080051e-06,3.975373e-04,1.472353e-05,5.336988e-06,5.025201e-06,1.416750e-01,1.587917e-02,5.708310e-06,1.940917e-05,3.361255e-04,2.189072e-06,8.355996e-04,1.339002e-02,1.707564e-05,5.439762e-01,1.747337e-01,5.967477e-03,4.371566e-04,3.715792e-05,1.153725e-04,5.162089e-04,1.564170e-04,3.428007e-04,0.001929,3.432381e-03,2.307012e-03,5.410545e-05,5.671334e-05,1.971809e-05,3.246538e-04,9.457642e-04,1.417018e-03,8.457030e-06,2.232756e-04,1.228817e-02,4.073976e-04,1.740323e-03,8.245307e-05,2.896013e-03,9.250739e-06,5.275567e-05,4.018803e-06,1.812549e-05,1.030806e-03,1.391360e-02,1.269792e-04,1.945789e-03,1.854877e-03,3.483366e-02,5.578292e-07,6.227253e-04,2.190633e-06,1.326910e-03,8.080663e-04,1.882651e-05,6.463744e-06,1.180765e-02,4.560501e-03


In [45]:
submission.to_csv('LSTM_9(jun).csv', index=False)