In [None]:
# 구글 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# 기본 directory 설정
import os
os.chdir('/content/drive/MyDrive/Monthly_Workout')

In [1]:
# 모듈 불러오기
import random
import pandas as pd
import numpy as np
from tqdm import tqdm
from math import pi

In [2]:
# 데이터 불러오기
path = './' # 기본 directory 경로에 추가 할 경로

train = pd.read_csv(path + 'train_features.csv')
train_labels = pd.read_csv(path + 'train_labels.csv')
test = pd.read_csv(path + 'test_features.csv')
submission = pd.read_csv(path + 'sample_submission.csv')

train

Unnamed: 0,id,time,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z
0,0,0,1.206087,-0.179371,-0.148447,-0.591608,-30.549010,-31.676112
1,0,1,1.287696,-0.198974,-0.182444,0.303100,-39.139103,-24.927216
2,0,2,1.304609,-0.195114,-0.253382,-3.617278,-44.122565,-25.019629
3,0,3,1.293095,-0.230366,-0.215210,2.712986,-53.597843,-27.454013
4,0,4,1.300887,-0.187757,-0.222523,4.286707,-57.906561,-27.961234
...,...,...,...,...,...,...,...,...
1874995,3124,595,-0.712530,-0.658357,0.293707,-29.367857,-104.013664,-76.290437
1874996,3124,596,-0.683037,-0.658466,0.329223,-30.149089,-101.796809,-76.625087
1874997,3124,597,-0.664730,-0.666625,0.364114,-27.873095,-98.776072,-79.365125
1874998,3124,598,-0.630534,-0.682565,0.373696,-23.636550,-99.139495,-80.259478


In [3]:
act_list=train.iloc[:,2:].columns
acc_list=['acc_x','acc_y','acc_z']
gy_list=['gy_x','gy_y','gy_z']
act_list

Index(['acc_x', 'acc_y', 'acc_z', 'gy_x', 'gy_y', 'gy_z'], dtype='object')

In [4]:
# acc 데이터와 gy 데이터로 분할
def sensor_split(data):
    X_acc = []
    X_gy = []

    for i in tqdm(data['id'].unique()):
        temp_acc = np.array(data[data['id'] == i].loc[:,acc_list])
        temp_gy = np.array(data[data['id'] == i].loc[:,gy_list])
        X_acc.append(temp_acc)
        X_gy.append(temp_gy)
      
    X_acc = np.array(X_acc).reshape(-1,600,3)
    X_gy = np.array(X_gy).reshape(-1,600,3)

    return X_acc, X_gy

In [5]:
# 데이터 증강

def permutation(X, nPerm=4, minSegLength=10):
    X_new = np.zeros(X.shape)
    idx = np.random.permutation(nPerm)
    bWhile = True
    while bWhile == True:
        segs = np.zeros(nPerm+1, dtype=int)
        segs[1:-1] = np.sort(np.random.randint(minSegLength, X.shape[0]-minSegLength, nPerm-1))
        segs[-1] = X.shape[0]
        if np.min(segs[1:]-segs[0:-1]) > minSegLength:
            bWhile = False
    pp = 0
    for ii in range(nPerm):
        x_temp = X[segs[idx[ii]]:segs[idx[ii]+1],:]
        X_new[pp:pp+len(x_temp),:] = x_temp
        pp += len(x_temp)
    return (X_new)

def aug(data, uid, shift):
    shift_data = np.roll(data[uid], shift, axis=0)
    return shift_data

def rolling(data):
    aug_data=[]
    for i in range(data.shape[0]):
        temp=list((aug(data,i,int(random.random()*600))))
        aug_data.append(temp)
    return np.array(aug_data)

In [6]:
# 데이터 증강 (반복하고 싶은 만큼 조정)
def start_augmentation(train, train_labels):
    # acc, gy 데이터 분할
    X_train_mod=pd.merge(train,train_labels,how='left',on='id')
    X_train_acc, X_train_gy= sensor_split(X_train_mod)

    # 증강시키고 추가할 임시 데이터 복사본
    X_train_acc_temp = X_train_acc.copy()
    X_train_gy_temp = X_train_gy.copy()

    # label 데이터 변환
    y_train = train_labels['label']
    y_train_total = np.append(y_train, y_train, axis=0)

    rep = 5 # 5이상의 경우 reshape 과정에서 reset될 가능성 높음
    for i in range(rep):
        X_train_acc_roll = rolling(X_train_acc_temp)
        X_train_gy_roll = rolling(X_train_gy_temp)

        # 증강시킨 데이터 원래 데이터에 추가
        X_train_acc = np.append(X_train_acc, X_train_acc_roll, axis=0)
        X_train_gy = np.append(X_train_gy, X_train_gy_roll, axis=0)

        if i != (rep-1): # 마지막 한 번 제외
            y_train_total = np.append(y_train_total, y_train, axis=0)

    return X_train_acc, X_train_gy, y_train_total 

In [7]:
X_train_acc, X_train_gy, y_train_total = start_augmentation(train, train_labels)

X_train_acc.shape, X_train_gy.shape, y_train_total.shape

100%|█████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:20<00:00, 155.70it/s]


((18750, 600, 3), (18750, 600, 3), (18750,))

In [8]:
# np array 형태를 dataframe 으로 변환
def np_to_df(X_train_acc, X_train_gy):
    acc = [e for sl in X_train_acc for e in sl]
    gy = [e for sl in X_train_gy for e in sl]

    df_report_acc = np.stack(acc, axis = 0)
    df_report_gy = np.stack(gy, axis = 0)

    df_acc = pd.DataFrame(df_report_acc, columns= ['acc_x', 'acc_y', 'acc_z']) 
    df_gy = pd.DataFrame(df_report_gy, columns= ['gy_x', 'gy_y', 'gy_z']) 

    # acc, gy 데이터프레임 병합
    df_aug_result = pd.concat([df_acc, df_gy], axis = 1)
    
    return df_aug_result

In [9]:
train = np_to_df(X_train_acc, X_train_gy)
train

Unnamed: 0,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z
0,1.206087,-0.179371,-0.148447,-0.591608,-30.549010,-31.676112
1,1.287696,-0.198974,-0.182444,0.303100,-39.139103,-24.927216
2,1.304609,-0.195114,-0.253382,-3.617278,-44.122565,-25.019629
3,1.293095,-0.230366,-0.215210,2.712986,-53.597843,-27.454013
4,1.300887,-0.187757,-0.222523,4.286707,-57.906561,-27.961234
...,...,...,...,...,...,...
11249995,-0.992952,-0.321013,-0.090105,18.091944,79.130873,63.858367
11249996,-0.979069,-0.240940,-0.089751,16.734111,80.703429,61.247504
11249997,-0.948902,-0.197942,-0.110383,16.194187,80.104061,59.666932
11249998,-0.944806,-0.314007,-0.122991,26.457620,80.460091,57.722090


In [10]:
# 가속도
train['acc_t'] = (train['acc_x'] ** 2) + (train['acc_y'] ** 2) + (train['acc_z'] ** 2) ** (1/3)
test['acc_t'] = (test['acc_x'] ** 2) + (test['acc_y'] ** 2) + (test['acc_z'] ** 2) ** (1/3)

train['gy_t'] = (train['gy_x'] ** 2) + (train['gy_y'] ** 2) + (train['gy_z'] ** 2) ** (1/3)
test['gy_t'] = (test['gy_x'] ** 2) + (test['gy_y'] ** 2) + (test['gy_z'] ** 2) ** (1/3)

# Signal 극대화 (peak 캐치 유용)
train['acc_mag'] = (train['acc_x'] ** 2) + (train['acc_y'] ** 2) + (train['acc_z'] ** 2)
test['acc_mag'] = (test['acc_x'] ** 2) + (test['acc_y'] ** 2) + (test['acc_z'] ** 2)

train['gy_mag'] = (train['gy_x'] ** 2) + (train['gy_y'] ** 2) + (train['gy_z'] ** 2)
test['gy_mag'] = (test['gy_x'] ** 2) + (test['gy_y'] ** 2) + (test['gy_z'] ** 2)

In [11]:
# vector
train['acc_vec'] = np.sqrt((train['acc_x'] ** 2) +(train['acc_y'] ** 2)+(train['acc_z'] ** 2))
test['acc_vec'] = np.sqrt((test['acc_x'] ** 2) +(test['acc_y'] ** 2)+(test['acc_z'] ** 2))

train['gy_vec'] = np.sqrt((train['gy_x'] ** 2) +(train['gy_y'] ** 2)+(train['gy_z'] ** 2))
test['gy_vec'] = np.sqrt((test['gy_x'] ** 2) +(test['gy_y'] ** 2)+(test['gy_z'] ** 2))

# 자이로스코프 무게중심
train['gy_gravity'] = (train['gy_x']+train['gy_y']+train['gy_z'])/3
test['gy_gravity'] = (test['gy_x']+test['gy_y']+test['gy_z'])/3

In [12]:
# roll & pitch
train['roll'] = np.arctan(train['acc_y']/np.sqrt(train['acc_x'] ** 2 + train['acc_z'] ** 2))
test['roll'] = np.arctan(test['acc_y']/np.sqrt(test['acc_x'] ** 2 + test['acc_z'] ** 2))

train['pitch'] = np.arctan(train['acc_x']/np.sqrt(train['acc_y'] ** 2 + train['acc_z'] ** 2))
test['pitch'] = np.arctan(test['acc_x']/np.sqrt(test['acc_y'] ** 2 + test['acc_z'] ** 2))

train['math_roll'] = np.arctan(- train['acc_x']/np.sqrt(train['acc_y'] ** 2 + train['acc_z'] ** 2)) * (180/pi)
test['math_roll'] = np.arctan(- test['acc_x']/np.sqrt(test['acc_y'] ** 2 + test['acc_z'] ** 2)) * (180/pi)

train['math_pitch'] = np.arctan(train['acc_y']/np.sqrt(train['acc_x'] ** 2 + train['acc_z'] ** 2)) * (180/pi)
test['math_pitch'] = np.arctan(test['acc_y']/np.sqrt(test['acc_x'] ** 2 + test['acc_z'] ** 2)) * (180/pi)

train['gy_roll'] = np.arctan(train['gy_y']/np.sqrt(train['gy_x'] ** 2 + train['gy_z'] ** 2))
test['gy_roll'] = np.arctan(test['gy_y']/np.sqrt(test['gy_x'] ** 2 + test['gy_z'] ** 2))

train['gy_pitch'] = np.arctan(train['gy_x']/np.sqrt(train['gy_y'] ** 2 + train['gy_z'] ** 2))
test['gy_pitch'] = np.arctan(test['gy_x']/np.sqrt(test['gy_y'] ** 2 + test['gy_z'] ** 2))

train['gy_math_roll'] = np.arctan(- train['gy_x']/np.sqrt(train['gy_y'] ** 2 + train['gy_z'] ** 2)) * (180/pi)
test['gy_math_roll'] = np.arctan(- test['gy_x']/np.sqrt(test['gy_y'] ** 2 + test['gy_z'] ** 2)) * (180/pi)

train['gy_math_pitch'] = np.arctan(train['gy_y']/np.sqrt(train['gy_x'] ** 2 + train['gy_z'] ** 2)) * (180/pi)
test['gy_math_pitch'] = np.arctan(test['gy_y']/np.sqrt(test['gy_x'] ** 2 + test['gy_z'] ** 2)) * (180/pi)

print(train.shape)
train

(11250000, 21)


Unnamed: 0,acc_x,acc_y,acc_z,gy_x,gy_y,gy_z,acc_t,gy_t,acc_mag,gy_mag,...,gy_vec,gy_gravity,roll,pitch,math_roll,math_pitch,gy_roll,gy_pitch,gy_math_roll,gy_math_pitch
0,1.206087,-0.179371,-0.148447,-0.591608,-30.549010,-31.676112,1.767177,943.603223,1.508855,1936.968046,...,44.010999,-20.938910,-0.146550,1.380095,-79.073624,-8.396683,-0.767200,-0.013443,0.770209,-43.957305
1,1.287696,-0.198974,-0.182444,0.303100,-39.139103,-24.927216,2.019429,1540.494535,1.731038,2153.327355,...,46.403958,-21.254406,-0.151814,1.364146,-78.159835,-8.698302,-1.003661,0.006532,-0.374246,-57.505519
2,1.304609,-0.195114,-0.253382,-3.617278,-44.122565,-25.019629,2.140496,1968.439749,1.804277,2585.867212,...,50.851423,-24.253157,-0.145773,1.330405,-76.226605,-8.352177,-1.050508,-0.071194,4.079137,-60.189701
3,1.293095,-0.230366,-0.215210,2.712986,-53.597843,-27.454013,2.084285,2889.189647,1.771478,3633.811839,...,60.281107,-26.112956,-0.173958,1.331665,-76.298763,-9.967037,-1.095444,0.045021,-2.579501,-62.764338
4,1.300887,-0.187757,-0.222523,4.286707,-57.906561,-27.961234,2.094771,3380.757973,1.777076,4153.376248,...,64.446693,-27.193696,-0.141316,1.350615,-77.384519,-8.096783,-1.116383,0.066565,-3.813876,-63.964038
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11249995,-0.992952,-0.321013,-0.090105,18.091944,79.130873,63.858367,1.289989,6604.989949,1.097122,10666.904631,...,103.280708,53.693728,-0.311487,-1.246841,71.438699,-17.846914,0.872864,0.176081,-10.088699,50.011446
11249996,-0.979069,-0.240940,-0.089751,16.734111,80.703429,61.247504,1.217087,6808.611865,1.024683,10544.330665,...,102.685591,52.895015,-0.240327,-1.313985,75.285820,-13.769703,0.904195,0.163695,-9.379012,51.806541
11249997,-0.948902,-0.197942,-0.110383,16.194187,80.104061,59.666932,1.169706,6694.181743,0.951781,10239.055139,...,101.188216,51.988394,-0.204312,-1.336345,76.566941,-11.706243,0.913479,0.160731,-9.209232,52.338501
11249998,-0.944806,-0.314007,-0.122991,26.457620,80.460091,57.722090,1.238573,7188.767624,1.006386,10505.671508,...,102.497178,54.879934,-0.318360,-1.227957,70.356740,-18.240658,0.902693,0.261086,-14.959145,51.720503


In [13]:
# Scaling 원하는 걸로 사용
from sklearn.preprocessing import RobustScaler

scaler = RobustScaler()
train = scaler.fit_transform(train)
test.drop(['id', 'time'], axis=1, inplace=True)
test = scaler.transform(test)
train

array([[ 1.93489817,  0.03657354, -0.31330862, ...,  0.05924213,
        -0.05924213, -0.71658616],
       [ 2.01969699,  0.00673528, -0.37289479, ...,  0.07768724,
        -0.07768724, -0.94204715],
       [ 2.0372705 ,  0.01260973, -0.49722803, ...,  0.0059123 ,
        -0.0059123 , -0.98671563],
       ...,
       [-0.30429759,  0.0083059 , -0.24659414, ...,  0.22008025,
        -0.22008025,  0.88590903],
       [-0.30004125, -0.16836605, -0.26869281, ...,  0.31275131,
        -0.31275131,  0.87562469],
       [-0.33315173, -0.40017154, -0.28689203, ...,  0.31232627,
        -0.31232627,  0.92580668]])

In [14]:
import tensorflow as tf 
from keras.models import Sequential
from keras.layers import Dropout, LSTM, Input
from keras.layers import TimeDistributed
from keras.layers import Activation, GlobalAveragePooling1D
from keras.layers import Dense, Flatten, BatchNormalization
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers
from keras.models import load_model
from keras.layers.merge import concatenate

In [15]:
len_features = train.shape[1] # feature 갯수
X = train.reshape(-1, 600, len_features)
X.shape

(18750, 600, 21)

In [16]:
y = to_categorical(y_train_total)
y.shape

(18750, 61)

In [17]:
epochs, batch_size = 100, 64 # Ram 24GB 기준 256 이상 reset될 가능성 높음

In [18]:
test_X = test.reshape(-1, 600, len_features)
test_X.shape

(782, 600, 21)

In [24]:
pd.DataFrame(train).to_csv('model(1)_train.csv')

In [25]:
pd.DataFrame(y_train_total).to_csv('model(1)y_train_total.csv')
pd.DataFrame(test).to_csv('model(1)_test.csv')

In [19]:
#epochs, batch_size = 30, 64
n_features, n_outputs = X.shape[2], y.shape[1]
# reshape data into time steps of sub-sequences
n_steps, n_length = 6, 100
X = X.reshape((X.shape[0], n_steps, n_length, n_features))
test_X = test_X.reshape((test_X.shape[0], n_steps, n_length, n_features))

In [26]:
class Models:
    # 각종 사이즈는 다양하게 적용하여 stacking 쌓아 올리기
    def define_model_0():
        model = Sequential()
        model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_features)))
        model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
        model.add(TimeDistributed(Dropout(0.5)))
        model.add(TimeDistributed(GlobalAveragePooling1D()))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(32))
        model.add(Dropout(0.5))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(n_outputs, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

    def define_model_1():
        model = Sequential()
        model.add(TimeDistributed(Conv1D(filters=32, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_features)))
        model.add(TimeDistributed(Dropout(0.5)))
        model.add(TimeDistributed(Conv1D(filters=32, kernel_size=3, activation='relu')))
        model.add(TimeDistributed(GlobalAveragePooling1D()))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(32))
        model.add(Dropout(0.5))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(n_outputs, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

    def define_model_2():
        model = Sequential()
        model.add(TimeDistributed(Conv1D(filters=64, kernel_size=6, activation='relu'), input_shape=(None,n_length,n_features)))
        model.add(TimeDistributed(Dropout(0.5)))
        model.add(TimeDistributed(Conv1D(filters=64, kernel_size=6, activation='relu')))
        model.add(TimeDistributed(GlobalAveragePooling1D()))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(32))
        model.add(Dropout(0.5))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(n_outputs, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

    def define_model_3():
        model = Sequential()
        model.add(TimeDistributed(Conv1D(filters=32, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_features)))
        model.add(TimeDistributed(Conv1D(filters=64, kernel_size=6, activation='relu')))
        model.add(TimeDistributed(Dropout(0.5)))
        model.add(TimeDistributed(GlobalAveragePooling1D()))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(32))
        model.add(Dropout(0.3))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(n_outputs, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

    def define_model_4():
        model = Sequential()
        model.add(TimeDistributed(Conv1D(filters=32, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_features)))
        model.add(TimeDistributed(Conv1D(filters=32, kernel_size=3, activation='relu')))
        model.add(TimeDistributed(Dropout(0.5)))
        model.add(TimeDistributed(GlobalAveragePooling1D()))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(32))
        model.add(Dropout(0.5))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(n_outputs, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

In [27]:
# 위의 모델들 학습하면서 stacking
for i in range(5):
    model = getattr(Models, f'define_model_{i}')()
    checkpoint_path = "checkpoint/cp.ckpt"
    
    cp_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', 
                                verbose=1, save_weights_only=True, 
                                save_best_only=True, mode='min')
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1)
    
    model.fit(X, y, epochs=epochs, batch_size=batch_size, 
            validation_split=0.2, callbacks=[early_stopping, cp_callback])
    model.save(f'models/211008(1)_model_{i}.h5')
    tf.keras.backend.clear_session()

Epoch 1/100

Epoch 00001: val_loss improved from inf to 1.87897, saving model to checkpoint\cp.ckpt
Epoch 2/100

Epoch 00002: val_loss improved from 1.87897 to 1.47184, saving model to checkpoint\cp.ckpt
Epoch 3/100

Epoch 00003: val_loss improved from 1.47184 to 1.22217, saving model to checkpoint\cp.ckpt
Epoch 4/100

Epoch 00004: val_loss improved from 1.22217 to 1.04888, saving model to checkpoint\cp.ckpt
Epoch 5/100

Epoch 00005: val_loss improved from 1.04888 to 0.97517, saving model to checkpoint\cp.ckpt
Epoch 6/100

Epoch 00006: val_loss improved from 0.97517 to 0.87522, saving model to checkpoint\cp.ckpt
Epoch 7/100

Epoch 00007: val_loss improved from 0.87522 to 0.84764, saving model to checkpoint\cp.ckpt
Epoch 8/100

Epoch 00008: val_loss improved from 0.84764 to 0.79615, saving model to checkpoint\cp.ckpt
Epoch 9/100

Epoch 00009: val_loss improved from 0.79615 to 0.73176, saving model to checkpoint\cp.ckpt
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.73176
Epo


Epoch 00046: val_loss improved from 0.30400 to 0.29350, saving model to checkpoint\cp.ckpt
Epoch 47/100

Epoch 00047: val_loss did not improve from 0.29350
Epoch 48/100

Epoch 00048: val_loss did not improve from 0.29350
Epoch 49/100

Epoch 00049: val_loss did not improve from 0.29350
Epoch 50/100

Epoch 00050: val_loss improved from 0.29350 to 0.26895, saving model to checkpoint\cp.ckpt
Epoch 51/100

Epoch 00051: val_loss did not improve from 0.26895
Epoch 52/100

Epoch 00052: val_loss did not improve from 0.26895
Epoch 53/100

Epoch 00055: val_loss did not improve from 0.26468
Epoch 56/100

Epoch 00056: val_loss did not improve from 0.26468
Epoch 57/100

Epoch 00057: val_loss improved from 0.26468 to 0.26092, saving model to checkpoint\cp.ckpt
Epoch 58/100

Epoch 00058: val_loss did not improve from 0.26092
Epoch 59/100

Epoch 00059: val_loss improved from 0.26092 to 0.24566, saving model to checkpoint\cp.ckpt
Epoch 60/100

Epoch 00060: val_loss did not improve from 0.24566
Epoch 61


Epoch 00032: val_loss did not improve from 0.28120
Epoch 33/100

Epoch 00033: val_loss did not improve from 0.28120
Epoch 34/100

Epoch 00034: val_loss did not improve from 0.28120
Epoch 35/100

Epoch 00035: val_loss improved from 0.28120 to 0.26933, saving model to checkpoint\cp.ckpt
Epoch 36/100

Epoch 00036: val_loss improved from 0.26933 to 0.26230, saving model to checkpoint\cp.ckpt
Epoch 37/100

Epoch 00037: val_loss improved from 0.26230 to 0.24869, saving model to checkpoint\cp.ckpt
Epoch 38/100

Epoch 00038: val_loss did not improve from 0.24869
Epoch 39/100

Epoch 00039: val_loss improved from 0.24869 to 0.24168, saving model to checkpoint\cp.ckpt
Epoch 40/100

Epoch 00040: val_loss did not improve from 0.24168
Epoch 41/100

Epoch 00041: val_loss improved from 0.24168 to 0.23659, saving model to checkpoint\cp.ckpt
Epoch 42/100

Epoch 00042: val_loss did not improve from 0.23659
Epoch 43/100

Epoch 00043: val_loss improved from 0.23659 to 0.21963, saving model to checkpoint\c


Epoch 00014: val_loss improved from 0.73719 to 0.70519, saving model to checkpoint\cp.ckpt
Epoch 15/100

Epoch 00015: val_loss improved from 0.70519 to 0.67691, saving model to checkpoint\cp.ckpt
Epoch 16/100

Epoch 00016: val_loss did not improve from 0.67691
Epoch 17/100

Epoch 00017: val_loss improved from 0.67691 to 0.64974, saving model to checkpoint\cp.ckpt
Epoch 18/100

Epoch 00018: val_loss improved from 0.64974 to 0.61917, saving model to checkpoint\cp.ckpt
Epoch 19/100

Epoch 00019: val_loss improved from 0.61917 to 0.61241, saving model to checkpoint\cp.ckpt
Epoch 20/100

Epoch 00020: val_loss improved from 0.61241 to 0.59808, saving model to checkpoint\cp.ckpt
Epoch 21/100

Epoch 00021: val_loss improved from 0.59808 to 0.58365, saving model to checkpoint\cp.ckpt
Epoch 22/100

Epoch 00022: val_loss improved from 0.58365 to 0.57194, saving model to checkpoint\cp.ckpt
Epoch 23/100

Epoch 00023: val_loss improved from 0.57194 to 0.55205, saving model to checkpoint\cp.ckpt
Epo


Epoch 00050: val_loss improved from 0.38817 to 0.36388, saving model to checkpoint\cp.ckpt
Epoch 51/100

Epoch 00051: val_loss did not improve from 0.36388
Epoch 52/100

Epoch 00052: val_loss did not improve from 0.36388
Epoch 53/100

Epoch 00053: val_loss did not improve from 0.36388
Epoch 54/100

Epoch 00054: val_loss improved from 0.36388 to 0.34593, saving model to checkpoint\cp.ckpt
Epoch 55/100

Epoch 00055: val_loss did not improve from 0.34593
Epoch 56/100

Epoch 00056: val_loss did not improve from 0.34593
Epoch 57/100

Epoch 00057: val_loss improved from 0.34593 to 0.33924, saving model to checkpoint\cp.ckpt
Epoch 58/100

Epoch 00058: val_loss improved from 0.33924 to 0.33280, saving model to checkpoint\cp.ckpt
Epoch 59/100

Epoch 00059: val_loss did not improve from 0.33280
Epoch 60/100

Epoch 00060: val_loss improved from 0.33280 to 0.33110, saving model to checkpoint\cp.ckpt
Epoch 61/100

Epoch 00061: val_loss did not improve from 0.33110
Epoch 62/100

Epoch 00062: val_lo

In [28]:
# 저장한 모델 불러오기
for i in range(5): # 모델 갯수
    globals()[f'model{i}'] = load_model(f'models/211008(1)_model_{i}.h5')

In [29]:
model0._name = 'Client0'
model1._name = 'Client1'
model2._name = 'Client2'
model3._name = 'Client3'
model4._name = 'Client4'

In [30]:
inputs = Input(shape=(n_steps, n_length, n_features))

merge = concatenate([model0(inputs), model1(inputs), model2(inputs), model3(inputs), model4(inputs)])
hidden = Dense(10, activation='relu')(merge)
output = Dense(61, activation='softmax')(hidden)
model = tf.keras.models.Model(inputs=inputs, outputs=output)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

stfold = StratifiedKFold(n_splits=3, shuffle=True)
idx_iter = 0 
skf_accuracy=[]

for train_idx, valid_idx in stfold.split(X, y_train_total) : 
    Y_train, Y_valid = tf.gather(y, train_idx), tf.gather(y, valid_idx)
    X_train, X_valid = tf.gather(X, train_idx), tf.gather(X, valid_idx)

    checkpoint_path = "checkpoint/cp1.ckpt"
    cp_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True, mode='min')

    early_stopping = EarlyStopping(monitor='loss', patience=4, mode='min')
    model.fit(X_train, Y_train, epochs=30, batch_size=batch_size, validation_split=0.02, callbacks=[early_stopping, cp_callback])
    pred = model.predict(X_valid)

    # 반복 시 마다 정확도 측정 -> 수정해야함; 
    idx_iter += 1 
    y_pred = (pred > 0.5) 
    accuracy = np.round(accuracy_score(Y_valid, y_pred), 4)
    train_size = X_train.shape[0]
    test_size = X_valid.shape[0]

    print("\n##### 교차 검증: {}, 정확도: {}  #####" .format(idx_iter, accuracy))
    print('학습 레이블 데이터 분포:\n ', Y_train.shape[0])
    print('검증 레이블 데이터 분포:\n ', Y_valid.shape[0], '\n\n')

Epoch 1/30

Epoch 00001: val_loss improved from inf to 0.68828, saving model to checkpoint\cp1.ckpt
Epoch 2/30

Epoch 00002: val_loss improved from 0.68828 to 0.50759, saving model to checkpoint\cp1.ckpt
Epoch 3/30

Epoch 00003: val_loss improved from 0.50759 to 0.43004, saving model to checkpoint\cp1.ckpt
Epoch 4/30

Epoch 00004: val_loss improved from 0.43004 to 0.37642, saving model to checkpoint\cp1.ckpt
Epoch 5/30

Epoch 00005: val_loss improved from 0.37642 to 0.32259, saving model to checkpoint\cp1.ckpt
Epoch 6/30

Epoch 00006: val_loss improved from 0.32259 to 0.31998, saving model to checkpoint\cp1.ckpt
Epoch 7/30

Epoch 00007: val_loss improved from 0.31998 to 0.29588, saving model to checkpoint\cp1.ckpt
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.29588
Epoch 9/30

Epoch 00009: val_loss improved from 0.29588 to 0.27344, saving model to checkpoint\cp1.ckpt
Epoch 10/30

Epoch 00010: val_loss improved from 0.27344 to 0.23091, saving model to checkpoint\cp1.ckpt
Epoc

In [None]:
prediction = model.predict(test_X)
prediction.shape

In [None]:
submission

In [None]:
submission.iloc[:,1:]=prediction

In [None]:
submission.to_csv('cnn_LSTM_stacked5_fold3(jy_last).csv', index=False)

In [None]:
submission

In [None]:
sub = pd.read_csv('test_c_l.csv')

In [None]:
def result_pred(sub1, sub2) :
    subpre1 = []
    subpre2 = []
    corcount = 0
    
    for i in range(len(sub1)) :
        subpre1.append(sub1.iloc[i,1:].idxmax())
        subpre2.append(sub2.iloc[i,1:].idxmax())

    subpre1 = pd.DataFrame(subpre1, columns = ['label'])
    subpre1.head()
    
    subpre2 = pd.DataFrame(subpre2, columns = ['label'])
    subpre2.head()
    
    print("파일 1 운동중 라벨 수 :", len(subpre1[subpre1['label'] != '26']), "\n파일 2 운동중 라벨 수 :", len(subpre2[subpre2['label'] != '26']))
    for i in range(len(subpre1)) :
        if (subpre1['label'][i] == subpre2['label'][i]) : 
            corcount += 1
    print("예측 결과 일치 개수 :", corcount)
    
    return subpre1

In [None]:
sub1 = pd.read_csv('cnn_LSTM_last(jun).csv')
sub2 = pd.read_csv('cnn_LSTM_stacked5_fold3(jy_epoch10_batch256).csv')

subpre1 = result_pred(sub1, sub2)
subpre1

In [None]:
y == 

In [None]:
subpre1.to_csv('subpre1.csv')

In [None]:
sub1 = pd.read_csv('cnn_LSTM_stacked(jun).csv')
sub2 = pd.read_csv('cnn_LSTM_stacked5_fold3(jy_last).csv')

subpre1 = result_pred(sub1, sub2)
subpre1.head(15)

In [None]:
sub1 = pd.read_csv(path+'baseline_aug_1.csv')
subpre3 = []

for i in range(len(sub1)) :
    subpre3.append(sub1.iloc[i,1:].idxmax())

subpre3 = pd.DataFrame(subpre3, columns = ['label'])
subpre3.head()

In [None]:
len(subpre3[subpre3['label'] != '26']) # 1.8747789247

In [None]:
sub1 = pd.read_csv(path+'baseline_aug(npTodf).csv')
subpre3 = []

for i in range(len(sub1)) :
    subpre3.append(sub1.iloc[i,1:].idxmax())

subpre3 = pd.DataFrame(subpre3, columns = ['label'])

len(subpre3[subpre3['label'] != '26']) # 168

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

stfold = StratifiedKFold(n_splits=3, shuffle=True)
idx_iter = 0 
skf_accuracy=[]
epochs = 10
batch_size = 256

for train_idx, valid_idx in stfold.split(X, y_train_total) : 
    Y_train, Y_valid = tf.gather(y, train_idx), tf.gather(y, valid_idx)
    X_train, X_valid = tf.gather(X, train_idx), tf.gather(X, valid_idx)

    checkpoint_path = "checkpoint/cp.ckpt"
    cp_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True, mode='min')

    early_stopping = EarlyStopping(monitor='loss', patience=5, mode='min')
    model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_split=0.02, callbacks=[early_stopping, cp_callback])
    pred = model.predict(X_valid)

    # 반복 시 마다 정확도 측정 -> 수정해야함; 
    idx_iter += 1 
    y_pred = (pred > 0.5) 
    accuracy = np.round(accuracy_score(Y_valid, y_pred), 4)
    train_size = X_train.shape[0]
    test_size = X_valid.shape[0]

    print("\n##### 교차 검증: {}, 정확도: {}  #####" .format(idx_iter, accuracy))
    print('학습 레이블 데이터 분포:\n ', Y_train.shape[0])
    print('검증 레이블 데이터 분포:\n ', Y_valid.shape[0], '\n\n')