In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from datetime import datetime

# sklearn
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

# keras
import keras
from keras import models, optimizers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM
from keras import Model ,models, layers, optimizers, regularizers
from keras.callbacks import ModelCheckpoint
import keras.backend.tensorflow_backend as K
import tensorflow as tf

import gc

In [None]:
# Load the pre-built dataset arrays from disk.

# Scaled feature matrices. The "normal" splits are what the autoencoder is
# fitted and validated on; the full valid/test splits are scored afterwards.
train_normal_scaled = np.load('dataset/train_normal_scaled_AE.npy')
valid_normal_scaled = np.load('dataset/valid_normal_scaled_AE.npy')
valid_scaled = np.load('dataset/valid_scaled_AE.npy')
test_scaled = np.load('dataset/test_scaled_AE.npy')

# Label arrays used to score predictions on the full valid/test splits.
valid_label = np.load('dataset/valid_label_AE.npy')
test_label = np.load('dataset/test_label_AE.npy')

In [None]:

# Window length: number of consecutive rows grouped into one sequence sample.
# ************************** #
timesteps = 10
# ************************** #


def _trim_to_multiple(arr, step):
    """Return `arr` without the trailing rows that do not fill a whole window.

    The cut-off is an explicit length rather than a negative remainder:
    `arr[:-(n % step)]` turns into `arr[:0]` (an empty array) whenever `n` is
    already a multiple of `step`, which also made re-running this cell wipe
    the data.
    """
    usable_rows = arr.shape[0] - arr.shape[0] % step
    return arr[:usable_rows]


def _to_windows(arr, step):
    """Reshape a 2-D row array into 3-D [samples, timesteps, features]."""
    return arr.reshape((arr.shape[0] // step, step, arr.shape[1]))


# Drop the incomplete trailing window from every feature matrix.
train_normal_scaled = _trim_to_multiple(train_normal_scaled, timesteps)
valid_normal_scaled = _trim_to_multiple(valid_normal_scaled, timesteps)
valid_scaled = _trim_to_multiple(valid_scaled, timesteps)
test_scaled = _trim_to_multiple(test_scaled, timesteps)

# Cut the labels to the row count of their (trimmed) feature matrix so the
# per-row scores computed later line up with them.
valid_label = valid_label[:valid_scaled.shape[0]]
test_label = test_label[:test_scaled.shape[0]]

# Reshape inputs to 3-D [samples, timesteps, features] for the recurrent layers.
train_normal_scaled_reshape = _to_windows(train_normal_scaled, timesteps)
valid_normal_scaled_reshape = _to_windows(valid_normal_scaled, timesteps)
valid_scaled_reshape = _to_windows(valid_scaled, timesteps)
test_scaled_reshape = _to_windows(test_scaled, timesteps)

In [None]:
lr = 0.001   # learning rate
batch = 100  # batch size
epochs = 10

# Feature count is read from the training tensor ([samples, timesteps,
# features]) instead of being hard-coded, so the model follows the data.
n_features = train_normal_scaled_reshape.shape[2]

# Build, train and save the GRU autoencoder.
# `tf` (imported at the top of the notebook) is used directly for device
# placement rather than reaching it through the Keras backend module.
with tf.device('/gpu:0'):
    gru_ae = models.Sequential()

    # Encoder: stacked GRUs narrowing 64 -> 32 -> 16; the last layer returns
    # only its final state, which RepeatVector copies once per timestep.
    gru_ae.add(layers.GRU(64, activation='relu', input_shape=(timesteps, n_features), return_sequences=True))
    gru_ae.add(layers.GRU(32, activation='relu', return_sequences=True))
    gru_ae.add(layers.GRU(16, activation='relu', return_sequences=False))
    gru_ae.add(layers.RepeatVector(timesteps))

    # Decoder: mirror of the encoder, widening 16 -> 32 -> 64, followed by a
    # per-timestep Dense layer that maps back to the input feature count.
    gru_ae.add(layers.GRU(16, activation='relu', return_sequences=True))
    gru_ae.add(layers.GRU(32, activation='relu', return_sequences=True))
    gru_ae.add(layers.GRU(64, activation='relu', return_sequences=True))
    gru_ae.add(layers.TimeDistributed(layers.Dense(n_features)))

    # Compile with reconstruction MSE only. The previous 'accuracy' metric is
    # a classification metric and carries no meaning for real-valued
    # reconstruction targets, so it is not tracked.
    gru_ae.compile(loss='mse', optimizer=optimizers.Adam(lr=lr))

    # Fit on normal data only: the input is also the target.
    history = gru_ae.fit(train_normal_scaled_reshape, train_normal_scaled_reshape,
                         epochs=epochs, batch_size=batch,
                         validation_data=(valid_normal_scaled_reshape, valid_normal_scaled_reshape))

    # Persist the trained model.
    gru_ae.save('./model/gru_ae.h5')

In [None]:

# Reload the trained autoencoder written by the training cell, so this cell
# does not depend on the in-memory model object.
gru_ae = models.load_model('./model/gru_ae.h5')

# valid_label / test_label were already cut to whole windows together with
# their feature matrices; the *_rest names are kept as aliases because other
# cells read them.
valid_label_rest = valid_label
test_label_rest = test_label

# Reconstruct the validation windows.
valid_predictions = gru_ae.predict(valid_scaled_reshape)

# Per-row reconstruction error: the 3-D predictions are flattened back to the
# 2-D layout of valid_scaled and the squared error is averaged over features.
mse = np.mean(np.power(valid_scaled - valid_predictions.reshape(valid_scaled.shape), 2), axis=1)

best_f1 = 0
best_pr = 0
best_th = 0.9

# Search for the best threshold on a fixed grid from 0.9 down towards 0.1
# (exclusive) in steps of 0.0002. A precomputed grid is used because a float
# counter decremented by 0.0002 never compares equal to 0.1, so the former
# `while True` loop had no reachable exit.
for i in np.linspace(0.9, 0.1, 4000, endpoint=False):
    # Rows whose error reaches the threshold are flagged as class 1.
    mse_th = (mse >= i).astype(int)

    f1 = f1_score(valid_label_rest, mse_th) * 100
    if f1 > best_f1:
        best_th = float(i)
        best_f1 = f1
        # NOTE(review): the original nested precision check used undefined
        # names; precision is now only recorded at the best-F1 threshold.
        # Confirm this matches the intended selection rule.
        best_pr = precision_score(valid_label_rest, mse_th) * 100

print('\n\n')
print('best f1 : ', best_f1)
print('best th : ', best_th)

In [None]:
# Reconstruct the test windows with the trained GRU autoencoder.
test_predictions = gru_ae.predict(test_scaled_reshape)

# Per-row reconstruction error: predictions are flattened back to the 2-D
# layout of test_scaled and the squared error is averaged over features.
test_mse = np.mean(np.power(test_scaled - test_predictions.reshape(test_scaled.shape), 2), axis=1)

# Classify against the chosen threshold: error >= best_th -> 1, otherwise 0.
# The result goes into its own array so the error scores stay available.
test_pred = (test_mse >= best_th).astype(int)

# Report the metrics against test_label, which was cut to whole windows
# together with test_scaled.
print("accuracy_score :",accuracy_score(test_label, test_pred)*100)
print("recall_score :",recall_score(test_label, test_pred)*100)
print("precision_score :",precision_score(test_label, test_pred)*100)
print("f1_score :",f1_score(test_label, test_pred)*100)