## 라이브러리 import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
# Fix the random seed so that repeated runs are reproducible
random_seed = 777

# Apply the seed through the Keras utility (the same call is repeated inside ANN())
tf.keras.utils.set_random_seed(random_seed)
# NOTE(review): the commented-out line below is presumably a quick check that the seed
# produces the same random numbers on every run — confirm before removing it.
# tf.random.normal([5], 0, 1)

## 데이터 불러오기

In [None]:
# Load the pre-processed feature matrices and label matrices as NumPy arrays.
# The CSV files have no header row; they are read in the order
# TrainData, ValidData, TrainLabel, ValidLabel.
TrainData, ValidData, TrainLabel, ValidLabel = (
    np.array(pd.read_csv(csv_path, sep=",", header=None))
    for csv_path in (
        './ProcessedData/TrainData.csv',
        './ProcessedData/ValidData.csv',
        './ProcessedData/TrainLabel.csv',
        './ProcessedData/ValidLabel.csv',
    )
)

# Display the shapes of all four arrays as the cell output
TrainData.shape, ValidData.shape, TrainLabel.shape, ValidLabel.shape

## Keras 기반 ANN(Artificial Neural Network) 구조 설계를 위한 기본 사항

In [None]:
# Declare the model and insert the input layer
keras.backend.clear_session() # Clear the session: reset the variables Keras is holding
model = keras.Sequential()
# The input is a vector of 20 values per sample.
# NOTE(review): 20 presumably matches the number of columns in TrainData — confirm.
model.add(keras.layers.InputLayer(shape = (20,)))
model.summary()

In [None]:
# Append a Dense layer with 10 units (no activation specified) named 'Layer1'
model.add(keras.layers.Dense(units = 10, name = 'Layer1'))
model.summary()

In [None]:
# Append a Dense layer with an activation function (ReLU) and an explicit parameter
# initializer: the kernel of 'Layer2' is initialized to all zeros
model.add(keras.layers.Dense(units = 10, activation = keras.activations.relu,
                             kernel_initializer = keras.initializers.Zeros(), name = 'Layer2'))
model.summary()

In [None]:
# Inspect the model's parameters.
# get_weights() returns a list of arrays; [-1] selects the last array in that list.
# NOTE(review): presumably the last array is the bias of 'Layer2' (Dense keeps its kernel
# before its bias), so the zero-initialized kernel would be at [-2] — confirm which one
# was meant to be displayed.
model.get_weights()[-1]

In [None]:
# Remove the last layer from the model, then print the summary to confirm
model.pop()
model.summary()

In [None]:
# Append a new last layer: a Dense layer with 2 units and ReLU activation named 'Layer3'
model.add(keras.layers.Dense(units = 2, activation = keras.activations.relu, name = 'Layer3'))
model.summary()

## ANN(Artificial Neural Network) hyperparameter 설정

In [None]:
# Hyperparameters read as module-level globals by the cells below
learningRate  = 0.0001  # learning rate passed to the Adam optimizer in ANN()
noOfNeuron    = 16      # number of units in each hidden Dense layer built by ANN()
Epoch         = 500     # number of epochs passed to fit()

## Keras 기반 ANN 구조(Architecture) 설계

https://keras.io/api/layers/activations/ : 활성화 함수 참고   
https://keras.io/api/optimizers/ : 최적화 알고리즘 참고   
https://keras.io/api/losses/probabilistic_losses/ : 분류모델 손실함수 참고

In [None]:
def ANN(input_data):
    """Build and compile a fully connected classifier for ``input_data``.

    The network is a ``keras.Sequential`` model made of an input layer sized
    from ``input_data.shape[1]``, two ReLU hidden layers ('Hidden1', 'Hidden2')
    with ``noOfNeuron`` units each, and a 2-unit softmax layer ('Output').
    It is compiled with Adam (learning rate ``learningRate``), categorical
    cross-entropy loss and the 'accuracy' metric.

    The module-level globals ``random_seed``, ``noOfNeuron`` and
    ``learningRate`` are read at call time.

    Args:
        input_data: Array-like whose ``shape[1]`` gives the input width.

    Returns:
        The compiled (untrained) Keras model.
    """
    # Start from a clean Keras state and re-apply the seed before any layer is created
    keras.backend.clear_session()
    tf.keras.utils.set_random_seed(random_seed)

    n_features = input_data.shape[1]
    relu = keras.activations.relu
    softmax = keras.activations.softmax

    net = keras.Sequential()
    net.add(keras.layers.InputLayer(shape=(n_features,)))  # input layer

    # (layer name, units, activation) for each Dense layer, in stacking order
    dense_specs = (
        ('Hidden1', noOfNeuron, relu),
        ('Hidden2', noOfNeuron, relu),
        ('Output', 2, softmax),
    )
    for layer_name, width, activation in dense_specs:
        net.add(keras.layers.Dense(units=width, activation=activation, name=layer_name))

    optimizer = keras.optimizers.Adam(learning_rate=learningRate)  # optimization algorithm
    loss_fn = keras.losses.CategoricalCrossentropy()               # objective being minimized
    net.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
    return net

In [None]:
# Build the model; ANN() takes the input width from TrainData
ANN_model = ANN(TrainData)

# Check the model structure and the number of parameters
ANN_model.summary()

In [None]:
# Inspect the shape of every parameter array directly.
# get_weights() is called once and iterated, instead of being re-evaluated
# for the length and again for each index.
for weight in ANN_model.get_weights():
    print(weight.shape)

## Keras 기반 ANN 학습 및 평가

In [None]:
# Train the model for `Epoch` epochs, keeping the per-epoch history in `hist`,
# then measure loss and accuracy on the training data itself
hist = ANN_model.fit(TrainData, TrainLabel, epochs=Epoch, verbose=1)
Loss, Accuracy = ANN_model.evaluate(TrainData, TrainLabel, verbose=0)

print(f'Final Loss and Accuracy: {Loss:.4f}, {Accuracy * 100:.2f}%')

In [None]:
# Visualize the training history: loss and accuracy per epoch on one figure
fig, loss_ax = plt.subplots()
acc_ax = loss_ax.twinx()  # twin axes, so accuracy gets its own y-axis

# Loss curve and its axis labels
loss_ax.plot(hist.history['loss'], color="tab:red", label='train loss')
loss_ax.set(xlabel='epoch', ylabel='loss')
loss_ax.legend(loc='upper left')

# Accuracy curve and its axis label
acc_ax.plot(hist.history['accuracy'], color="tab:blue", label='train acc')
acc_ax.set(ylabel='accuracy')
acc_ax.legend(loc='lower left')

plt.show()

In [None]:
# Measure loss and accuracy of the trained model on the validation data
Loss, Accuracy = ANN_model.evaluate(ValidData, ValidLabel, verbose=0)

print(f'Validation Accuracy: {Accuracy * 100:.2f}%')

In [None]:
# Save the model.
# Create the target directory first so that saving does not fail on a fresh
# checkout where './MLmodels' does not exist yet.
import os

os.makedirs('./MLmodels', exist_ok=True)
ANN_model.save('./MLmodels/ANN_model.keras')

## ANN model 불러와서 진단

In [None]:
# Load the model back from the file written by the save cell
LoadedModel = keras.models.load_model('./MLmodels/ANN_model.keras')

In [None]:
# Evaluate the loaded model on the validation data and report its accuracy
Loss, Accuracy = LoadedModel.evaluate(ValidData, ValidLabel, verbose=0)

print('[Performance of ANN model] \n')
print(f'Accuracy : {Accuracy * 100:.2f}%')

In [None]:
# Check the predicted values (probabilities) for the validation data,
# shown as a DataFrame in the cell output
Predict_y = LoadedModel.predict(ValidData, verbose = 0)
pd.DataFrame(Predict_y)

## ++. Confusion Matrix 확인

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Check the classification results: predict on the validation data and keep the
# output in `Pred`, which the confusion-matrix cell reads
Pred = LoadedModel.predict(ValidData)
pd.DataFrame(Pred)

In [None]:
# One-hot encoding -> label encoding (index of the largest entry in each row)
true = np.argmax(ValidLabel, axis=-1)
pred = np.argmax(Pred, axis=-1)

# Build the confusion matrix.
# labels=[0, 1] keeps the matrix 2x2 even when one class is missing from both
# `true` and `pred`; otherwise the matrix shrinks and the DataFrame below fails.
# The transpose puts predictions on the rows and true classes on the columns.
CM = confusion_matrix(true, pred, labels=[0, 1]).T

# Names for the true classes (columns) and the predicted classes (rows).
# NOTE(review): assumes class index 0 is "Normal" and 1 is "Abnormal" — confirm
# against how the labels were one-hot encoded.
x = ['Normal_True', 'Abnormal_True']
y = ['Normal_Pred', 'Abnormal_Pred']

CM_df = pd.DataFrame(CM, columns=x, index=y)
CM_df

In [None]:
# Heatmap font size
sns.set(font_scale=1.2)

# Show the confusion matrix as a heatmap.
# plt.subplots returns (figure, axes); unpack both and draw on the axes explicitly.
fig, ax = plt.subplots(figsize=(8, 8))


# Percentages: each column (true class) is divided by its own total, so every
# column sums to 100%. The axis is given explicitly because np.sum(DataFrame)
# relies on DataFrame.sum(axis=None), which pandas has deprecated and will
# change to return a single scalar.
sns.heatmap(CM_df / CM_df.sum(axis=0),
            cmap='Blues',          # colormap
            annot=True,            # write the value in each cell
            fmt=".1%",             # value format (integer = "d")
            linewidths=2,
            ax=ax)

# Counts (alternative view)
# sns.heatmap(CM_df,
#             cmap='Blues',        # colormap
#             annot=True,          # write the value in each cell
#             fmt="d",             # value format (integer = "d")
#             linewidths=2,
#             ax=ax)



plt.show()