In [8]:
import numpy as np
import pandas as pd
from keras.models import Model, load_model
from keras.layers import Input, Dense, Dropout
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import tensorflow as tf

In [9]:
def read_csv(path):
  """Load a CSV file and split it into features and anomaly labels.

  Args:
    path: Location of the CSV file. It must contain an 'is_anomaly' column.

  Returns:
    A ``(features, labels)`` tuple of NumPy arrays: ``features`` holds every
    column except 'is_anomaly', ``labels`` holds the 'is_anomaly' column.
  """
  frame = pd.read_csv(path, index_col=False)

  # Everything except the label column is treated as model input.
  feature_frame = frame.drop(columns='is_anomaly')
  label_series = frame['is_anomaly']

  return feature_frame.to_numpy(), label_series.to_numpy()

In [10]:
def plot(history, i):
  """Plot the per-epoch loss curves recorded while training one fold.

  Args:
    history: Object whose ``history`` attribute is a dict of per-epoch lists
      (as returned by ``Model.fit``). It must contain 'loss' and may contain
      'val_loss'.
    i: Fold number, shown in the figure title.
  """
  plt.plot(history.history['loss'], label='Train')
  # Previously the 'mean_squared_error' training metric was drawn as a second
  # curve and labelled "Validation", although it is measured on the training
  # data. A validation curve is only drawn when fit() actually recorded one.
  if 'val_loss' in history.history:
    plt.plot(history.history['val_loss'], label='Validation')
  plt.title('Model loss for fold ' + str(i))
  plt.ylabel('MSE')
  plt.xlabel('Epoch')
  # Labels come from the plot() calls, so the legend always matches the curves.
  plt.legend(loc='upper right')
  plt.show()

In [11]:
# def call_model_dropout(datas):
#   row, col = datas.shape

#   input_l = Input(shape=(col,))
#   encode_l = Dense(col/2 , activation='tanh')(input_l)
#   do1 = Dropout(0.5)(encode_l)
#   latent_space = Dense(1, activation='tanh')(do1)
#   do2 = Dropout(0.5)(latent_space)
#   decode_l = Dense(col/2, activation='sigmoid')(do2)
#   do3 = Dropout(0.5)(decode_l)
#   output_l = Dense(col, activation='sigmoid')(do3)

#   autoencoder = Model(input_l, output_l)

#   autoencoder.compile(
#     optimizer='adam',
#     loss='mean_squared_error',
#     metrics=['mean_squared_error']
#     )

#   # autoencoder.summary()

#   return autoencoder

def call_model_vanilla_dropout(datas, do):
  """Build and compile a one-unit-bottleneck autoencoder with dropout.

  Layer stack: Input -> Dense(1, sigmoid) -> Dropout(do) -> Dense(n, tanh),
  where n is the second dimension of ``datas``.

  Args:
    datas: Two-dimensional array; only its ``shape`` is read.
    do: Rate passed to the ``Dropout`` layer that follows the bottleneck.

  Returns:
    The model, compiled with the 'adam' optimizer and 'mean_squared_error' as
    both loss and metric.
  """
  # Two-name unpacking keeps the requirement that datas is exactly 2-D.
  _, n_features = datas.shape

  inputs = Input(shape=(n_features,))
  code = Dense(1, activation='sigmoid')(inputs)
  code = Dropout(do)(code)
  outputs = Dense(n_features, activation='tanh')(code)

  model = Model(inputs, outputs)
  model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_squared_error'],
  )

  # model.summary()

  return model

# def call_model_vanilla(datas):
#   row, col = datas.shape

#   input_l = Input(shape=(col,))
#   latent_space = Dense(1, activation='sigmoid')(input_l)
#   output_l = Dense(col, activation='tanh')(latent_space)

#   autoencoder = Model(input_l, output_l)

#   autoencoder.compile(
#     optimizer='adam',
#     loss='mean_squared_error',
#     metrics=['mean_squared_error']
#     )

#   # autoencoder.summary()

#   return autoencoder

# def call_model(datas):
#   row, col = datas.shape

#   input_l = Input(shape=(col,))
#   encode_l = Dense(col/2 , activation='tanh')(input_l)
#   latent_space = Dense(1, activation='tanh')(encode_l)
#   decode_l = Dense(col/2, activation='sigmoid')(latent_space)
#   output_l = Dense(col, activation='sigmoid')(decode_l)

#   autoencoder = Model(input_l, output_l)

#   autoencoder.compile(
#     optimizer='adam',
#     loss='mean_squared_error',
#     metrics=['mean_squared_error']
#     )

#   # autoencoder.summary()

#   return autoencoder

In [12]:
# Location of the CSV to load; read_csv() expects it to have an 'is_anomaly' column.
path = './datasets/pd-test'

# Split the file into the feature matrix (datas) and the anomaly labels (labels).
datas, labels = read_csv(path)
# Two-name unpacking: this only succeeds when datas is two-dimensional.
row, col = datas.shape

# Last expression of the cell, so the shape is shown as the cell output.
datas.shape

(22543, 122)

In [None]:
GRIT_SEARCH = [[1024, 1, 0.5]] # [ batch size, epoch, dropout ]
CV = 5

# For each hyper-parameter combination: train one autoencoder per K-fold split,
# score it on the held-out fold, then save the model of the best-scoring fold.
for BATCH_SIZE, EPOCH, do in GRIT_SEARCH:

  # Fixed random_state so the fold assignment is identical on every run.
  kf = KFold(CV, shuffle=True, random_state=123)

  n_fold = 1
  models = []
  each_fold_mse = []
  with tf.device('/GPU:0'):
    for train, test in kf.split(datas):
      # autoencoder = call_model(datas)
      # autoencoder = call_model_dropout(datas)
      # autoencoder = call_model_vanilla(datas)
      autoencoder = call_model_vanilla_dropout(datas, do)
      # Autoencoder training: the input rows are also the reconstruction target.
      history = autoencoder.fit(
        datas[train], datas[train],
        batch_size=BATCH_SIZE,
        epochs=EPOCH,
        verbose=0,
      )
      # scores[0] is the loss and scores[1] the compiled metric on the held-out fold.
      scores = autoencoder.evaluate(datas[test], datas[test], verbose=0)
      plot(history, n_fold)
      # MSE is an error magnitude, not a ratio, so it is printed as-is instead
      # of being multiplied by 100 and suffixed with '%'.
      print(f'Score for fold {n_fold}: {autoencoder.metrics_names[0]} of {scores[0]}; {autoencoder.metrics_names[1]} of {scores[1]}')
      models.append(autoencoder)
      each_fold_mse.append(scores[1])
      n_fold += 1

  model_name = str(EPOCH) + "ep-" + str(BATCH_SIZE) + "bs-" + str(CV) + "cv-" + "model_vanilla_do_" + str(do) # for dropout model
  # model_name = str(EPOCH) + "ep-" + str(BATCH_SIZE) + "bs-" + str(CV) + "cv" # for no dropout model
  model_path = "./models/" + model_name + ".h5"
  # Lower MSE means better reconstruction, so keep the fold with the smallest
  # held-out error (the previous max() selected the worst fold).
  models[each_fold_mse.index(min(each_fold_mse))].save(model_path)