In [0]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from keras.models import Model, load_model
from keras.layers import Input, Dense, Dropout
from keras import regularizers
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
from sklearn import metrics
import matplotlib.pyplot as plt


import time
import copy
import math

In [0]:
# Mount Google Drive into the Colab runtime at /content/drive/
from google.colab import drive
drive.mount('/content/drive/')
# Root folder of the project on the mounted drive; the dataset CSV path is built from it
path = '/content/drive/My Drive/attack detection/project/'

# Read the data

In [0]:
# Available options for each dataset variant. They are kept under their own
# names so the selected values below are never confused with the option lists.
SYNC_METHODS = ['averaging', 'oversampling', 'downsampling']
CHANGED_FEATURE_COUNTS = ['2', '3']
NORMALIZING_METHODS = ['min_max', 'standard']

# Selected variant; these three strings are used to build the CSV file name
sync_method = SYNC_METHODS[0]
num_of_changed_features = CHANGED_FEATURE_COUNTS[1]
normalizing_method = NORMALIZING_METHODS[1]

# Load the dataset variant selected by the sync / changed-features / normalizing settings
csv_name = f'sync_data_{sync_method}_anomal_{num_of_changed_features}_{normalizing_method}.csv'
data_df = pd.read_csv(f'{path}data/{csv_name}')

In [0]:
# Preview the first five rows of the loaded dataset
data_df.head(n=5)

# Split into train and test sets - train includes only normal data; test includes all data (normal and anomalous)

In [0]:
# The last column of the frame is the label; every column before it is a feature
n_feature_cols = data_df.shape[1] - 1
x_data = data_df.iloc[:, :n_feature_cols]
y_data = data_df.iloc[:, n_feature_cols]

In [0]:
# Preview the first five rows of the feature columns
x_data.head(n=5)

In [0]:
# Preview the first five values of the label column
y_data.head(n=5)

In [0]:
# Rows labelled 0.0 form the training set
normal_mask = y_data == 0.0
train_idx = y_data[normal_mask].index.values

# Select with the boolean mask instead of passing index labels to the positional
# .iloc accessor, which is only correct when the index is a default 0..n-1 range.
x_train = x_data.loc[normal_mask]
y_train = y_data.loc[normal_mask]

# The test set is the full dataset (rows of every label)
x_test = x_data
y_test = y_data

In [0]:
# Preview the first five rows of the training features
x_train.head(n=5)

# Autoencoder

In [0]:
# Training hyper-parameters, consumed by the fit call
nb_epoch = 500
batch_size = 64

# Input dimension size (first and last autoencoder layer size)
input_dim = x_train.shape[1]

# A sigmoid output can only produce values in (0, 1), so it is kept only for the
# 'min_max' variant. For any other normalisation (e.g. 'standard') a linear
# output is used so values outside that range can be reconstructed.
# NOTE(review): assumes 'min_max' data lies in [0, 1] and 'standard' data is
# zero-mean / unit-variance - confirm against the preprocessing that wrote the CSV.
output_activation = 'sigmoid' if normalizing_method == 'min_max' else 'linear'

# Setting the auto encoder layers: input_dim -> 4 -> 2 -> 4 -> input_dim
input_layer = Input(shape=(input_dim, ))

encoder = Dense(4, activation="relu")(input_layer)
encoder = Dense(2, activation="relu")(encoder)

decoder = Dense(4, activation="relu")(encoder)
decoder = Dense(input_dim, activation=output_activation)(decoder)

autoencoder = Model(inputs=input_layer, outputs=decoder)

autoencoder.summary()

In [0]:
# Configures the learning process of the network
autoencoder.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])

# Stop training once the validation loss has not improved for
# `early_stopping_patience` consecutive epochs and restore the weights of the
# best epoch; nb_epoch then acts as an upper bound on training length.
early_stopping_patience = 20
early_stopping = EarlyStopping(monitor='val_loss',
                               patience=early_stopping_patience,
                               restore_best_weights=True)

# Train the autoencoder to reconstruct its own input (10% of the training rows
# are held out for validation); returns the history object
history = autoencoder.fit(x_train, x_train,
                          epochs=nb_epoch,
                          batch_size=batch_size,
                          verbose=2,
                          validation_split=0.1,
                          callbacks=[early_stopping])

In [0]:
# Draw the training and validation loss curves stored in the fit history
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])

plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Val'], loc='upper left')
plt.show()

In [0]:
# Predictions and results for the TEST set

# Reconstruct every test row and score it by its mean squared reconstruction
# error across all feature columns
predictions = autoencoder.predict(x_test)
square_errors = np.power(x_test - predictions, 2)
mse = np.mean(square_errors, axis=1)
mse_series = pd.Series(mse)

# Rank rows from the largest to the smallest reconstruction error
most_anomal_trx = mse_series.sort_values(ascending=False)
columns = ["id", "mse_all_columns", "label"]
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
# Every ranked row is recorded as [row id, error, 1].
items = [[row_id, row_mse, 1] for row_id, row_mse in most_anomal_trx.items()]

df_anomal_trx = pd.DataFrame(items, columns=columns).set_index('id')

# Keep as many top-ranked rows as there are rows labelled 1.0, then look up
# the true labels of those rows to count how many are real anomalies
n_anomalies = y_data[y_data==1.0].shape[0]
y_pred = df_anomal_trx.head(n_anomalies)
y_true = y_test[y_pred.index]

print('found ' + str(y_true.sum()) + ' anomalies out of ' + str(n_anomalies))