In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix

# --- Windowing configuration ---
timesteps = 3  # consecutive rows grouped into one LSTM sample
data_size = 10000  # rows to use; the selection must contain both attack and normal data
# Number of complete windows; floor division discards the remainder (e.g. 10 // 3 == 3).
data_resize = data_size // timesteps
# Row count trimmed to the largest multiple of timesteps so it reshapes cleanly.
data_trunc_size = data_size - data_size % timesteps

# --- Model / training configuration ---
data_dim = 36  # number of feature columns per row
num_classes = timesteps  # one output unit per row of a window
batchsize = 32  # samples per batch
drop = 0.2  # dropout rate

#%%
# Load the dataset and discard the columns that are not fed to the model.
dataset = pd.read_csv("CyberSecurity.csv")
unused_columns = [
    "Timestamp1",
    "AIT402", "FIT401", "LIT401", "P402", "UV401",
    "AIT501", "AIT502",
    "FIT501", "FIT502", "FIT503", "FIT504",
    "P501", "PIT501", "PIT502", "PIT503",
    "Normal/Attack",
]
dataset = dataset.drop(columns=unused_columns)

# The first data_dim columns are the inputs; the column right after them is the target.
# NOTE(review): "Normal/Attack" is dropped above, so the target is whatever column sits
# at position data_dim after the drop -- confirm this is the intended label column.
x_data = dataset.iloc[:, :data_dim].values
y_data = dataset.iloc[:, data_dim].values

# Keep only as many rows as divide evenly into windows of `timesteps` rows.
X = x_data[:data_trunc_size]
Y = y_data[:data_trunc_size]

# LSTM input layout: (samples, timesteps, features); targets: (samples, timesteps).
X_shaped = X.reshape((data_resize, timesteps, data_dim))
Y_shaped = Y.reshape((data_resize, timesteps))

print(f"X shape is : {X_shaped.shape}")
print(f"Y shape is : {Y_shaped.shape}")

# Hold out 30% of the windows for testing.
X_train, X_test, y_train, y_test = train_test_split(X_shaped, Y_shaped, test_size=0.3)

# Build the stacked-LSTM classifier.
# Expected input shape is (batch_size, timesteps, data_dim); only (timesteps, data_dim)
# is declared here and the batch size is inferred from the data.
# A Dropout layer follows every LSTM layer to reduce over-fitting.
# Binary cross-entropy is used even though the output has num_classes (> 1) units,
# because each unit is an independent binary decision on one element of the array.
# For multiclass classification, use categorical cross-entropy instead.
model = Sequential([
    # The first three LSTM layers return the full sequence of 36-dimensional vectors.
    LSTM(36, return_sequences=True, input_shape=(timesteps, data_dim)),
    Dropout(drop),
    LSTM(36, return_sequences=True),
    Dropout(drop),
    LSTM(36, return_sequences=True),
    Dropout(drop),
    # The last LSTM layer returns a single 36-dimensional vector.
    LSTM(36),
    Dropout(drop),
    Dense(num_classes, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model, reporting validation metrics on the held-out windows.
model.fit(X_train, y_train, batch_size=batchsize, epochs=1, validation_data=(X_test, y_test))

# Evaluate the model on the held-out test set (returns loss and accuracy).
# The original call scored the training set and passed `timesteps` positionally into
# the batch_size slot; using the test set makes this figure comparable to the manual
# accuracy computed further down.
loss, acc = model.evaluate(X_test, y_test, batch_size=batchsize)
print("Keras: \n%s: %.2f%%" % (model.metrics_names[1], acc*100))

# Shape of the prediction is (number of test windows, timesteps).
proba = model.predict(X_test)
print('predictions shape:', proba.shape)

# proba holds probabilities; anything above the threshold is considered positive.
# 0.5 matches the cut-off Keras uses for its binary accuracy metric, so the manual
# accuracy below agrees with the value reported by model.evaluate.
threshold = 0.5
# Cast to int so predictions and targets share the same label type.
y_pred = (proba > threshold).astype(int)

# Flatten to a single dimension for comparison and to create the confusion matrix.
y_pred_single_dim = y_pred.reshape(-1)
y_test_single_dim = y_test.reshape(-1)
print(y_pred_single_dim)
print(y_test_single_dim)

matrix = confusion_matrix(y_test_single_dim, y_pred_single_dim)
# Fraction of matching elements, computed with a vectorized mean.
manual_acc = (y_pred_single_dim == y_test_single_dim).mean()
print("Keras: \n%s: %.2f%%" % (model.metrics_names[1], manual_acc*100))
print(matrix)


X shape is : (3333, 3, 36)
Y shape is : (3333, 3)
Keras: 
accuracy: 86.50%
predictions shape: (1000, 3)
[False False False ... False False False]
[0 0 0 ... 1 1 1]
Keras: 
accuracy: 85.47%
[[2144   21]
 [ 415  420]]
