In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, normalize
import keras
from keras.models import Sequential
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import Dense, Dropout
from keras.metrics import CategoricalAccuracy, TruePositives, TrueNegatives, FalsePositives, FalseNegatives

Using TensorFlow backend.


In [2]:
df = pd.read_csv('Balanced_Dataset.csv')

In [3]:
del df['Unnamed: 0']

In [4]:
df.shape

(28730, 21)

## This time the data is balanced

In [6]:
df[' Label'].value_counts()

BENIGN       14365
DrDoS_NTP    14365
Name:  Label, dtype: int64

In [7]:
X = normalize( ( df.loc[:, df.columns != ' Label'] ).values )
y = df[' Label']
label_encoder = LabelEncoder()
y = y.map(lambda value : 0 if value == 'BENIGN' else 1)
y = keras.utils.to_categorical(y, num_classes=y.nunique())

In [8]:
random_state = 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=True, stratify=y, random_state=random_state)

In [9]:
accuracy = CategoricalAccuracy() 
tp = TruePositives()
tn = TrueNegatives()
fp = FalsePositives()
fn = FalseNegatives()
metrics = [accuracy, tp, tn, fp, fn]

model = Sequential()
model.add(Dense(64, input_shape=(len(df.columns) - 1 ,))) 
model.add(LeakyReLU(alpha=0.3))
model.add(Dense(128))
model.add(LeakyReLU(alpha=0.3))
model.add(Dense(32))
model.add(LeakyReLU(alpha=0.3))
model.add(Dense(8))
model.add(LeakyReLU(alpha=0.3))
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=metrics)

model.fit(x=X_train, y=y_train, epochs=20, batch_size=512, verbose=0, validation_data=(X_test, y_test))

scores = model.evaluate(X_test, y_test, verbose=2)
acc, loss, tpn, tnn, fpn, fnn = scores[1]*100, scores[0]*100, scores[2], scores[3], scores[4], scores[5]
totaln = tpn + tnn + fpn + fnn
print('Baseline: accuracy: {:.2f}%: loss: {:2f}'.format(acc, loss))
print('\tTrue Positive Rate: {} ({})'.format(tpn/totaln, tpn))
print('\tTrue Negative Rate: {} ({})'.format(tnn/totaln, tnn))
print('\tFalse Positive Rate: {} ({})'.format(fpn/totaln, fpn))
print('\tFalse Negative Rate: {} ({})'.format(fnn/totaln, fnn))

Baseline: accuracy: 97.29%: loss: 7.934553
	True Positive Rate: 0.4864465773652568 (9224.0)
	True Negative Rate: 0.4864465773652568 (9224.0)
	False Positive Rate: 0.01355342263474317 (257.0)
	False Negative Rate: 0.01355342263474317 (257.0)
