In [1]:
import pandas as pd
import numpy as np
import keras
import matplotlib.pyplot as plt
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
import joblib
from sklearn import metrics

Using TensorFlow backend.


In [2]:
data = pd.read_csv("../dataset")
data.count()

protocol          17106
transfer_rate     17106
avg_pkt_size      17106
host_ratio        17106
host_num          17106
diff_port_num     17106
diff_sport_num    17106
opp_ratio         17106
label             17106
dtype: int64

In [3]:
data.label.value_counts()

normal         3414
tcpsynflood    3172
portscan       2587
udpflood       2462
icmpflood      2224
ipsweep        2007
pingofdeath    1240
Name: label, dtype: int64

In [4]:
data.label.value_counts(normalize=True)

normal         0.199579
tcpsynflood    0.185432
portscan       0.151233
udpflood       0.143926
icmpflood      0.130013
ipsweep        0.117327
pingofdeath    0.072489
Name: label, dtype: float64

In [5]:
x_columns = data.columns.drop('label')
x = data[x_columns]
x = x.values
y = data['label']
x.shape

(17106, 8)

In [6]:
le_proto = LabelEncoder()
x[:, 0] = le_proto.fit_transform(x[:, 0])
joblib.dump(le_proto, '../preprocess/le_proto-conv2d.joblib')

le_label = LabelEncoder()
y = le_label.fit_transform(y)
joblib.dump(le_label, '../preprocess/le_label-conv2d.joblib')

sc = StandardScaler()
x=sc.fit_transform(x)

joblib.dump(sc, '../preprocess/scaler-conv2d.joblib')

x = x.reshape(x.shape[0],2,4,1)
x.shape

(17106, 2, 4, 1)

In [7]:
model = Sequential()

model.add(Conv2D(16, (2,2),padding = 'same', activation ='relu',input_shape = (2,4,1)))
model.add(MaxPooling2D(pool_size = (2,2), padding = 'same'))
model.add(Conv2D(32, (2,2),padding = 'same',activation ='relu'))
model.add(MaxPooling2D(pool_size = (2,2), padding = 'same'))
model.add(Dense(64, activation = 'relu'))
model.add(Flatten())
model.add(Dense(7, activation = 'softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 2, 4, 16)          80        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 1, 2, 16)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 1, 2, 32)          2080      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 1, 1, 32)          0         
_________________________________________________________________
dense_1 (Dense)              (None, 1, 1, 64)          2112      
_________________________________________________________________
flatten_1 (Flatten)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 7)                

In [8]:
save_checkpoint = ModelCheckpoint(
    filepath = '../trained_models/cnn2d.h5',
    monitor = 'val_accuracy',
    save_best_only = True,
    save_weights_only = False,
    mode = 'auto'
)

learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', 
                                            patience=10, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)

history = model.fit(x, y, batch_size=64, validation_split=0.3,callbacks=[save_checkpoint], epochs=500,shuffle=False)

Train on 11974 samples, validate on 5132 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500


Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500


Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 

Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 

Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 

Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


In [9]:
# import matplotlib.pyplot as plt
# import matplotlib

# matplotlib.rcParams.update({'font.size': 20})
# plt.figure(figsize=(20,16))
# plt.plot(history.history['val_accuracy'], color='b', label="Validation Accuracy")
# plt.plot(history.history['accuracy'], color='r', label="Training Accuracy")

# plt.ylim([0.9,1])
# plt.xlim([0,len(history.history['loss'])])
# plt.title('Model accuracy')

# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Validation', 'Train'])
# plt.savefig('cnn2d-acc.png')
# plt.show()


In [10]:
# plt.figure(figsize=(20,16))
# plt.plot(history.history['val_loss'], color='b', label="Validation Loss")
# plt.plot(history.history['loss'], color='r', label="Training Loss")

# plt.ylim([0,0.6])
# plt.xlim([0,len(history.history['loss'])])
# plt.title('Model loss')

# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Validation', 'Train'], loc='upper left')
# plt.savefig('cnn2d-loss.png')
# plt.show()

In [11]:
max(history.history['val_accuracy'])

0.9888932108879089

In [12]:
model = load_model('../trained_models/cnn2d.h5')

datatest = pd.read_csv("../datatest")
x_columns = data.columns.drop('label')
x = data[x_columns]
x = x.values
y = data['label']

x[:, 0] = le_proto.transform(x[:, 0])
#joblib.dump(le_proto, '../preproc/le_proto2.joblib',protocol=1) #save label encoder weights to file
y = le_label.transform(y)
#joblib.dump(le_label, '../preproc/le_label2.joblib',protocol=1) #save label encoder weights to file
x=sc.transform(x)
#joblib.dump(sc, '../preproc/scaler2.joblib',protocol=1)
x = x.reshape(x.shape[0],2,4,1)

y_pred = model.predict_classes(x)
y_pred = le_label.inverse_transform(y_pred)
y_test = le_label.inverse_transform(y)
stats = {}
labels = ['normal','tcpsynflood','icmpflood','udpflood','ipsweep','portscan','pingofdeath']
for i in labels:
    stats[i]={}
    for j in labels:
        stats[i][j] = 0

In [13]:
for i in range(len(y)):
    stats[y_test[i]][y_pred[i]] += 1
stats

{'normal': {'normal': 3394,
  'tcpsynflood': 8,
  'icmpflood': 5,
  'udpflood': 0,
  'ipsweep': 5,
  'portscan': 2,
  'pingofdeath': 0},
 'tcpsynflood': {'normal': 0,
  'tcpsynflood': 3172,
  'icmpflood': 0,
  'udpflood': 0,
  'ipsweep': 0,
  'portscan': 0,
  'pingofdeath': 0},
 'icmpflood': {'normal': 116,
  'tcpsynflood': 0,
  'icmpflood': 2108,
  'udpflood': 0,
  'ipsweep': 0,
  'portscan': 0,
  'pingofdeath': 0},
 'udpflood': {'normal': 0,
  'tcpsynflood': 0,
  'icmpflood': 0,
  'udpflood': 2462,
  'ipsweep': 0,
  'portscan': 0,
  'pingofdeath': 0},
 'ipsweep': {'normal': 1,
  'tcpsynflood': 0,
  'icmpflood': 0,
  'udpflood': 0,
  'ipsweep': 2006,
  'portscan': 0,
  'pingofdeath': 0},
 'portscan': {'normal': 0,
  'tcpsynflood': 0,
  'icmpflood': 0,
  'udpflood': 0,
  'ipsweep': 0,
  'portscan': 2587,
  'pingofdeath': 0},
 'pingofdeath': {'normal': 0,
  'tcpsynflood': 0,
  'icmpflood': 1,
  'udpflood': 0,
  'ipsweep': 0,
  'portscan': 0,
  'pingofdeath': 1239}}

In [14]:
tpr={}
fpr={}
precision={}
f1score={}
for i in labels:
    tp = stats[i][i]
    fp = 0
    tn = 0
    fn = 0
    for j in labels:
        if i != j:
            fn += stats[i][j]
            fp += stats[j][i]
            for k in stats[j].keys():
                if k != i:
                    tn += stats[j][k]
    tpr[i] = tp / (tp + fn)
    fpr[i] = fp / (fp + tn)
    precision[i] = tp / (tp + fp)
    f1score[i] = 2/(1/tpr[i] + 1/precision[i])
print('TPR: \n ' + str(tpr))
print('FRP:\n ' + str(fpr))
print('Precision:\n ' + str(precision))
print('F1-score: \n' + str(f1score))

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

TPR: 
 {'normal': 0.994141769185706, 'tcpsynflood': 1.0, 'icmpflood': 0.947841726618705, 'udpflood': 1.0, 'ipsweep': 0.9995017438963627, 'portscan': 1.0, 'pingofdeath': 0.9991935483870967}
FRP:
 {'normal': 0.008545135845749343, 'tcpsynflood': 0.0005741352088416823, 'icmpflood': 0.00040317161671818305, 'udpflood': 0.0, 'ipsweep': 0.0003311477581296775, 'portscan': 0.00013775053378331842, 'pingofdeath': 0.0}
Precision:
 {'normal': 0.9666761606379949, 'tcpsynflood': 0.9974842767295597, 'icmpflood': 0.9971617786187322, 'udpflood': 1.0, 'ipsweep': 0.9975136747886624, 'portscan': 0.9992275009656237, 'pingofdeath': 1.0}
F1-score: 
{'normal': 0.980216606498195, 'tcpsynflood': 0.9987405541561712, 'icmpflood': 0.9718764407561089, 'udpflood': 1.0, 'ipsweep': 0.998506719761075, 'portscan': 0.999613601236476, 'pingofdeath': 0.9995966115369102}
Accuracy: 0.9919326552086987
