<a href="https://colab.research.google.com/github/fwangliberty/AIoTDesign-Frontend/blob/master/Classification_CNN1D_68_features_split_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Intrusion Detection using CNN1D for CICIDS 2017 Data Set with 68 Features (1)

We will not use any categoric features: Source Port, Destination Port and Protocol.

We use the pre-processing dataset from mlp4nids (Multi-layer perceptron for network intrusion detection) https://github.com/ArnaudRosay/mlp4nids. In this case, we do not use SMOTE to add more anomalies. 

In [1]:
import os
from os.path import join
import glob
import pandas as pd
import numpy as np
import time
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
def display_metrics(y_test, y_pred, label_names):
  print('\nAccuracy: {:.2f}\n'.format(accuracy_score(y_test, y_pred)))

  print('Micro Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='micro')))
  print('Micro Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='micro')))
  print('Micro F1-score: {:.2f}\n'.format(f1_score(y_test, y_pred, average='micro')))

  print('Macro Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='macro')))
  print('Macro Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='macro')))
  print('Macro F1-score: {:.2f}\n'.format(f1_score(y_test, y_pred, average='macro')))

  print('Weighted Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='weighted')))
  print('Weighted Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='weighted')))
  print('Weighted F1-score: {:.2f}'.format(f1_score(y_test, y_pred, average='weighted')))

  print('\nClassification Report\n')
  print(classification_report(y_test, y_pred, target_names=label_names))

In [3]:
def display_all(df):
    with pd.option_context("display.max_rows", 100, "display.max_columns", 100): 
        print(df)

In [4]:
def make_value2index(attacks):
    #make dictionary
    attacks = sorted(attacks)
    d = {}
    counter=0
    for attack in attacks:
        d[attack] = counter
        counter+=1
    return d

In [5]:
# chganges label from string to integer/index
def encode_label(Y_str):
    labels_d = make_value2index(np.unique(Y_str))
    Y = [labels_d[y_str] for y_str  in Y_str]
    Y = np.array(Y)
    return np.array(Y)

## Step 1. Loading csv files

In [6]:
# All columns
col_names = ['Source Port', 'Destination Port',
                      'Protocol', 'Flow Duration', 'Total Fwd Packets', 'Total Backward Packets', 'Total Length of Fwd Packets',
                      'Total Length of Bwd Packets', 'Fwd Packet Length Max', 'Fwd Packet Length Min', 'Fwd Packet Length Mean',
                      'Fwd Packet Length Std', 'Bwd Packet Length Max', 'Bwd Packet Length Min', 'Bwd Packet Length Mean', 'Bwd Packet Length Std',
                      'Flow Bytes/s', 'Flow Packets/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Total',
                      'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
                      'Bwd IAT Min', 'Fwd PSH Flags', 'Fwd URG Flags', 'Fwd Header Length', 'Bwd Header Length',
                      'Fwd Packets/s', 'Bwd Packets/s', 'Min Packet Length', 'Max Packet Length', 'Packet Length Mean', 'Packet Length Std',
                      'Packet Length Variance', 'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count',
                      'URG Flag Count', 'CWE Flag Count', 'ECE Flag Count', 'Down/Up Ratio', 'Average Packet Size', 'Avg Fwd Segment Size',
                      'Avg Bwd Segment Size','Subflow Fwd Packets', 'Subflow Fwd Bytes',
                      'Subflow Bwd Packets', 'Subflow Bwd Bytes', 'Init_Win_bytes_forward', 'Init_Win_bytes_backward',
                      'act_data_pkt_fwd', 'min_seg_size_forward', 'Active Mean', 'Active Std', 'Active Max', 'Active Min', 'Idle Mean',
                      'Idle Std', 'Idle Max', 'Idle Min', 'Label']

### Option 1. Connect to Google Drive

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
# load three csv files generated by mlp4nids (Multi-layer perceptron for network intrusion detection )
# first load the train set
df_train = pd.read_csv('/content/drive/My Drive/CICIDS2017/train_set.csv',names=col_names, skiprows=1)  

In [9]:
df_train.head()

Unnamed: 0,Source Port,Destination Port,Protocol,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Fwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
546863,50203,389,17,130,2,2,328,326,164,164,164.0,0.0,163,163,163.0,0.0,5030769.0,30769.23077,43.33333,68.12733,122,4,4,4.0,0.0,4,4,4,4.0,0.0,4,4,0,0,40,40,15384.61538,15384.61538,163,164,163.6,0.547723,0.3,0,0,0,0,0,0,0,0,1,204.5,164.0,163.0,2,328,2,326,-1,-1,1,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2284286,47786,80,6,85594954,6,6,360,11595,360,0,60.0,146.969385,4344,0,1932.5,1754.831473,139.6694,0.140195,7781359.0,25800000.0,85500000,4,85500000,17100000.0,38200000.0,85500000,4,144398,28879.6,58478.93,133148,16,0,0,200,200,0.070098,0.070098,0,4344,919.615385,1498.335273,2245009.0,0,0,0,0,1,0,0,0,1,996.25,60.0,1932.5,6,360,6,11595,251,235,1,32,495.0,0.0,495,495,85500000.0,0.0,85500000,85500000,DoS Hulk
390020,51520,443,6,117915594,19,17,1406,4654,774,0,74.0,179.202245,1418,0,273.764706,485.037567,51.39269,0.305303,3369017.0,13800000.0,58900000,1,118000000,6550866.0,18900000.0,58900000,1,118000000,7366629.625,20000000.0,59000000,1,0,0,616,552,0.161132,0.144171,0,1418,163.783784,362.39329,131328.9,0,0,0,1,0,0,0,0,0,168.333333,74.0,273.764706,19,1406,17,4654,29200,181,8,32,495327.0,572653.2552,900254,90400,58400000.0,665473.7481,58900000,58000000,BENIGN
212934,61397,53,17,30731,1,1,46,93,46,46,46.0,0.0,93,93,93.0,0.0,4523.12,65.080863,30731.0,0.0,30731,30731,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,20,20,32.540431,32.540431,46,93,61.666667,27.135463,736.3333,0,0,0,0,0,0,0,0,1,92.5,46.0,93.0,1,46,1,93,-1,-1,0,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
306030,80,37209,6,49,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,40816.32653,49.0,0.0,49,49,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,32,32,20408.16327,20408.16327,0,0,0.0,0.0,0.0,0,0,0,0,1,1,0,0,1,0.0,0.0,0.0,1,0,1,0,332,229,0,32,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


In [9]:
df_train=df_train.drop(columns=['Source Port', 'Destination Port', 'Protocol'])

In [11]:
print('Train set size: ', df_train.shape)

Train set size:  (556548, 69)


In [10]:
df_test = pd.read_csv('/content/drive/My Drive/CICIDS2017/test_set.csv',names=col_names, skiprows=1)  
df_test=df_test.drop(columns=['Source Port', 'Destination Port', 'Protocol'])
print('Test set size: ', df_test.shape)

df_val = pd.read_csv('/content/drive/My Drive/CICIDS2017/crossval_set.csv',names=col_names, skiprows=1)  
df_val=df_val.drop(columns=['Source Port', 'Destination Port', 'Protocol'])
print('Validation set size: ', df_val.shape)

Test set size:  (278270, 69)
Validation set size:  (278270, 69)


### Option 2. Load from local machine

In [None]:
dataroot = '../data/cicids2017clean/train_set.csv'
df_train = pd.read_csv(dataroot, names=col_names, skiprows=1) 

In [None]:
dataroot = '../data/cicids2017clean/crossval_set.csv'
df_val = pd.read_csv(dataroot, names=col_names, skiprows=1) 
dataroot = '../data/cicids2017clean/test_set.csv'
df_test = pd.read_csv(dataroot, names=col_names, skiprows=1) 

## Step 2. Exploring the dataset

In [None]:
df_train.head()

Unnamed: 0,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Fwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
546863,130,2,2,328,326,164,164,164.0,0.0,163,163,163.0,0.0,5030769.0,30769.23077,43.33333,68.12733,122,4,4,4.0,0.0,4,4,4,4.0,0.0,4,4,0,0,40,40,15384.61538,15384.61538,163,164,163.6,0.547723,0.3,0,0,0,0,0,0,0,0,1,204.5,164.0,163.0,2,328,2,326,-1,-1,1,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
2284286,85594954,6,6,360,11595,360,0,60.0,146.969385,4344,0,1932.5,1754.831473,139.6694,0.140195,7781359.0,25800000.0,85500000,4,85500000,17100000.0,38200000.0,85500000,4,144398,28879.6,58478.93,133148,16,0,0,200,200,0.070098,0.070098,0,4344,919.615385,1498.335273,2245009.0,0,0,0,0,1,0,0,0,1,996.25,60.0,1932.5,6,360,6,11595,251,235,1,32,495.0,0.0,495,495,85500000.0,0.0,85500000,85500000,DoS Hulk
390020,117915594,19,17,1406,4654,774,0,74.0,179.202245,1418,0,273.764706,485.037567,51.39269,0.305303,3369017.0,13800000.0,58900000,1,118000000,6550866.0,18900000.0,58900000,1,118000000,7366629.625,20000000.0,59000000,1,0,0,616,552,0.161132,0.144171,0,1418,163.783784,362.39329,131328.9,0,0,0,1,0,0,0,0,0,168.333333,74.0,273.764706,19,1406,17,4654,29200,181,8,32,495327.0,572653.2552,900254,90400,58400000.0,665473.7481,58900000,58000000,BENIGN
212934,30731,1,1,46,93,46,46,46.0,0.0,93,93,93.0,0.0,4523.12,65.080863,30731.0,0.0,30731,30731,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,20,20,32.540431,32.540431,46,93,61.666667,27.135463,736.3333,0,0,0,0,0,0,0,0,1,92.5,46.0,93.0,1,46,1,93,-1,-1,0,20,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN
306030,49,1,1,0,0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,40816.32653,49.0,0.0,49,49,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0,0,32,32,20408.16327,20408.16327,0,0,0.0,0.0,0.0,0,0,0,0,1,1,0,0,1,0.0,0.0,0.0,1,0,1,0,332,229,0,32,0.0,0.0,0,0,0.0,0.0,0,0,BENIGN


Count the number of attacks

In [None]:
df_train['Label'].value_counts()

BENIGN                        278274
DoS Hulk                      115062
PortScan                       79402
DDoS                           64012
DoS GoldenEye                   5146
FTP-Patator                     3967
SSH-Patator                     2948
DoS slowloris                   2898
DoS Slowhttptest                2749
Bot                              978
Web Attack � Brute Force         753
Web Attack � XSS                 326
Infiltration                      18
Web Attack � Sql Injection        10
Heartbleed                         5
Name: Label, dtype: int64

In [None]:
df_train.describe()

Unnamed: 0,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Fwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
count,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0,556548.0
mean,21599310.0,7.561386,8.259167,682.2941,12659.55,336.406078,18.889621,96.20026,129.272466,1702.530853,25.598678,561.627492,704.125544,921606.8,69566.71,2010454.0,5303410.0,17688240.0,216692.8,21144770.0,3910157.0,6895611.0,17471740.0,814745.8,10714410.0,1973938.0,2677833.0,7299100.0,697551.7,0.034874,0.0,188.4306,199.4029,61904.88,7797.16,10.577194,1918.313215,308.111075,606.701378,1260081.0,0.056247,0.034874,0.000119,0.360393,0.371772,0.093363,0.0,0.000119,0.813482,340.208877,96.20026,561.627492,7.561386,682.2941,8.259167,12661.1,5913.062827,1109.007953,4.883591,25.540766,108236.1,32426.6,157184.9,88653.28,16406460.0,1206008.0,17305000.0,15526210.0
std,38246650.0,641.917381,865.356666,8207.379,1934124.0,1243.294768,107.276928,334.915124,524.220046,2851.196636,57.820679,873.686322,1254.293604,22255860.0,263243.8,5270740.0,10187510.0,33802250.0,3879389.0,38308400.0,9825054.0,14068520.0,34078360.0,7862357.0,29239810.0,8267067.0,8952940.0,22725250.0,7091812.0,0.183461,0.0,13505.87,17352.01,254256.0,42203.04,21.863457,2990.143762,434.674128,944.496729,2616912.0,0.230398,0.183461,0.010889,0.480115,0.483278,0.290941,0.0,0.010889,1.011007,477.689108,334.915124,873.686322,641.917381,8207.379,865.356666,1934497.0,11687.758097,6328.829096,612.739185,6.418384,716470.0,356700.6,958873.9,666846.9,32835310.0,7260774.0,33900420.0,32592790.0
min,-13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-193000000.0,-2000000.0,-13.0,0.0,-13.0,-13.0,0.0,0.0,0.0,0.0,-12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,112.0,1.0,1.0,6.0,6.0,6.0,0.0,6.0,0.0,6.0,0.0,6.0,0.0,120.3128,0.6693823,65.33333,0.0,100.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,20.0,0.3398826,0.07024817,0.0,6.0,3.6,2.309401,5.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,6.0,6.0,1.0,6.0,1.0,6.0,0.0,-1.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,68674.5,2.0,2.0,56.0,112.0,32.0,0.0,29.0,0.0,68.0,0.0,61.8,0.0,3188.603,62.99184,23539.0,16986.5,60781.5,4.0,376.0,262.0,0.0,355.0,3.0,4.0,4.0,0.0,4.0,2.0,0.0,0.0,64.0,40.0,30.31994,5.164177,0.0,94.0,59.4,28.867513,833.3333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,75.0,29.0,61.8,2.0,56.0,2.0,112.0,256.0,0.0,1.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,11043320.0,6.0,6.0,338.0,11595.0,311.0,6.0,51.285714,108.512672,2896.0,6.0,945.193421,916.032706,137931.0,24242.42,1857667.0,3578162.0,9959993.0,50.0,9468686.0,2315217.0,3295928.0,8219649.0,48.0,253170.5,52370.74,66067.56,194129.2,46.0,0.0,0.0,144.0,160.0,12500.0,7662.835,6.0,4344.0,730.0625,1380.446584,1905633.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,778.733333,51.285714,945.193421,6.0,338.0,6.0,11595.0,8192.0,235.0,2.0,32.0,953.0,0.0,953.0,882.0,9553880.0,0.0,9803576.0,7462839.0
max,119999900.0,207964.0,284602.0,2866110.0,627000000.0,24820.0,1472.0,5940.857143,7049.469004,17376.0,1460.0,4370.686524,6694.376371,2070000000.0,3000000.0,120000000.0,84800000.0,120000000.0,120000000.0,120000000.0,120000000.0,83400000.0,120000000.0,120000000.0,120000000.0,120000000.0,81300000.0,120000000.0,120000000.0,1.0,0.0,4369484.0,5692040.0,3000000.0,2000000.0,1359.0,24820.0,1936.833333,4731.522394,22400000.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,124.0,2528.0,5940.857143,4370.686524,207964.0,2866110.0,284602.0,627039500.0,65535.0,65535.0,198636.0,60.0,110000000.0,70500000.0,110000000.0,110000000.0,120000000.0,74200000.0,120000000.0,120000000.0


Read test and validation sets

In [None]:
print('Test set: ')
df_test['Label'].value_counts()

Test set: 


BENIGN                        139135
DoS Hulk                       57531
PortScan                       39701
DDoS                           32006
DoS GoldenEye                   2573
FTP-Patator                     1983
SSH-Patator                     1474
DoS slowloris                   1449
DoS Slowhttptest                1374
Bot                              489
Web Attack � Brute Force         376
Web Attack � XSS                 163
Infiltration                       9
Web Attack � Sql Injection         5
Heartbleed                         2
Name: Label, dtype: int64

In [None]:
df_test.describe()

Unnamed: 0,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Fwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
count,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0
mean,20184940.0,13.425781,15.071391,501.816,27770.75,185.079276,9.858127,42.498337,62.543191,1437.918892,16.702825,472.989076,598.916973,1319690.0,91530.9,2091054.0,4928267.0,15411550.0,211900.1,19817160.0,4173283.0,5569102.0,15224870.0,1619357.0,10586950.0,2418274.0,2028897.0,6305801.0,1360565.0,0.055008,0.0,318.3946,338.412,85411.6,6162.424,7.706957,1472.16314,241.332367,466.822599,897032.5,0.058659,0.055008,7.9e-05,0.42342,0.383936,0.024473,0.0,7.9e-05,0.515485,266.229446,42.498337,472.989076,13.425781,501.816,15.071391,27770.75,10570.92108,1731.26747,10.076965,26.450577,99502.58,43576.01,163586.3,72657.77,13899760.0,931034.2,14611830.0,13197460.0
std,37476720.0,1208.778536,1591.037162,7898.116,3604963.0,392.614677,61.235977,99.578231,132.298567,2586.48709,46.983524,797.823718,1144.520735,23653540.0,293954.0,5446991.0,10464060.0,31565390.0,3035605.0,37418610.0,11780490.0,12628960.0,31610730.0,10665330.0,29133230.0,10451820.0,7731527.0,20466300.0,9753000.0,0.227996,0.0,25247.65,31824.66,288988.5,35115.81,17.933308,2584.335069,383.113699,824.111902,2290032.0,0.234986,0.227996,0.008891,0.494102,0.486344,0.154512,0.0,0.008891,0.551013,420.649091,99.578231,797.823718,1208.778536,7898.116,1591.037162,3604963.0,17760.016938,7434.284715,1153.408495,6.864564,742396.2,438225.0,1087195.0,666408.7,30552650.0,6557272.0,31588120.0,30246800.0
min,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-12000000.0,-2000000.0,-1.0,0.0,-1.0,-13.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,80.0,2.0,1.0,2.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,41.87093,0.7809336,71.0,0.0,78.0,3.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,20.0,0.5793178,0.008503405,0.0,6.0,3.333333,2.309401,5.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2.0,0.0,2.0,2.0,1.0,0.0,251.0,-1.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,85383.0,3.0,1.0,37.0,61.0,30.0,0.0,10.90048,0.0,48.0,0.0,41.0,0.0,1179.98,60.26881,26946.9,10098.84,54893.5,12.0,7013.0,3503.5,335.1686,6663.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72.0,32.0,32.05693,0.7639334,0.0,56.0,48.0,18.622567,346.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,59.75,10.90048,41.0,3.0,37.0,1.0,61.0,304.0,0.0,1.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,9936814.0,7.0,6.0,356.0,4532.0,321.0,6.0,54.333333,111.538871,1460.0,6.0,542.958333,663.236171,74001.25,26315.79,1557685.0,2991012.0,8012042.0,72.0,8004861.0,2007233.0,2359285.0,6955281.0,145.0,217463.8,44523.51,55015.63,148199.8,45.0,0.0,0.0,172.0,152.0,13888.89,61.20382,6.0,1460.0,298.055556,533.916876,285067.2,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,315.055556,54.333333,542.958333,7.0,356.0,6.0,4532.0,8192.0,235.0,3.0,32.0,5.0,0.0,5.0,5.0,5853280.0,0.0,5860882.0,5499876.0
max,119999900.0,219759.0,291922.0,1323378.0,655453000.0,23360.0,2065.0,4638.923469,7125.596846,17376.0,2146.0,3884.924556,6715.738331,2071000000.0,3000000.0,114392600.0,84781720.0,119994600.0,114392600.0,119999800.0,119961000.0,84602930.0,119994800.0,119961000.0,119999600.0,119974100.0,84418010.0,119974100.0,119974100.0,1.0,0.0,4644908.0,5838440.0,3000000.0,2000000.0,389.0,23360.0,1877.272727,4414.547151,19488230.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,7.0,2068.0,4638.923469,3884.924556,219759.0,1323378.0,291922.0,655453000.0,65535.0,65535.0,213557.0,93.0,101659700.0,64349500.0,101659700.0,101659700.0,119994600.0,73532390.0,119994600.0,119994600.0


In [None]:
print('Validation set: ')
df_val['Label'].value_counts()

Validation set: 


BENIGN                        139135
DoS Hulk                       57531
PortScan                       39701
DDoS                           32006
DoS GoldenEye                   2573
FTP-Patator                     1983
SSH-Patator                     1474
DoS slowloris                   1449
DoS Slowhttptest                1374
Bot                              489
Web Attack � Brute Force         376
Web Attack � XSS                 163
Infiltration                       9
Web Attack � Sql Injection         5
Heartbleed                         2
Name: Label, dtype: int64

In [None]:
df_val.describe()

Unnamed: 0,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Fwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
count,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0,278270.0
mean,22677200.0,5.697168,5.401071,363.328418,6993.901,146.271053,12.216883,37.118951,48.506259,1644.006185,31.044985,545.020851,677.301293,1026168.0,76478.36,2035292.0,5660128.0,18679830.0,117620.9,22429470.0,4097971.0,7275898.0,18575770.0,853211.7,10332440.0,2108780.0,2497470.0,6852268.0,877710.6,0.032285,0.0,-18535.84,-6184.786,69229.16,7292.921,11.819104,1667.675854,265.849753,521.021331,1032910.0,0.063622,0.032285,0.000111,0.325008,0.360617,0.052697,0.0,0.000111,0.635189,293.529747,37.118951,545.020851,5.697168,363.328418,5.401071,6994.36,8300.353962,1196.878449,2.850606,-6302.925,101166.3,38665.28,158403.2,77729.19,17583770.0,933741.7,18297550.0,16894050.0
std,39575180.0,67.142466,87.166823,3246.913595,258556.5,380.776699,29.589643,80.881268,118.838474,2798.340693,63.404307,851.938455,1229.566919,18488960.0,273385.5,4353752.0,10816190.0,35326090.0,1940212.0,39554070.0,9992215.0,14501080.0,35365830.0,7979563.0,29286910.0,9212534.0,9080364.0,22681320.0,8098570.0,0.176757,0.0,4255039.0,856332.5,268176.6,35408.18,21.282926,2802.004249,395.033818,872.649085,2337799.0,0.244078,0.176757,0.010554,0.468379,0.480181,0.223428,0.0,0.010554,0.543604,431.308583,80.881268,851.938455,67.142466,3246.913595,87.166823,258608.2,13668.065695,6710.083319,32.991321,728696.9,840451.5,448735.6,1167019.0,758208.7,34554260.0,6638199.0,35404210.0,34373310.0
min,-12.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-12000000.0,-2000000.0,-12.0,0.0,-12.0,-12.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1929350000.0,-167770500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,-83885310.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,87.0,1.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0,6.0,0.0,6.0,0.0,119.7319,0.6190867,62.0,0.0,85.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,20.0,0.4824028,0.06077324,0.0,6.0,2.0,3.464102,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,6.0,1.0,0.0,1.0,6.0,-1.0,-1.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,48520.0,2.0,2.0,56.0,138.0,32.0,0.0,30.0,0.0,87.0,0.0,79.0,0.0,3111.79,78.89413,19778.95,10620.92,45088.0,4.0,76.0,69.0,0.0,74.0,3.0,3.0,3.0,0.0,3.0,2.0,0.0,0.0,64.0,40.0,39.94567,7.588463,0.0,94.0,59.6,28.481573,811.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,75.5,30.0,79.0,2.0,56.0,2.0,138.0,256.0,0.0,1.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,11812030.0,6.0,5.0,317.0,6899.0,231.0,25.0,49.0,79.203964,2052.0,46.0,702.75,757.42027,122449.0,25974.03,1982858.0,3728053.0,9999176.0,55.0,10700000.0,2714097.0,3740416.0,10000000.0,48.0,152490.8,31879.14,56627.72,136444.8,46.0,0.0,0.0,164.0,132.0,13333.33,8474.576,23.0,2313.0,384.862121,682.325913,465568.7,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,408.011858,49.0,702.75,6.0,317.0,5.0,6899.0,8192.0,235.0,2.0,32.0,652.0,0.0,652.0,504.75,9604032.0,0.0,9992129.0,7454226.0
max,120000000.0,16412.0,20326.0,624920.0,74900000.0,24820.0,2325.0,5177.25641,5199.042702,15928.0,1983.0,5800.5,8194.660487,2070000000.0,3000000.0,119000000.0,84800000.0,120000000.0,119000000.0,120000000.0,120000000.0,83700000.0,120000000.0,120000000.0,120000000.0,120000000.0,83400000.0,120000000.0,120000000.0,1.0,0.0,533580.0,650440.0,3000000.0,2000000.0,1306.0,24820.0,2265.586207,4731.522394,22400000.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,8.0,2328.0,5177.25641,5800.5,16412.0,624920.0,20326.0,74870240.0,65535.0,65535.0,6697.0,56.0,106000000.0,50400000.0,106000000.0,106000000.0,120000000.0,76600000.0,120000000.0,120000000.0


## Step 3. Encode Datasets

Encoding the labels, and generate numpy array. Note that the label has not been encoded as one-hot coding. We will use one-hot code later. 

### Step 3.1 Encoding train dataset

In [34]:
df_label = df_train['Label']
data = df_train.drop(columns=['Label'])
Xtrain = data.values
y_train = encode_label(df_label.values)

### Step 3.2. Encoding test dataset

In [35]:
df_label = df_test['Label']
data = df_test.drop(columns=['Label'])
Xtest = data.values
y_test = encode_label(df_label.values)

### Step 3.3 Encoding validation dataset

In [36]:
df_label = df_val['Label']
data = df_val.drop(columns=['Label'])
Xval = data.values
y_val = encode_label(df_label.values)

## Step 4. Normalization or Standardization

The continuous feature values are normalized into the same feature space. This is important when using features that have different measurements, and is a general requirement of many machine learning algorithms. We implement the two methods to see the impact on the final classifications. 

## Option 1. Normalization

The values of the datasets are normalized using the Min-Max scaling technique, bringing them all within a range of [0,1].

### Step 4.1 Normalizing train dataset

In [14]:
from sklearn.preprocessing import MinMaxScaler

In [15]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(Xtrain)
X_train

array([[1.19166704e-06, 4.80854767e-06, 7.02735750e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [7.13291618e-01, 2.40427384e-05, 2.10820725e-05, ...,
        0.00000000e+00, 7.12500000e-01, 7.12500000e-01],
       [9.82630369e-01, 8.65538581e-05, 5.97325388e-05, ...,
        8.96864890e-03, 4.90833333e-01, 4.83333333e-01],
       ...,
       [5.66517679e-03, 9.61709535e-06, 1.40547150e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.00308397e-04, 4.80854767e-06, 7.02735750e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.64217802e-02, 9.61709535e-06, 1.40547150e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

### Step 4.2. Normalizing validation dataset

In [16]:
X_val = scaler.fit_transform(Xval)
X_val

array([[9.11674962e-03, 1.82804217e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [5.08333312e-07, 0.00000000e+00, 4.91980714e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [6.28146249e-01, 1.15776004e-03, 1.13155564e-03, ...,
        5.68691207e-05, 8.33333333e-02, 8.32947583e-02],
       ...,
       [1.01176662e-03, 0.00000000e+00, 4.91980714e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.58333314e-07, 0.00000000e+00, 4.91980714e-05, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.00521829e-02, 1.21869478e-04, 1.96792286e-04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

### Step 4.3. Normalizing test dataset

In [17]:
X_test = scaler.fit_transform(Xtest)
X_test

array([[3.58333608e-07, 0.00000000e+00, 3.42557258e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.16666986e-08, 4.55046005e-06, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.50000345e-07, 0.00000000e+00, 3.42557258e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [8.35847991e-01, 2.73027603e-05, 2.05534355e-05, ...,
        0.00000000e+00, 8.33370661e-01, 8.33370661e-01],
       [5.04708720e-04, 0.00000000e+00, 3.42557258e-06, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [9.92561411e-01, 6.96220388e-04, 1.12701338e-03, ...,
        5.01804492e-03, 4.88761059e-01, 4.84412289e-01]])

## Option 2. **Standardization**

In [37]:
from sklearn.preprocessing import StandardScaler

In [38]:
scaler = StandardScaler()

X_train = scaler.fit_transform(Xtrain)
X_val = scaler.fit_transform(Xval)
X_test = scaler.fit_transform(Xtest)

X_train

array([[-5.64734391e-01, -8.66371702e-03, -7.23305556e-03, ...,
        -1.66099244e-01, -5.10466266e-01, -4.76369758e-01],
       [ 1.67323679e+00, -2.43237991e-03, -2.61067990e-03, ...,
        -1.66099244e-01,  2.01162882e+00,  2.14691177e+00],
       [ 2.51829561e+00,  1.78194657e-02,  1.01008532e-02, ...,
        -7.44458768e-02,  1.22697701e+00,  1.30316625e+00],
       ...,
       [-5.46963460e-01, -7.10588274e-03, -4.92186773e-03, ...,
        -1.66099244e-01, -5.10466266e-01, -4.76369758e-01],
       [-5.64109656e-01, -8.66371702e-03, -7.23305556e-03, ...,
        -1.66099244e-01, -5.10466266e-01, -4.76369758e-01],
       [-5.13214278e-01, -7.10588274e-03, -4.92186773e-03, ...,
        -1.66099244e-01, -5.10466266e-01, -4.76369758e-01]])

## Step 5 One-hot encoding for labels

y_train, y_test and y_val have to be one-hot-encoded. That means they must have dimension (number_of_samples, 15), where 15 denotes number of classes.

In [39]:
from tensorflow.keras.utils import to_categorical

Save the labels for AdaBoostClassifier

In [40]:
y_train_ada = y_train
y_test_ada = y_test
y_val_ada = y_val

In [41]:
y_train = to_categorical(y_train, 15)
y_test = to_categorical(y_test, 15)
y_val = to_categorical(y_val, 15)

## Step 6. Define the metrics

In [42]:
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier

#importing confusion matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn import metrics
from sklearn.metrics import accuracy_score

#importing accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import mean_squared_error,mean_absolute_error

Get the attacks' names

In [43]:
labels_d = make_value2index(df_test['Label'])

In [44]:
print(labels_d)

{'BENIGN': 139134, 'Bot': 139623, 'DDoS': 171629, 'DoS GoldenEye': 174202, 'DoS Hulk': 231733, 'DoS Slowhttptest': 233107, 'DoS slowloris': 234556, 'FTP-Patator': 236539, 'Heartbleed': 236541, 'Infiltration': 236550, 'PortScan': 276251, 'SSH-Patator': 277725, 'Web Attack � Brute Force': 278101, 'Web Attack � Sql Injection': 278106, 'Web Attack � XSS': 278269}


# CNN1D Model 

In [45]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, BatchNormalization, Flatten, Dense, Activation,Dropout,MaxPooling1D
from tensorflow.keras.constraints import max_norm

In [85]:
#hyper-params
batch_size = 5500 # increasing batch size with more gpu added
input_dim = X_train.shape[1]
num_class = 15                   # 15 intrusion classes, including benign traffic class
num_epochs = 30
learning_rates = 1e-4
regularizations = 1e-3
optim = tf.keras.optimizers.Adam(lr=learning_rates, beta_1=0.9, beta_2=0.999, epsilon=1e-8)

print(input_dim)
print(num_class)

68
15


In [47]:
#X_train_r = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_train_r = np.zeros((len(X_train), input_dim, 1))
X_train_r[:, :, 0] = X_train[:, :input_dim]
print(X_train_r.shape)

(556548, 68, 1)


In [48]:
X_test_r = np.zeros((len(X_test), input_dim, 1))
X_test_r[:, :, 0] = X_test[:, :input_dim]
print(X_test_r.shape)

(278270, 68, 1)


In [49]:
X_val_r = np.zeros((len(X_val), input_dim, 1))
X_val_r[:, :, 0] = X_val[:, :input_dim]
print(X_val_r.shape)

(278270, 68, 1)


In [70]:
model = Sequential()

# input layer
model.add(Conv1D(filters=32, kernel_size=17, activation='relu', padding='same', kernel_initializer='he_uniform', input_shape=(68,1)))
model.add(Conv1D(filters=32, kernel_size=11, activation='relu', padding='same', kernel_initializer='he_uniform'))
model.add(MaxPooling1D(pool_size=2,strides=2))
model.add(Dropout(0.2))
model.add(BatchNormalization(axis=1))

model.add(Conv1D(filters=64, kernel_size=13, activation='relu', padding='same', kernel_initializer='he_uniform'))
model.add(Conv1D(filters=64, kernel_size=11, activation='relu', padding='same', kernel_initializer='he_uniform'))
model.add(MaxPooling1D(pool_size=2,strides=2))
model.add(Dropout(0.2))
model.add(BatchNormalization(axis=1)) 

model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_class))
model.add(Activation('softmax'))


model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_20 (Conv1D)           (None, 68, 32)            576       
_________________________________________________________________
conv1d_21 (Conv1D)           (None, 68, 32)            11296     
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, 34, 32)            0         
_________________________________________________________________
dropout_15 (Dropout)         (None, 34, 32)            0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 34, 32)            136       
_________________________________________________________________
conv1d_22 (Conv1D)           (None, 34, 64)            26688     
_________________________________________________________________
conv1d_23 (Conv1D)           (None, 34, 64)           

In [86]:
from keras.optimizers import Nadam
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
import keras
import time
time_start = time.time()

reduce_lr = keras.callbacks.ReduceLROnPlateau(moniter='val_loss',
                                              factor=0.1,
                                              patience=10)
nadam = Nadam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.0001)
model.compile(loss = "categorical_crossentropy",optimizer = "nadam", metrics = ["accuracy"])

history = model.fit(X_train_r, y_train, 
                    epochs=50, 
                    batch_size=batch_size, 
                    verbose=2,
                    validation_data=(X_val_r, y_val),
                    callbacks=[reduce_lr])
time_end = time.time()
train_time = time_end - time_start
print("train_time:",train_time)

Epoch 1/50
102/102 - 12s - loss: 0.0400 - accuracy: 0.9849 - val_loss: 0.2485 - val_accuracy: 0.9488
Epoch 2/50
102/102 - 9s - loss: 0.0385 - accuracy: 0.9851 - val_loss: 0.2850 - val_accuracy: 0.9413
Epoch 3/50
102/102 - 9s - loss: 0.0390 - accuracy: 0.9850 - val_loss: 0.2319 - val_accuracy: 0.9603
Epoch 4/50
102/102 - 9s - loss: 0.0372 - accuracy: 0.9858 - val_loss: 0.2165 - val_accuracy: 0.9476
Epoch 5/50
102/102 - 9s - loss: 0.0371 - accuracy: 0.9858 - val_loss: 0.2629 - val_accuracy: 0.9363
Epoch 6/50
102/102 - 9s - loss: 0.0388 - accuracy: 0.9853 - val_loss: 0.2102 - val_accuracy: 0.9632
Epoch 7/50
102/102 - 9s - loss: 0.0388 - accuracy: 0.9852 - val_loss: 0.2119 - val_accuracy: 0.9604
Epoch 8/50
102/102 - 9s - loss: 0.0378 - accuracy: 0.9855 - val_loss: 0.2033 - val_accuracy: 0.9599
Epoch 9/50
102/102 - 9s - loss: 0.0376 - accuracy: 0.9855 - val_loss: 0.2332 - val_accuracy: 0.9541
Epoch 10/50
102/102 - 9s - loss: 0.0381 - accuracy: 0.9853 - val_loss: 0.2048 - val_accuracy: 0.952

## Get the metrics

In [87]:
# evaluate model
accuracy = model.evaluate(X_test_r, y_test, batch_size=batch_size, verbose=1)



In [88]:
y_pred=model.predict(X_test_r)

In [89]:
display_metrics(y_test_ada, np.argmax(y_pred, axis = 1), labels_d)


Accuracy: 0.90

Micro Precision: 0.90
Micro Recall: 0.90
Micro F1-score: 0.90



  _warn_prf(average, modifier, msg_start, len(result))


Macro Precision: 0.66
Macro Recall: 0.51
Macro F1-score: 0.54

Weighted Precision: 0.90
Weighted Recall: 0.90
Weighted F1-score: 0.89

Classification Report



  _warn_prf(average, modifier, msg_start, len(result))


                            precision    recall  f1-score   support

                    BENIGN       0.87      0.94      0.90    139135
                       Bot       0.00      0.00      0.00       489
                      DDoS       0.83      1.00      0.91     32006
             DoS GoldenEye       1.00      0.89      0.94      2573
                  DoS Hulk       0.96      0.72      0.82     57531
          DoS Slowhttptest       0.63      0.76      0.69      1374
             DoS slowloris       0.77      0.44      0.56      1449
               FTP-Patator       0.83      0.94      0.88      1983
                Heartbleed       1.00      0.50      0.67         2
              Infiltration       0.00      0.00      0.00         9
                  PortScan       1.00      0.99      1.00     39701
               SSH-Patator       0.99      0.50      0.67      1474
  Web Attack � Brute Force       0.00      0.00      0.00       376
Web Attack � Sql Injection       0.00      0.00