<a href="https://colab.research.google.com/github/fwangliberty/AIoTDesign-Frontend/blob/master/cnn_small_cicids2017_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Intrusion Detection by using small CICIDS 2017 DataSet with 2000 examples each type

In [3]:
import os
from os.path import join
import glob
import pandas as pd
import numpy as np
import time
import seaborn as sns
import matplotlib.pyplot as plt

In [17]:
def make_value2index(attacks):
    #make dictionary
    attacks = sorted(attacks)
    d = {}
    counter=0
    for attack in attacks:
        d[attack] = counter
        counter+=1
    return d

In [18]:
# chganges label from string to integer/index
def encode_label(Y_str):
    labels_d = make_value2index(np.unique(Y_str))
    Y = [labels_d[y_str] for y_str  in Y_str]
    Y = np.array(Y)
    return np.array(Y)

In [14]:
def get_dataframe_ofType(df, traffic_type):
    """
    Analyze traffic distribution of pandas data frame containing IDS2017 CSV
    file with labelled traffic

    Parameter
    ---------
    df: DataFrame
        Pandas DataFrame corresponding to the content of a CSV file
    traffic_type: string
        name corresponding to traffic type

    Return
    ------
    req_df: DataFrame
        Pandas DataFrame containing only the requested traffic type
    """
    req_df = df.loc[df['Label'] == traffic_type]
    # don't keep original indexes
    #req_df = req_df.reset_index()
    return req_df

In [13]:
def get_typelist(df):
    """
    Extract traffic type from a pandas data frame containing IDS2017 CSV
    file with labelled traffic

    Parameter
    ---------
    df: DataFrame
        Pandas DataFrame corresponding to the content of a CSV file

    Return
    ------
    traffic_type_list: list
        List of traffic types contained in the DataFrame
    """
    traffic_type_list = df['Label'].value_counts().index.tolist()
    return traffic_type_list

In [16]:
#We balance data as follows:
#1) oversample small classes so that their population/count is equal to mean_number_of_samples_per_class
#2) undersample large classes so that their count is equal to mean_number_of_samples_per_class
def balance_data(X,y,seed, mean_samples):
    np.random.seed(seed)
    unique,counts = np.unique(y,return_counts=True)
    mean_samples_per_class = mean_samples # int(round(np.mean(counts)))
    N,D = X.shape #(number of examples, number of features)
    new_X = np.empty((0,D)) 
    new_y = np.empty((0),dtype=int)
    for i,c in enumerate(unique):
        temp_x = X[y==c]
        indices = np.random.choice(temp_x.shape[0],mean_samples_per_class) # gets `mean_samples_per_class` indices of class `c`
        new_X = np.concatenate((new_X,temp_x[indices]),axis=0) # now we put new data into new_X 
        temp_y = np.ones(mean_samples_per_class,dtype=int)*c
        new_y = np.concatenate((new_y,temp_y),axis=0)
        
    # in order to break class order in data we need shuffling
    indices = np.arange(new_y.shape[0])
    np.random.shuffle(indices)
    new_X =  new_X[indices,:]
    new_y = new_y[indices]
    return (new_X,new_y)

## Step 1. Read cleaned CICIDS2017 dataset

Connect to Google Drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# All columns
col_names = np.array(['Source Port', 'Destination Port',
                      'Protocol', 'Flow Duration', 'Total Fwd Packets', 'Total Backward Packets', 'Total Length of Fwd Packets',
                      'Total Length of Bwd Packets', 'Fwd Packet Length Max', 'Fwd Packet Length Min', 'Fwd Packet Length Mean',
                      'Fwd Packet Length Std', 'Bwd Packet Length Max', 'Bwd Packet Length Min', 'Bwd Packet Length Mean', 'Bwd Packet Length Std',
                      'Flow Bytes/s', 'Flow Packets/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Total',
                      'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
                      'Bwd IAT Min', 'Fwd PSH Flags', 'Fwd URG Flags', 'Fwd Header Length', 'Bwd Header Length',
                      'Fwd Packets/s', 'Bwd Packets/s', 'Min Packet Length', 'Max Packet Length', 'Packet Length Mean', 'Packet Length Std',
                      'Packet Length Variance', 'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count',
                      'URG Flag Count', 'CWE Flag Count', 'ECE Flag Count', 'Down/Up Ratio', 'Average Packet Size', 'Avg Fwd Segment Size',
                      'Avg Bwd Segment Size','Subflow Fwd Packets', 'Subflow Fwd Bytes',
                      'Subflow Bwd Packets', 'Subflow Bwd Bytes', 'Init_Win_bytes_forward', 'Init_Win_bytes_backward',
                      'act_data_pkt_fwd', 'min_seg_size_forward', 'Active Mean', 'Active Std', 'Active Max', 'Active Min', 'Idle Mean',
                      'Idle Std', 'Idle Max', 'Idle Min', 'Label'])

In [76]:
col_important = np.array(['Source Port', 'Destination Port', 'Fwd IAT Min', 'Init_Win_bytes_forward', 'Init_Win_bytes_backward', 'Flow IAT Min',
                      'Flow Duration', 'Total Fwd Packets', 'Total Backward Packets', 'Total Length of Fwd Packets',
                      'Total Length of Bwd Packets', 'Fwd Packet Length Max', 'Protocol', 'Fwd Packet Length Min', 'Fwd Packet Length Mean',
                      'Fwd Packet Length Std', 'Bwd Packet Length Max', 'Bwd Packet Length Min', 'Bwd Packet Length Mean', 'Bwd Packet Length Std',
                      'Flow Bytes/s', 'Flow Packets/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Fwd IAT Total',
                      'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Bwd IAT Total', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max',
                      'Bwd IAT Min', 'Fwd PSH Flags', 'Fwd URG Flags', 'Fwd Header Length', 'Bwd Header Length',
                      'Fwd Packets/s', 'Bwd Packets/s', 'Min Packet Length', 'Max Packet Length', 'Packet Length Mean', 'Packet Length Std',
                      'Packet Length Variance', 'FIN Flag Count', 'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count',
                      'URG Flag Count', 'CWE Flag Count', 'ECE Flag Count', 'Down/Up Ratio', 'Average Packet Size', 'Avg Fwd Segment Size',
                      'Avg Bwd Segment Size','Subflow Fwd Packets', 'Subflow Fwd Bytes',
                      'Subflow Bwd Packets', 'Subflow Bwd Bytes', 
                      'act_data_pkt_fwd', 'min_seg_size_forward', 'Active Mean', 'Active Std', 'Active Max', 'Active Min', 'Idle Mean',
                      'Idle Std', 'Idle Max', 'Idle Min', 'Label'])

In [77]:
# load train data
#df_train = pd.read_csv('/content/drive/My Drive/CICIDS2017/train_set.csv',names=col_names, skiprows=1)  
df_train = pd.read_csv('/content/drive/My Drive/CICIDS2017/train_set.csv',names=col_important, skiprows=1) 
df_train.head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Source Port,Destination Port,Fwd IAT Min,Init_Win_bytes_forward,Init_Win_bytes_backward,Flow IAT Min,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Protocol,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,Bwd Packet Length Max,Bwd Packet Length Min,Bwd Packet Length Mean,Bwd Packet Length Std,Flow Bytes/s,Flow Packets/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Fwd IAT Total,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Bwd IAT Total,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Fwd URG Flags,Fwd Header Length,Bwd Header Length,Fwd Packets/s,Bwd Packets/s,Min Packet Length,Max Packet Length,Packet Length Mean,Packet Length Std,Packet Length Variance,FIN Flag Count,SYN Flag Count,RST Flag Count,PSH Flag Count,ACK Flag Count,URG Flag Count,CWE Flag Count,ECE Flag Count,Down/Up Ratio,Average Packet Size,Avg Fwd Segment Size,Avg Bwd Segment Size,Subflow Fwd Packets,Subflow Fwd Bytes,Subflow Bwd Packets,Subflow Bwd Bytes,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
5141,172.16.0.1-192.168.10.50-50294-80-6,172.16.0.1,50294.0,192.168.10.50,80.0,6.0,5/7/2017 10:33,63101744.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.110932,10500000.0,11900000.0,32100000.0,998158.0,63100000.0,10500000.0,11900000.0,32100000.0,998158.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,280.0,0.0,0.110932,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,280.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,29200.0,-1.0,0.0,40.0,7006133.0,0.0,7006133.0,7006133.0,18700000.0,12200000.0,32100000.0,8015895.0,DoS Slowhttptest
40963,172.16.0.1-192.168.10.50-37796-1199-6,172.16.0.1,37796.0,192.168.10.50,1199.0,6.0,7/7/2017 2:52,62.0,1.0,1.0,2.0,6.0,2.0,2.0,2.0,0.0,6.0,6.0,6.0,0.0,129032.3,32258.06452,62.0,0.0,62.0,62.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,20.0,16129.03226,16129.03226,2.0,6.0,3.333333,2.309401,5.333333,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,5.0,2.0,6.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,6.0,1024.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,PortScan
27718,172.217.6.200-192.168.10.12-443-42634-6,172.217.6.200,443.0,192.168.10.12,42634.0,6.0,03/07/2017 09:49:12,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,666666.666667,3.0,0.0,3.0,3.0,3.0,3.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0,0.0,666666.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,357.0,-1.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
106492,192.168.10.8-23.208.79.206-52235-443-6,192.168.10.8,52235.0,23.208.79.206,443.0,6.0,4/7/2017 11:46,5007496.0,7.0,4.0,1679.0,152.0,1080.0,0.0,239.857143,415.237052,152.0,0.0,38.0,76.0,365.6518,2.196707,500749.6,1543257.0,4892570.0,3.0,5007496.0,834582.7,2018795.0,4955369.0,3.0,87090.0,29030.0,31709.63089,63179.0,515.0,0.0,0.0,0.0,0.0,152.0,92.0,1.397904,0.798802,0.0,1080.0,152.583333,327.660428,107361.3561,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,166.454545,239.857143,38.0,152.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,1679.0,4.0,152.0,8192.0,946.0,4.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
63559,192.168.10.14-52.84.30.203-59835-80-6,52.84.30.203,80.0,192.168.10.14,59835.0,6.0,6/7/2017 10:04,4.0,1.0,1.0,6.0,6.0,6.0,6.0,6.0,0.0,6.0,6.0,6.0,0.0,3000000.0,500000.0,4.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,20.0,250000.0,250000.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,9.0,6.0,6.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,1.0,6.0,115.0,256.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN


In [78]:
#df_test = pd.read_csv('/content/drive/My Drive/CICIDS2017/test_set.csv',names=col_names, skiprows=1)  
df_test = pd.read_csv('/content/drive/My Drive/CICIDS2017/test_set.csv',names=col_important, skiprows=1) 
print('Test set size: ', df_test.shape)

#df_val = pd.read_csv('/content/drive/My Drive/CICIDS2017/crossval_set.csv',names=col_names, skiprows=1)  
df_val = pd.read_csv('/content/drive/My Drive/CICIDS2017/crossval_set.csv',names=col_important, skiprows=1) 
print('Validation set size: ', df_val.shape)

Test set size:  (278270, 72)
Validation set size:  (278270, 72)


In [79]:
# Here we can see the number of rows and columns for each table.
print(df_train.shape)

(556548, 72)


Count the number of attacks

In [80]:
df_train['Label'].value_counts()

BENIGN                        278274
DoS Hulk                      115062
PortScan                       79402
DDoS                           64012
DoS GoldenEye                   5146
FTP-Patator                     3967
SSH-Patator                     2948
DoS slowloris                   2898
DoS Slowhttptest                2749
Bot                              978
Web Attack  Brute Force         753
Web Attack  XSS                 326
Infiltration                      18
Web Attack  Sql Injection        10
Heartbleed                         5
Name: Label, dtype: int64

In [81]:
print('Test set: ')
df_test['Label'].value_counts()

Test set: 


BENIGN                        139135
DoS Hulk                       57531
PortScan                       39701
DDoS                           32006
DoS GoldenEye                   2573
FTP-Patator                     1983
SSH-Patator                     1474
DoS slowloris                   1449
DoS Slowhttptest                1374
Bot                              489
Web Attack  Brute Force         376
Web Attack  XSS                 163
Infiltration                       9
Web Attack  Sql Injection         5
Heartbleed                         2
Name: Label, dtype: int64

In [82]:
print('Validation set: ')
df_val['Label'].value_counts()

Validation set: 


BENIGN                        139135
DoS Hulk                       57531
PortScan                       39701
DDoS                           32006
DoS GoldenEye                   2573
FTP-Patator                     1983
SSH-Patator                     1474
DoS slowloris                   1449
DoS Slowhttptest                1374
Bot                              489
Web Attack  Brute Force         376
Web Attack  XSS                 163
Infiltration                       9
Web Attack  Sql Injection         5
Heartbleed                         2
Name: Label, dtype: int64

## Step 2. Randomly Selecting 2000 examples from each type

First, selecting 4000 examples for each type in train dataset

In [83]:
df_label = df_train['Label']
data = df_train.drop(columns=['Label'])
X = data.values
y = encode_label(df_label.values)

In [84]:
print(X.shape)
print(y.shape)

(556548, 71)
(556548,)


In [85]:
unique, counts = np.unique(y, return_counts=True)
print(unique, counts)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14] [278274    978  64012   5146 115062   2749   2898   3967      5     18
  79402   2948    753     10    326]


In [86]:
SEED = 2
X_train,y_train = balance_data(X,y,seed=SEED, mean_samples=4000)

In [87]:
print(X_train.shape)
print(y_train.shape)
unique, counts = np.unique(y_train, return_counts=True)
print(unique, counts)

(60000, 71)
(60000,)
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14] [4000 4000 4000 4000 4000 4000 4000 4000 4000 4000 4000 4000 4000 4000
 4000]


Next, selecting 1000 examples from validation datesets

In [88]:
df_label = df_test['Label']
data = df_test.drop(columns=['Label'])
X = data.values
y = encode_label(df_label.values)

SEED = 2
X_test,y_test = balance_data(X,y,seed=SEED, mean_samples=1000)

Next, selecting 500 examples from test datesets

In [89]:
df_label = df_val['Label']
data = df_val.drop(columns=['Label'])
X = data.values
y = encode_label(df_label.values)

SEED = 2
X_val,y_val = balance_data(X,y,seed=SEED, mean_samples=500)

## Step 3. Normalization

The continuous feature values are normalized into the same feature space. This is important when using features that have different measurements, and is a general requirement of many machine learning algorithms. Therefore, the values for this dataset are also normalized using the Min-Max scaling technique, bringing them all within a range of [0,1].

In [90]:
from sklearn.preprocessing import MinMaxScaler

In [91]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_train

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.00109412, 0.00130398, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.0077991 , 0.0197178 , ..., 0.        , 0.05733978,
        0.05733978]])

In [92]:
X_train.shape

(60000, 71)

In [93]:
X_test = scaler.fit_transform(X_test)

In [94]:
X_test.shape

(15000, 71)

In [95]:
X_val = scaler.fit_transform(X_val)
X_val

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.00095839, 0.02132314, ..., 0.        , 0.        ,
        0.        ],
       [0.00100857, 0.00039029, 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.01865954, 0.0270547 , ..., 0.        , 0.        ,
        0.        ],
       [0.00302572, 0.00117087, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.02337832, 0.05175073, ..., 0.        , 0.        ,
        0.        ]])

## Step 4. One-hot encoding for labels

y_train and y_test have to be one-hot-encoded. That means they must have dimension (number_of_samples, 15), where 15 denotes number of classes.

In [96]:
from tensorflow.keras.utils import to_categorical

In [97]:
y_train_v = to_categorical(y_train, 15)
y_test_v = to_categorical(y_test, 15)
y_val_v = to_categorical(y_val, 15)
print(y_train_v.shape)
print(y_test_v.shape)
print(y_val_v.shape)

(60000, 15)
(15000, 15)
(7500, 15)


## Step 5. Build the model

In [98]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, BatchNormalization, MaxPooling1D, Flatten, Dense, Activation,Dropout
from tensorflow.keras.constraints import max_norm

In [99]:
#hyper-params
batch_size = 256 # increasing batch size with more gpu added

input_dim = X_train.shape[1]
num_class = 15                   # 15 intrusion classes, including benign traffic class
num_epochs = 90

print(input_dim)
print(num_class)

71
15


In [100]:
X_train_r = np.zeros((len(X_train), input_dim, 1))
X_train_r[:, :, 0] = X_train[:, :input_dim]
print(X_train_r.shape)

(60000, 71, 1)


In [101]:
X_val_r = np.zeros((len(X_val), input_dim, 1))
X_val_r[:, :, 0] = X_val[:, :input_dim]
print(X_val_r.shape)

(7500, 71, 1)


In [102]:
X_test_r = np.zeros((len(X_test), input_dim, 1))
X_test_r[:, :, 0] = X_test[:, :input_dim]
print(X_test_r.shape)

(15000, 71, 1)


**Model with 2 Con1D layers**

In [112]:
model2 = Sequential()

# input layer
model2.add(Conv1D(filters=60, kernel_size=11,  input_shape=(71,1)))
#model2.add(BatchNormalization(axis=1))
model2.add(Activation('relu'))
model2.add(Dropout(0.1))

model2.add(Conv1D(filters=60, kernel_size=3))
#model2.add(BatchNormalization(axis=1))
model2.add(Activation('relu'))
model2.add(Dropout(0.1))

model2.add(Conv1D(filters=60, kernel_size=7))
#model2.add(BatchNormalization(axis=1))
model2.add(Activation('relu'))
model2.add(Dropout(0.1))

model2.add(Flatten())
#model2.add(Dropout(0.1))
model2.add(Dense(128, activation='relu'))
model2.add(Dense(num_class))
model2.add(Activation('softmax'))

model2.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_23 (Conv1D)           (None, 61, 60)            720       
_________________________________________________________________
activation_30 (Activation)   (None, 61, 60)            0         
_________________________________________________________________
dropout_22 (Dropout)         (None, 61, 60)            0         
_________________________________________________________________
conv1d_24 (Conv1D)           (None, 59, 60)            10860     
_________________________________________________________________
activation_31 (Activation)   (None, 59, 60)            0         
_________________________________________________________________
dropout_23 (Dropout)         (None, 59, 60)            0         
_________________________________________________________________
conv1d_25 (Conv1D)           (None, 53, 60)           

In [115]:
learning_rates = 1e-3
optim = tf.keras.optimizers.Adam(lr=learning_rates, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
model2.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy']) 

In [116]:
model2.fit(X_train_r, y_train_v, epochs=200, batch_size=batch_size, validation_data=(X_val_r, y_val_v), verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f62c845c550>

**Model with 5 Con1D layers**

In [55]:
model = Sequential()

# input layer
model.add(Conv1D(filters=64, kernel_size=23, padding='same', input_shape=(71,1)))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=128, kernel_size=13))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=256, kernel_size=7))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))

model.add(Conv1D(filters=512, kernel_size=3))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))

model.add(Conv1D(filters=256, kernel_size=3))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dense(num_class))
model.add(Activation('softmax'))


model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_3 (Conv1D)            (None, 71, 64)            1536      
_________________________________________________________________
batch_normalization (BatchNo (None, 71, 64)            284       
_________________________________________________________________
activation_4 (Activation)    (None, 71, 64)            0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 35, 64)            0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 23, 128)           106624    
_________________________________________________________________
batch_normalization_1 (Batch (None, 23, 128)           92        
_________________________________________________________________
activation_5 (Activation)    (None, 23, 128)          

In [56]:
learning_rates = 0.0001
optim = tf.keras.optimizers.Adam(lr=learning_rates, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
model.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy']) 

## Step 4. Training the model

In [60]:
# fit network
model.fit(X_train_r, y_train_v, epochs=200, batch_size=batch_size, validation_data=(X_val_r, y_val_v), verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

KeyboardInterrupt: ignored

In [59]:
# evaluate model
accuracy = model.evaluate(X_test_r, y_test_v, batch_size=batch_size, verbose=1)



## Step 5. Reorder the features and put the most important feature first

In [None]:
column_names = [
 'Dst Port',
 'Protocol',
 'Flow Duration',
 'Tot Fwd Pkts',
 'Tot Bwd Pkts',
 'TotLen Fwd Pkts',
 'TotLen Bwd Pkts',
 'Fwd Pkt Len Max',
 'Fwd Pkt Len Min',
 'Fwd Pkt Len Mean',
 'Fwd Pkt Len Std',
 'Bwd Pkt Len Max',
 'Bwd Pkt Len Min',
 'Bwd Pkt Len Mean',
 'Bwd Pkt Len Std',
 'Flow Byts/s',
 'Flow Pkts/s',
 'Flow IAT Mean',
 'Flow IAT Std',
 'Flow IAT Max',
 'Flow IAT Min',
 'Fwd IAT Tot',
 'Fwd IAT Mean',
 'Fwd IAT Std',
 'Fwd IAT Max',
 'Fwd IAT Min',
 'Bwd IAT Tot',
 'Bwd IAT Mean',
 'Bwd IAT Std',
 'Bwd IAT Max',
 'Bwd IAT Min',
 'Fwd PSH Flags',
 'Bwd PSH Flags',
 'Fwd URG Flags',
 'Bwd URG Flags',
 'Fwd Header Len',
 'Bwd Header Len',
 'Fwd Pkts/s',
 'Bwd Pkts/s',
 'Pkt Len Min',
 'Pkt Len Max',
 'Pkt Len Mean',
 'Pkt Len Std',
 'Pkt Len Var',
 'FIN Flag Cnt',
 'SYN Flag Cnt',
 'RST Flag Cnt',
 'PSH Flag Cnt',
 'ACK Flag Cnt',
 'URG Flag Cnt',
 'CWE Flag Count',
 'ECE Flag Cnt',
 'Down/Up Ratio',
 'Pkt Size Avg',
 'Fwd Seg Size Avg',
 'Bwd Seg Size Avg',
 'Fwd Byts/b Avg',
 'Fwd Pkts/b Avg',
 'Fwd Blk Rate Avg',
 'Bwd Byts/b Avg',
 'Bwd Pkts/b Avg',
 'Bwd Blk Rate Avg',
 'Subflow Fwd Pkts',
 'Subflow Fwd Byts',
 'Subflow Bwd Pkts',
 'Subflow Bwd Byts',
 'Init Fwd Win Byts',
 'Init Bwd Win Byts',
 'Fwd Act Data Pkts',
 'Fwd Seg Size Min',
 'Active Mean',
 'Active Std',
 'Active Max',
 'Active Min',
 'Idle Mean',
 'Idle Std',
 'Idle Max',
 'Idle Min']

In [None]:
len(column_names)

78

According to "**Selection and Performance Analysis of CICIDS2017 Features Importance**", the important features are: *Destination Port, Fwd IAT Min, Init_Win_bytes_Forward, Init_Win_bytes_backward* and *FlowIATMin*

In [None]:
important_column_names = ['Dst Port',
 'Fwd IAT Min',
 'Init Fwd Win Byts',
 'Init Bwd Win Byts',
 'Flow IAT Min',
 'Flow Duration',
 'Tot Fwd Pkts',
 'Tot Bwd Pkts',
 'TotLen Fwd Pkts',
 'TotLen Bwd Pkts',
 'Fwd Pkt Len Max',
 'Fwd Pkt Len Min',
 'Fwd Pkt Len Mean',
 'Fwd Pkt Len Std',
 'Bwd Pkt Len Max',
 'Bwd Pkt Len Min',
 'Bwd Pkt Len Mean',
 'Bwd Pkt Len Std',
 'Protocol',
 'Flow Byts/s',
 'Flow Pkts/s',
 'Flow IAT Mean',
 'Flow IAT Std',
 'Flow IAT Max',
 'Fwd IAT Tot',
 'Fwd IAT Mean',
 'Fwd IAT Std',
 'Fwd IAT Max',
 'Bwd IAT Tot',
 'Bwd IAT Mean',
 'Bwd IAT Std',
 'Bwd IAT Max',
 'Bwd IAT Min',
 'Fwd PSH Flags',
 'Bwd PSH Flags',
 'Fwd URG Flags',
 'Bwd URG Flags',
 'Fwd Header Len',
 'Bwd Header Len',
 'Fwd Pkts/s',
 'Bwd Pkts/s',
 'Pkt Len Min',
 'Pkt Len Max',
 'Pkt Len Mean',
 'Pkt Len Std',
 'Pkt Len Var',
 'FIN Flag Cnt',
 'SYN Flag Cnt',
 'RST Flag Cnt',
 'PSH Flag Cnt',
 'ACK Flag Cnt',
 'URG Flag Cnt',
 'CWE Flag Count',
 'ECE Flag Cnt',
 'Down/Up Ratio',
 'Pkt Size Avg',
 'Fwd Seg Size Avg',
 'Bwd Seg Size Avg',
 'Fwd Byts/b Avg',
 'Fwd Pkts/b Avg',
 'Fwd Blk Rate Avg',
 'Bwd Byts/b Avg',
 'Bwd Pkts/b Avg',
 'Bwd Blk Rate Avg',
 'Subflow Fwd Pkts',
 'Subflow Fwd Byts',
 'Subflow Bwd Pkts',
 'Subflow Bwd Byts',
 'Fwd Act Data Pkts',
 'Fwd Seg Size Min',
 'Active Mean',
 'Active Std',
 'Active Max',
 'Active Min',
 'Idle Mean',
 'Idle Std',
 'Idle Max',
 'Idle Min']

Read X_train and X_test datasets.

In [None]:
X_train = pd.read_csv(cleanfile, skiprows=0,index_col=0) 
X_test = pd.read_csv(xtestsmall, skiprows=0, index_col=0)
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77
0,80.0,6.0,69475.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,43.181,34737.5,10952.38,42482.0,26993.0,69475.0,34737.5,10952.38,42482.0,26993.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,96.0,0.0,43.181,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,225.0,-1.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,53102.0,6.0,51.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39215.68627,51.0,0.0,51.0,51.0,51.0,51.0,0.0,51.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,39215.68627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,256.0,-1.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8080.0,6.0,517.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3868.471954,517.0,0.0,517.0,517.0,517.0,517.0,0.0,517.0,517.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,3868.471954,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2052.0,-1.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22.0,6.0,369481.0,22.0,22.0,1928.0,2665.0,640.0,0.0,87.636364,137.780552,976.0,0.0,121.136364,258.64156,12430.950441,119.085961,8592.581395,22766.87,97771.0,2.0,369439.0,17592.33,35751.91,133959.0,163.0,369470.0,17593.809524,38826.475848,129556.0,11.0,0.0,0.0,0.0,0.0,712.0,712.0,59.542981,59.542981,0.0,976.0,102.066667,203.740967,41510.381818,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,104.386364,87.636364,121.136364,0.0,0.0,0.0,0.0,0.0,0.0,22.0,1928.0,22.0,2665.0,26883.0,230.0,16.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,80.0,6.0,5007525.0,5.0,3.0,646.0,364.0,646.0,0.0,129.2,288.899983,364.0,0.0,121.333333,210.155498,201.696447,1.597596,715360.714286,1868948.0,4953524.0,16.0,5007525.0,1251881.0,2467885.0,4953524.0,406.0,3255.0,1627.5,634.274783,2076.0,1179.0,0.0,0.0,0.0,0.0,112.0,72.0,0.998497,0.599098,0.0,646.0,112.222222,233.577491,54558.444444,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,126.25,129.2,121.333333,0.0,0.0,0.0,0.0,0.0,0.0,5.0,646.0,3.0,364.0,8192.0,221.0,1.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Add the column names to them

In [None]:
 X_train.columns =  column_names
 X_test.columns = column_names
 X_train.head()

Unnamed: 0,Dst Port,Protocol,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Flow IAT Min,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Fwd IAT Min,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Init Fwd Win Byts,Init Bwd Win Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
0,80.0,6.0,69475.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,43.181,34737.5,10952.38,42482.0,26993.0,69475.0,34737.5,10952.38,42482.0,26993.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,96.0,0.0,43.181,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,225.0,-1.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,53102.0,6.0,51.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39215.68627,51.0,0.0,51.0,51.0,51.0,51.0,0.0,51.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,39215.68627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,256.0,-1.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8080.0,6.0,517.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3868.471954,517.0,0.0,517.0,517.0,517.0,517.0,0.0,517.0,517.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,3868.471954,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2052.0,-1.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22.0,6.0,369481.0,22.0,22.0,1928.0,2665.0,640.0,0.0,87.636364,137.780552,976.0,0.0,121.136364,258.64156,12430.950441,119.085961,8592.581395,22766.87,97771.0,2.0,369439.0,17592.33,35751.91,133959.0,163.0,369470.0,17593.809524,38826.475848,129556.0,11.0,0.0,0.0,0.0,0.0,712.0,712.0,59.542981,59.542981,0.0,976.0,102.066667,203.740967,41510.381818,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,104.386364,87.636364,121.136364,0.0,0.0,0.0,0.0,0.0,0.0,22.0,1928.0,22.0,2665.0,26883.0,230.0,16.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,80.0,6.0,5007525.0,5.0,3.0,646.0,364.0,646.0,0.0,129.2,288.899983,364.0,0.0,121.333333,210.155498,201.696447,1.597596,715360.714286,1868948.0,4953524.0,16.0,5007525.0,1251881.0,2467885.0,4953524.0,406.0,3255.0,1627.5,634.274783,2076.0,1179.0,0.0,0.0,0.0,0.0,112.0,72.0,0.998497,0.599098,0.0,646.0,112.222222,233.577491,54558.444444,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,126.25,129.2,121.333333,0.0,0.0,0.0,0.0,0.0,0.0,5.0,646.0,3.0,364.0,8192.0,221.0,1.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


reorder the columns

In [None]:
X_train_important = X_train.reindex(important_column_names, axis=1)
X_test_important = X_test.reindex(important_column_names, axis=1)
X_train_important.head()

Unnamed: 0,Dst Port,Fwd IAT Min,Init Fwd Win Byts,Init Bwd Win Byts,Flow IAT Min,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Protocol,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
0,80.0,26993.0,225.0,-1.0,26993.0,69475.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,43.181,34737.5,10952.38,42482.0,69475.0,34737.5,10952.38,42482.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,96.0,0.0,43.181,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,53102.0,51.0,256.0,-1.0,51.0,51.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,39215.68627,51.0,0.0,51.0,51.0,51.0,0.0,51.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,39215.68627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,8080.0,517.0,2052.0,-1.0,517.0,517.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,3868.471954,517.0,0.0,517.0,517.0,517.0,0.0,517.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,3868.471954,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22.0,163.0,26883.0,230.0,2.0,369481.0,22.0,22.0,1928.0,2665.0,640.0,0.0,87.636364,137.780552,976.0,0.0,121.136364,258.64156,6.0,12430.950441,119.085961,8592.581395,22766.87,97771.0,369439.0,17592.33,35751.91,133959.0,369470.0,17593.809524,38826.475848,129556.0,11.0,0.0,0.0,0.0,0.0,712.0,712.0,59.542981,59.542981,0.0,976.0,102.066667,203.740967,41510.381818,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,104.386364,87.636364,121.136364,0.0,0.0,0.0,0.0,0.0,0.0,22.0,1928.0,22.0,2665.0,16.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,80.0,406.0,8192.0,221.0,16.0,5007525.0,5.0,3.0,646.0,364.0,646.0,0.0,129.2,288.899983,364.0,0.0,121.333333,210.155498,6.0,201.696447,1.597596,715360.714286,1868948.0,4953524.0,5007525.0,1251881.0,2467885.0,4953524.0,3255.0,1627.5,634.274783,2076.0,1179.0,0.0,0.0,0.0,0.0,112.0,72.0,0.998497,0.599098,0.0,646.0,112.222222,233.577491,54558.444444,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,126.25,129.2,121.333333,0.0,0.0,0.0,0.0,0.0,0.0,5.0,646.0,3.0,364.0,1.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
X_test_important.head()

Unnamed: 0,Dst Port,Fwd IAT Min,Init Fwd Win Byts,Init Bwd Win Byts,Flow IAT Min,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,Fwd Pkt Len Mean,Fwd Pkt Len Std,Bwd Pkt Len Max,Bwd Pkt Len Min,Bwd Pkt Len Mean,Bwd Pkt Len Std,Protocol,Flow Byts/s,Flow Pkts/s,Flow IAT Mean,Flow IAT Std,Flow IAT Max,Fwd IAT Tot,Fwd IAT Mean,Fwd IAT Std,Fwd IAT Max,Bwd IAT Tot,Bwd IAT Mean,Bwd IAT Std,Bwd IAT Max,Bwd IAT Min,Fwd PSH Flags,Bwd PSH Flags,Fwd URG Flags,Bwd URG Flags,Fwd Header Len,Bwd Header Len,Fwd Pkts/s,Bwd Pkts/s,Pkt Len Min,Pkt Len Max,Pkt Len Mean,Pkt Len Std,Pkt Len Var,FIN Flag Cnt,SYN Flag Cnt,RST Flag Cnt,PSH Flag Cnt,ACK Flag Cnt,URG Flag Cnt,CWE Flag Count,ECE Flag Cnt,Down/Up Ratio,Pkt Size Avg,Fwd Seg Size Avg,Bwd Seg Size Avg,Fwd Byts/b Avg,Fwd Pkts/b Avg,Fwd Blk Rate Avg,Bwd Byts/b Avg,Bwd Pkts/b Avg,Bwd Blk Rate Avg,Subflow Fwd Pkts,Subflow Fwd Byts,Subflow Bwd Pkts,Subflow Bwd Byts,Fwd Act Data Pkts,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
0,80.0,53422.0,225.0,-1.0,53422.0,53422.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,37.43776,53422.0,0.0,53422.0,53422.0,53422.0,0.0,53422.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0,0.0,37.43776,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,51863.0,1.0,946.0,-1.0,1.0,3.0,3.0,0.0,31.0,0.0,31.0,0.0,10.333333,17.897858,0.0,0.0,0.0,0.0,6.0,10300000.0,1000000.0,1.5,0.7071068,2.0,3.0,1.5,0.707107,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,0.0,1000000.0,0.0,0.0,31.0,7.75,15.5,240.25,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,10.333333,10.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,31.0,0.0,0.0,1.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,80.0,81829392.0,211.0,219.0,2.0,81829392.0,2.0,1.0,8.0,0.0,8.0,0.0,4.0,5.656854,0.0,0.0,0.0,0.0,6.0,0.09776438,0.036662,40914700.0,57862120.0,81829390.0,81829392.0,81829392.0,0.0,81829392.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,64.0,32.0,0.024441,0.012221,0.0,8.0,4.0,4.618802,21.333333,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5.333333,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,8.0,1.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,53.0,18491.0,-1.0,-1.0,33.0,101932.0,2.0,2.0,92.0,296.0,46.0,46.0,46.0,0.0,148.0,148.0,148.0,0.0,17.0,3806.459,39.241848,33977.33,43791.75,83408.0,18491.0,18491.0,0.0,18491.0,33.0,33.0,0.0,33.0,33.0,0.0,0.0,0.0,0.0,16.0,16.0,19.620924,19.620924,46.0,148.0,86.8,55.867701,3121.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,108.5,46.0,148.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,92.0,2.0,296.0,1.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,80.0,100.0,2047.0,-1.0,100.0,100.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,20000.0,100.0,0.0,100.0,100.0,100.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,20000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Step 6. Normalization Again

In [None]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_important)
X_test = scaler.fit_transform(X_test_important)

In [None]:
X_train_r = np.zeros((len(X_train), input_dim, 1))
X_train_r[:, :, 0] = X_train[:, :input_dim]

In [None]:
X_test_r = np.zeros((len(X_test), input_dim, 1))
X_test_r[:, :, 0] = X_test[:, :input_dim]

## Step 7. Reuse the Model and train it again

In [None]:
model = Sequential()

# input layer
model.add(Conv1D(filters=32, kernel_size=17, padding='same', input_shape=(78,1)))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=128, kernel_size=7))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2))

model.add(Conv1D(filters=256, kernel_size=5))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))

model.add(Conv1D(filters=512, kernel_size=3))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))

model.add(Conv1D(filters=256, kernel_size=3))
model.add(BatchNormalization(axis=1))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dense(num_class))
model.add(Activation('softmax'))


model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_15 (Conv1D)           (None, 78, 32)            576       
_________________________________________________________________
batch_normalization_15 (Batc (None, 78, 32)            312       
_________________________________________________________________
activation_18 (Activation)   (None, 78, 32)            0         
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 39, 32)            0         
_________________________________________________________________
conv1d_16 (Conv1D)           (None, 33, 128)           28800     
_________________________________________________________________
batch_normalization_16 (Batc (None, 33, 128)           132       
_________________________________________________________________
activation_19 (Activation)   (None, 33, 128)          

In [None]:
#num_epochs = 35

model2.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy']) 

In [None]:
model2.fit(X_train_r, y_train, epochs=num_epochs, batch_size=batch_size, validation_data=(X_test_r, y_test), verbose=1)

Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 65/90
Epoch 66/90
Epoch 67/90
Epoch 68/90
Epoch 69/90
Epoch 70/90
Epoch 71/90
Epoch 72/90
Epoch 73/90
Epoch 74/90
Epoch 75/90
Epoch 76/90
Epoch 77/90
Epoch 78/90
Epoch 79/90
Epoch 80/90
Epoch 81/90
Epoch 82/90
Epoch 83/90
Epoch 84/90
E

<tensorflow.python.keras.callbacks.History at 0x7f59cb6677f0>

In [None]:
learning_rates = 1e-6

optim = tf.keras.optimizers.Adam(lr=learning_rates, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
model2.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy']) 

In [None]:
model2.fit(X_train_r, y_train, epochs=60, batch_size=batch_size, validation_data=(X_test_r, y_test), verbose=1)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<tensorflow.python.keras.callbacks.History at 0x7f59c23c59b0>