In [67]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from torch import optim
import numpy as np
import pandas as pd
import os
import io
import re
import sys
import time
%matplotlib inline

In [68]:
IDS_df = pd.read_csv("data/03-02-2018.csv")

# To display the top 5 rows
IDS_df.head(5)

Unnamed: 0,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,TotLen Fwd Pkts,TotLen Bwd Pkts,Fwd Pkt Len Max,Fwd Pkt Len Min,...,Fwd Seg Size Min,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,443,6,02/03/2018 08:47:38,141385,9,7,553,3773.0,202,0,...,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Benign
1,49684,6,02/03/2018 08:47:38,281,2,1,38,0.0,38,0,...,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Benign
2,443,6,02/03/2018 08:47:40,279824,11,15,1086,10527.0,385,0,...,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Benign
3,443,6,02/03/2018 08:47:40,132,2,0,0,0.0,0,0,...,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Benign
4,443,6,02/03/2018 08:47:41,274016,9,13,1285,6141.0,517,0,...,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Benign


In [69]:
# print shape before dropping NaN rows
print(IDS_df.shape)

(1048575, 80)


In [70]:
# Finding the null values.
print(IDS_df.isin([np.nan, np.inf, -np.inf]).sum().sum())

8100


In [71]:
#  first replace infs to NaN:
IDS_df = IDS_df.replace([np.inf, -np.inf], np.nan)

In [72]:
# print shape after dropping NaN rows
IDS_df = IDS_df.dropna()
print(IDS_df.shape)
IDS_df = IDS_df.reset_index(drop=True)

(1044525, 80)


In [73]:
# Finding the null values.
print(IDS_df.isin([np.nan, np.inf, -np.inf]).sum().sum())

0


Examine the proportion of types of traffic:

In [74]:
y = IDS_df["Label"].values
from collections import Counter

Counter(y).most_common()

[('Benign', 758334), ('Bot', 286191)]

Convert all non-normal observations into a single class:

In [75]:
def label_anomalous(text):
    """Binarize target labels into normal or anomalous."""
    if text == "Benign":
        return 0
    else:
        return 1

IDS_df["Label"] = IDS_df["Label"].apply(label_anomalous)

In [76]:
y = IDS_df["Label"].values
Counter(y).most_common()

[(0, 758334), (1, 286191)]

Convert all categorical features into numerical form:

In [77]:
from sklearn.preprocessing import LabelEncoder

encodings_dictionary = dict()
for c in IDS_df.columns:
    if IDS_df[c].dtype == "object":
        encodings_dictionary[c] = LabelEncoder()
        IDS_df[c] = encodings_dictionary[c].fit_transform(IDS_df[c])

Split the dataset into normal and abnormal observations:

In [1]:
IDS_df_normal = IDS_df[IDS_df["Label"] == 0]
IDS_df_abnormal = IDS_df[IDS_df["Label"] == 1]
y_normal = IDS_df_normal.pop("Label").values
X_normal = IDS_df_normal.values
y_anomaly = IDS_df_abnormal.pop("Label").values
X_anomaly = IDS_df_abnormal.values

NameError: name 'IDS_df' is not defined

Train-test split the dataset:

In [105]:
from sklearn.model_selection import train_test_split

X_normal_train, X_normal_test, y_normal_train, y_normal_test = train_test_split(
    X_normal, y_normal, test_size=0.3, random_state=11
)
X_anomaly_train, X_anomaly_test, y_anomaly_train, y_anomaly_test = train_test_split(
    X_anomaly, y_anomaly, test_size=0.3, random_state=11
)

import numpy as np

# X_train = np.concatenate((X_normal_train, X_anomaly_train))
# y_train = np.concatenate((y_normal_train, y_anomaly_train))
# X_test = np.concatenate((X_normal_test, X_anomaly_test))
# y_test = np.concatenate((y_normal_test, y_anomaly_test))

X_train = np.concatenate((X_normal_train[:10000], X_anomaly_train[:10000]))
y_train = np.concatenate((y_normal_train[:10000], y_anomaly_train[:10000]))
X_test = np.concatenate((X_normal_test[:1000], X_anomaly_test[:1000]))
y_test = np.concatenate((y_normal_test[:1000], y_anomaly_test[:1000]))

In [106]:
X_train.shape

(20000, 79)

In [107]:
batch_size = 100

# Pytorch
X_train  = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train)

X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test)

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(X_train, y_train)
valid = torch.utils.data.TensorDataset(X_test, y_test)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = batch_size, shuffle = False)
valid_loader = torch.utils.data.DataLoader(valid, batch_size = batch_size, shuffle = False)

print('Completed loading data')

Completed loading data


In [108]:
train_loader.dataset.tensors[0].shape

torch.Size([20000, 79])

In [113]:
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
        
# Defining the DNN model
input_size = train_loader.dataset.tensors[0].shape[1]
hidden_layers = [256,256]
output_size = 2

# model = nn.Sequential(
#     nn.Linear(input_size, hidden_layers[0]),
#     nn.ReLU(),
#     nn.Linear(hidden_layers[0], hidden_layers[1]),
#     nn.ReLU(),
#     nn.Linear(hidden_layers[1], output_size),
#     nn.Sigmoid()
# )


# model definition
class MLP(nn.Module):
    # define model elements
    def __init__(self, n_inputs):
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(n_inputs, hidden_layers[0])
        self.activ1 = nn.ReLU()
        
        self.layer2 = nn.Linear(hidden_layers[0], hidden_layers[1])
        self.activ2 = nn.ReLU()
        
        self.layer3 = nn.Linear(hidden_layers[1], output_size)
                
    # forward propagate input
    def forward(self, x):
        x = self.layer1(x)
        x = self.activ1(x)
        
        x = self.layer2(x)
        x = self.activ2(x)
        
        x = self.layer3(x)
        
        return x

model = MLP(input_size)
print(model)
model.to(device)

 # Cross Entropy Loss 
error = nn.CrossEntropyLoss().to(device)
# SGD Optimizer
learning_rate = 0.001
# TODO: Try SGD
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

MLP(
  (layer1): Linear(in_features=79, out_features=256, bias=True)
  (activ1): ReLU()
  (layer2): Linear(in_features=256, out_features=256, bias=True)
  (activ2): ReLU()
  (layer3): Linear(in_features=256, out_features=2, bias=True)
)


In [114]:
print('Start training...')
start_time = time.time()
    
epochs = 20
for e in range(epochs):
    count = 0
    loss_list = []
    iteration_list = []
    accuracy_list = []
   
    for i, (data, labels) in enumerate(train_loader):
        train = data.to(device)
        #print(labels)
        labels = labels.to(device)

        # Clear gradients
        optimizer.zero_grad()
        # Forward propagation
        outputs = model(train)
        
        # Calculate softmax and cross entropy loss
        loss = error(outputs, labels)
        # Calculating gradients
        loss.backward()
        # Update parameters
        optimizer.step()

        if count % 100 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for data, labels in valid_loader:
                valid = data.to(device)
                #print('Lables:', labels)
                
                labels = labels.to(device)

                # Forward propagation
                outputs = model(valid)
                # Get predictions from the maximum value
                predicted = torch.max(outputs.data, 1)[1]

                #print('Predicted: ', predicted)
                
                # Total number of labels
                total += len(labels)
                correct += (predicted == labels).sum()

            accuracy = 100 * correct / float(total)

            # store loss and iteration
            loss_list.append(loss.data)
            iteration_list.append(count)
            accuracy_list.append(accuracy)
        if count % 100 == 0:
            # Print Loss
            print('Epoch: {} Iteration: {}  Loss: {}  Accuracy: {} %'.format(e + 1, count, loss.data, accuracy))

        count += 1

end_time = time.time()
print('Epochs completed. Time taken (seconds): ', str(end_time - start_time))
    

Start training...
Epoch: 1 Iteration: 0  Loss: 23.320106506347656  Accuracy: 50 %
Epoch: 1 Iteration: 100  Loss: 7099.6474609375  Accuracy: 50 %
Epoch: 2 Iteration: 0  Loss: 11885061.0  Accuracy: 50 %
Epoch: 2 Iteration: 100  Loss: 3347.555908203125  Accuracy: 50 %
Epoch: 3 Iteration: 0  Loss: 9878019.0  Accuracy: 50 %
Epoch: 3 Iteration: 100  Loss: 1730.6136474609375  Accuracy: 50 %
Epoch: 4 Iteration: 0  Loss: 328680.6875  Accuracy: 79 %
Epoch: 4 Iteration: 100  Loss: 9.142168998718262  Accuracy: 94 %
Epoch: 5 Iteration: 0  Loss: 2.5265231132507324  Accuracy: 99 %
Epoch: 5 Iteration: 100  Loss: 0.10745707154273987  Accuracy: 98 %
Epoch: 6 Iteration: 0  Loss: 0.0  Accuracy: 98 %
Epoch: 6 Iteration: 100  Loss: 0.12235981225967407  Accuracy: 97 %
Epoch: 7 Iteration: 0  Loss: 0.06597274541854858  Accuracy: 99 %
Epoch: 7 Iteration: 100  Loss: 1.7721558809280396  Accuracy: 89 %
Epoch: 8 Iteration: 0  Loss: 0.0  Accuracy: 99 %
Epoch: 8 Iteration: 100  Loss: 0.0004635810910258442  Accuracy: 