# Do some imports

In [49]:
import numpy as np
import pandas as pd

import torch
import torch.utils.data
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader, Dataset

#needed to create the Neural Network
import torch.nn as nn
import torch.nn.functional as F

#needed to preprocess the dataset
from sklearn import preprocessing
from sklearn.metrics import accuracy_score

%matplotlib notebook
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from dataloader import UNSW_NB15

# General display settings: allow pandas to show up to 500 columns and
# 500 rows before it truncates a displayed DataFrame.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

import tensorflow as tf


### Inspired by [this github file](https://github.com/alik604/cyber-security/blob/master/Intrusion-Detection/UNSW_NB15%20-%20Torch%20MLP%20and%20autoEncoder.ipynb)

# Get UNSW_NB15 train and test set

In [None]:
#!wget https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/a%20part%20of%20training%20and%20testing%20set/UNSW_NB15_training-set.csv

In [None]:
#!wget https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/a%20part%20of%20training%20and%20testing%20set/UNSW_NB15_testing-set.csv

# Define the Neural Network class

In [2]:
# define NN architecture
class Net(nn.Module):
    """Fully connected classifier with two hidden ReLU layers and a sigmoid output.

    Args:
        input_size: number of features in each input row.
        hidden_size: width of the first hidden layer.
        hidden_size_2: width of the second hidden layer.
        num_classes: number of output units.
    """

    def __init__(self, input_size, hidden_size, hidden_size_2, num_classes):
        super().__init__()

        # Linear stack: input_size -> hidden_size -> hidden_size_2 -> num_classes
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size_2)
        self.fc3 = nn.Linear(hidden_size_2, num_classes)

        # One activation module per hidden layer
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()

    def forward(self, x):
        """Map the input tensor `x` to sigmoid-activated outputs."""
        first_hidden = self.relu1(self.fc1(x))
        second_hidden = self.relu2(self.fc2(first_hidden))
        # Sigmoid on the last layer because the notebook trains with BCELoss
        return torch.sigmoid(self.fc3(second_hidden))


In [24]:
class Net(nn.Module):
    """Fully connected classifier with one hidden ReLU layer and a sigmoid output.

    Args:
        input_size: number of features in each input row.
        hidden_size: width of the hidden layer.
        num_classes: number of output units.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Linear stack: input_size -> hidden_size -> num_classes
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map the input tensor `x` to sigmoid-activated outputs."""
        hidden = self.relu(self.fc1(x))
        # Sigmoid on the last layer because the notebook trains with BCELoss
        return torch.sigmoid(self.fc2(hidden))

### Define the `train`, `test` and `display_loss_plot` functions

In [7]:
def train(model, device, train_loader, optimizer, losses, criterion=None):
    """Run one training epoch and record the loss of every batch.

    Args:
        model: network to optimise; it is switched to training mode.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that updates the parameters of ``model``.
        losses: list that receives one loss value per batch (modified in place).
        criterion: loss called as ``criterion(output, target)``. When omitted,
            the notebook-level ``criterion`` variable is used, so existing
            five-argument calls keep working.

    Returns:
        The same ``losses`` list, extended with this epoch's batch losses.

    Raises:
        NameError: if ``criterion`` is omitted and no notebook-level
            ``criterion`` exists.
    """
    if criterion is None:
        # Backward compatibility with calls that rely on the global loss object.
        try:
            criterion = globals()["criterion"]
        except KeyError:
            raise NameError("name 'criterion' is not defined") from None

    model.train()
    y_true = []
    y_pred = []
    for data, target in train_loader:
        # Move the batch to the configured device. The target is cast to float
        # so it has the same dtype as the float model output given to the loss.
        data, target = data.to(device), target.to(device).float()

        # Forward pass; the target gets a trailing axis to line up with the
        # model output (assumes one output column per sample -- TODO confirm).
        output = model(data.float())
        loss = criterion(output, target.unsqueeze(1))

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Round inside PyTorch (np.round would have to convert the tensor to a
        # NumPy array, which is not possible for GPU tensors) and flatten so
        # that y_pred is a flat list just like y_true.
        y_pred.extend(torch.round(output.detach()).reshape(-1).tolist())
        y_true.extend(torch.round(target.detach()).reshape(-1).tolist())

        losses.append(loss.item())

    print("Accuracy on training set is", accuracy_score(y_true, y_pred))
    return losses

In [8]:
#TESTING THE MODEL
def test(model, device, test_loader):
    #model in eval mode skips Dropout etc
    model.eval()
    y_true = []
    y_pred = []
    
    # set the requires_grad flag to false as we are in the test mode
    with torch.no_grad():
        for i in test_loader:
            
            #LOAD THE DATA IN A BATCH
            data,target = i
            
            # moving the tensors to the configured device
            data, target = data.to(device), target.to(device)
            
            # the model on the data
            output = model(data.float())
                       
            #PREDICTIONS
            pred = np.round(output)
            target = target.float()
            y_true.extend(target.tolist()) 
            y_pred.extend(pred.reshape(-1).tolist())
    
            
    print("Accuracy on test set is" , accuracy_score(y_true,y_pred))
    print("***********************************************************")

In [32]:
def display_loss_plot(losses):
    """Plot the recorded loss values against their position in `losses`.

    Args:
        losses: sequence of loss values, one per recorded iteration.
    """
    iterations = list(range(len(losses)))
    plt.plot(iterations, losses)
    plt.title('Loss of the model')
    plt.xlabel('iterations')
    plt.ylabel('Cross entropy loss')
    plt.show()

## Define some parameters first

In [55]:
# Network dimensions
input_size = 196      # number of input features; use 42 with integer encoding
hidden_size = 64      # 1st hidden layer: number of neurons
hidden_size_2 = 64    # 2nd hidden layer: number of neurons (only needed by the two-hidden-layer Net)
num_classes = 1    # number of output units passed to Net

# Training settings
num_epochs = 5
learning_rate = 0.001
BATCH_SIZE_1 = 50 # batch size of train_loader (training set has 175341 observations)
BATCH_SIZE_2 = 50 # batch size of test_loader (test set has 82332 observations)

# Device that train() and test() move every batch to
device = 'cpu'

## Initialize UNSW_NB15 class

In [15]:
# Training split: wrap the CSV in the project dataset class and shuffle the
# batches so each epoch sees the samples in a different order.
train_dataset = UNSW_NB15(file_path ='UNSW_NB15_training-set.csv')
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE_1, shuffle=True)

# Test split: evaluation only reads every sample once, so shuffling is not
# needed; a fixed order keeps the evaluation pass deterministic.
test_dataset = UNSW_NB15(file_path ='UNSW_NB15_testing-set.csv')
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE_2, shuffle=False)


torch.Size([175341, 197])
torch.Size([82332, 197])


## Initialize Neural Network class

In [56]:
# Two-hidden-layer variant (only valid while the four-argument Net is the active definition):
#model = Net(input_size, hidden_size, hidden_size_2, num_classes).to(device)

# Build the network and place it on the same device that train() and test()
# move every batch to, so model and data always live together.
model = Net(input_size, hidden_size, num_classes).to(device)
print(model)

Net(
  (fc1): Linear(in_features=196, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
)


## Define loss and optimizer 

In [57]:
# Binary cross-entropy on probabilities, matching the sigmoid output of Net
criterion = nn.BCELoss()

# Adam over all parameters of the model, using the configured learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

## Train and Test the model

In [58]:
# Batch losses accumulated over all epochs (train() appends to and returns this list)
losses = []
for epoch in tqdm(range(num_epochs)):
    # One pass over the training data, then an evaluation pass on the test data
    losses = train(model, device, train_loader, optimizer, losses)
    test(model, device, test_loader)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

Accuracy on training set is 0.6771605043885913
Accuracy on test set is 0.551498809697323
***********************************************************
Accuracy on training set is 0.6819796852989318
Accuracy on test set is 0.5513652042948064
***********************************************************
Accuracy on training set is 0.6819625757809069
Accuracy on test set is 0.5513652042948064
***********************************************************
Accuracy on training set is 0.6818428091547328
Accuracy on test set is 0.5513652042948064
***********************************************************
Accuracy on training set is 0.6818827313634575
Accuracy on test set is 0.5513652042948064
***********************************************************



In [60]:
# Reuse the plotting helper defined earlier instead of repeating its body;
# it also ends with plt.show(), so no stray Text(...) repr is echoed by the cell.
display_loss_plot(losses)

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Cross entropy loss')

[31.428571701049805,
 42.85714340209961,
 24.285715103149414,
 30.0,
 31.428571701049805,
 25.714284896850586,
 28.571428298950195,
 24.285715103149414,
 32.85714340209961,
 32.85714340209961,
 31.428571701049805,
 24.285715103149414,
 32.85714340209961,
 28.571428298950195,
 27.14285659790039,
 30.0,
 32.85714340209961,
 27.14285659790039,
 35.71428680419922,
 31.428571701049805,
 31.428571701049805,
 28.571428298950195,
 41.42856979370117,
 27.14285659790039,
 27.14285659790039,
 27.14285659790039,
 32.85714340209961,
 35.71428680419922,
 30.0,
 35.71428680419922,
 28.571428298950195,
 24.285715103149414,
 27.14285659790039,
 28.571428298950195,
 31.428571701049805,
 30.0,
 25.714284896850586,
 38.57143020629883,
 27.14285659790039,
 37.14285659790039,
 28.571428298950195,
 21.428571701049805,
 32.85714340209961,
 15.714285850524902,
 35.71428680419922,
 32.85714340209961,
 37.14285659790039,
 31.428571701049805,
 38.57143020629883,
 25.714284896850586,
 28.571428298950195,
 38.57143

### Recreate this paper's results
[Intrusion Detection Using Big Data and Deep Learning Techniques](https://www.researchgate.net/publication/332100759_Intrusion_Detection_Using_Big_Data_and_Deep_Learning_Techniques)

#### A minimal logistic-regression example to understand the training loop

In [None]:
class LogisticModel(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        in_dim: number of input features.
        out_dim: number of output units.
    """

    def __init__(self, in_dim, out_dim):
        super(LogisticModel, self).__init__()
        self.linear = nn.Linear(in_dim, out_dim)

    def forward(self, x):
        """Return the sigmoid of the linear layer applied to `x`."""
        # torch.sigmoid is what the Net classes in this notebook use;
        # F.sigmoid has been flagged as deprecated by PyTorch releases.
        out = torch.sigmoid(self.linear(x))
        return out

In [None]:
# One input feature, one output unit; the bare name on the last line shows the module summary
model = LogisticModel(in_dim=1, out_dim=1)
model

In [None]:
# `size_average` is a deprecated argument; reduction='mean' is its replacement
# and averages the loss over the batch just like size_average=True did.
criterion = torch.nn.BCELoss(reduction='mean')
# Plain stochastic gradient descent over the logistic model's parameters
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [None]:
epochs = 5

# The toy dataset never changes, so it is built once instead of on every epoch.
# Both tensors are already shaped (5, 1): five samples with a single value each.
x_train = torch.tensor([[1.6], [2.1], [1.3], [4.8], [3.5]], dtype=torch.float)
y_train = torch.tensor([[0], [0], [0], [1], [1]], dtype=torch.float)

inputs = x_train
labels = y_train

# Count epochs from 1 so the printed epoch number needs no manual increment
for epoch in range(1, epochs + 1):
    out = model(inputs)

    optimizer.zero_grad()
    loss = criterion(out, labels)

    loss.backward()
    optimizer.step()

    # Predictions after this epoch's update (kept in `predicted` for later cells);
    # calling the module itself is the supported way to run forward().
    predicted = model(x_train)
    print('Epoch {}, Loss {}'.format(epoch, loss.item()))
    print(model.state_dict())

In [None]:
# Show the shape of the training inputs built in the cell above
x_train.shape

In [None]:
x = x_train.detach().numpy()
y_hat = predicted.detach().numpy()

# The training inputs are not sorted; order them along the x axis before
# drawing a line, otherwise the line doubles back on itself between points.
order = np.argsort(x[:, 0])
plt.plot(x[order], y_hat[order], label ="predicted")
plt.plot(x, y_train.detach().numpy(),"go",label = "from data")
plt.legend()
plt.show()

#### Test the model

In [None]:
# Named `test_inputs` rather than `test` so the test() function defined
# earlier in the notebook is not overwritten by a tensor.
test_inputs = torch.tensor([[0.1], [1.5], [2.3], [3.0], [6.4]])
results = model(test_inputs)
for result in results:
    # Outputs below 0.5 are reported as the negative class
    print(result, "False" if result < 0.5 else "True")