# Do some imports

In [1]:
import numpy as np
import pandas as pd

import torch
import torch.utils.data
import torchvision.transforms as transforms
from torchvision import datasets
#needed to create the Neural Network
import torch.nn as nn
import torch.nn.functional as F

#needed to preprocess the dataset
from sklearn import preprocessing

%matplotlib notebook
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from dataloader import UNSW_NB15

#general
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)


### Inspired by [this github file](https://github.com/alik604/cyber-security/blob/master/Intrusion-Detection/UNSW_NB15%20-%20Torch%20MLP%20and%20autoEncoder.ipynb)

# Get UNSW_NB15 train and test set

In [2]:
#!wget https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/a%20part%20of%20training%20and%20testing%20set/UNSW_NB15_training-set.csv

In [3]:
#!wget https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/a%20part%20of%20training%20and%20testing%20set/UNSW_NB15_testing-set.csv

# Define the Neural Network class

In [4]:
# define NN architecture
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, hidden_size_2, num_classes):
        super(Net,self).__init__()
        self.input_size = input_size
        # linear layer (input_size -> hidden_size)
        self.fc1 = nn.Linear(input_size, hidden_size)
        # linear layer (hidden_size -> hidden_2)
        self.fc2 = nn.Linear(hidden_size, hidden_size_2)
        # linear layer (hidden_size_2 -> num_classes)
        self.fc3 = nn.Linear(hidden_size_2, num_classes)
        # dropout layer (p=0.2)
        # dropout prevents overfitting of data
        #self.droput = nn.Dropout(0.2)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()
        
    def forward(self,x):
        #x is the input tensor
        out = self.fc1(x)
        #add hidden layer, with relu activation function
        out = self.relu1(out)
        out = self.fc2(out)
        # add hidden layer, with relu activation function
        out = self.relu2(out)
        # add dropout instead of relu or not..?
        #out = self.droput(out)
        out = self.fc3(out)
        return out


In [5]:
def split_df_x_y(df1,df2) -> np.ndarray:
        """Splits dataframe into x and y numpy arrays. Y has the last column and the X has all the other"""
        x1 = df1.iloc[:, 0:-1].values
        y1 = df1.iloc[:, -1].values # Last column is the label
        x2 = df2.iloc[:, 0:-1].values
        y2 = df2.iloc[:, -1].values # Last column is the label
        return x1,y2,x2,y2

## Initialize UNSW_NB15 class

In [6]:
#get the train dataframe
unsw_nb15_training = UNSW_NB15(file_path ='UNSW_NB15_training-set.csv')
train   = unsw_nb15_training.one_hot_encoding_df()

#get the test dataframe
unsw_nb15_testing = UNSW_NB15(file_path ='UNSW_NB15_testing-set.csv')
test   = unsw_nb15_testing.one_hot_encoding_df()

x_train, y_train, x_test, y_test  = split_df_x_y(train, test)
train.head()

Unnamed: 0,dur,spkts,dpkts,sbytes,dbytes,rate,sttl,dttl,sload,dload,sloss,dloss,sinpkt,dinpkt,sjit,djit,swin,stcpb,dtcpb,dwin,tcprtt,synack,ackdat,smean,dmean,trans_depth,response_body_len,ct_srv_src,ct_state_ttl,ct_dst_ltm,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,label,tcp,udp,arp,ospf,icmp,igmp,rtp,ddp,ipv6-frag,cftp,wsn,pvp,wb-expak,mtp,pri-enc,sat-mon,cphb,sun-nd,iso-ip,xtp,il,unas,mfe-nsp,3pc,ipv6-route,idrp,bna,swipe,kryptolan,cpnx,rsvp,wb-mon,vmtp,ib,dgp,eigrp,ax.25,gmtp,pnni,sep,pgm,idpr-cmtp,zero,rvd,mobile,narp,fc,pipe,ipcomp,ipv6-no,sat-expak,ipv6-opts,snp,ipcv,br-sat-mon,ttp,tcf,nsfnet-igp,sprite-rpc,aes-sp3-d,sccopmce,sctp,qnx,scps,etherip,aris,pim,compaq-peer,vrrp,iatp,stp,l2tp,srp,sm,isis,smp,fire,ptp,crtp,sps,merit-inp,idpr,skip,any,larp,ipip,micp,encap,ifmp,tp++,a/n,ipv6,i-nlsp,ipx-n-ip,sdrp,tlsp,gre,mhrp,ddx,ippc,visa,secure-vmtp,uti,vines,crudp,iplt,ggp,ip,ipnip,st2,argus,bbn-rcc,egp,emcon,igp,nvp,pup,xnet,chaos,mux,dcn,hmp,prm,trunk-1,xns-idp,leaf-1,leaf-2,rdp,irtp,iso-tp4,netblt,trunk-2,cbt,-,ftp,smtp,snmp,http,ftp-data,dns,ssh,radius,pop3,dhcp,ssl,irc,FIN,INT,CON,ECO,REQ,RST,PAR,URN,no,ACC,CLO
0,0.121478,6,4,258,172,74.08749,252,254,14158.94238,8495.365234,0,0,24.2956,8.375,30.177547,11.830604,255,621772692,-2092433665,255,0.0,0.0,0.0,43,43,0,0,1,0,1,1,1,1,0,0,0,1,1,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.649902,14,38,734,42014,78.473372,62,252,8395.112305,503571.3125,2,17,49.915,15.432865,61.426934,1387.77833,255,1417884146,-1217579325,255,0.0,0.0,0.0,52,1106,0,0,43,1,1,1,1,2,0,0,0,1,6,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.623129,8,16,364,13186,14.170161,62,252,1572.271851,60929.23047,1,6,231.875571,102.737203,17179.58686,11420.92623,255,2116150707,-1331852323,255,0.111897,0.061458,0.050439,46,824,0,0,7,1,2,1,1,3,0,0,0,2,6,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.681642,12,12,628,770,13.677108,62,252,2740.178955,3358.62207,1,3,152.876547,90.235726,259.080172,4991.784669,255,1107119177,1047442890,255,0.0,0.0,0.0,52,64,0,0,1,1,2,1,1,3,1,1,0,2,1,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.449454,10,6,534,268,33.373826,254,252,8561.499023,3987.059814,2,1,47.750333,75.659602,2415.837634,115.807,255,-1858829747,1977154190,255,0.128381,0.071147,0.057234,53,45,0,0,43,1,2,2,1,40,0,0,0,2,39,0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Define some parameters first

In [7]:
input_size = 196      # 42 for integer encoding
hidden_size = 64      # 1st layer number of neurons
hidden_size_2 = 64    # 2nd layer number of neurons
num_classes = 2       # There are 9 different types of malicious packets + Normal

num_epochs = 20
batch_size = 32
learning_rate = 0.001

n_total_steps = len(x_train)

device = 'cpu'

## Initialize Neural Network class

In [8]:
model = Net(input_size, hidden_size, hidden_size_2, num_classes).to(device)
print(model)

Net(
  (fc1): Linear(in_features=196, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
  (relu1): ReLU()
  (relu2): ReLU()
)


## Define loss and optimizer 

In [9]:
#criterion = nn.CrossEntropyLoss() # This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class.
criterion = nn.BCELoss
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

## Train the model

In [None]:
losses = []
for epoch in tqdm(range(3)):
   
    for i in range(0, x_train.shape[0], batch_size):
        optimizer.zero_grad()

        x = torch.as_tensor(x_train[i:i+batch_size], dtype=torch.float).to(device)
        y = torch.as_tensor(y_train[i:i+batch_size], dtype=torch.long).to(device)
        
        outputs = model(x)
        loss = criterion(outputs, y)
        losses.append(loss)
        # Backward and optimize
        loss.backward()
        optimizer.step()

In [None]:
y

## Check the loss

In [None]:
x_axis = [i for i in range(len(losses))]
plt.plot(x_axis,np.log(losses))
plt.title('Loss of the model')
plt.xlabel('iterations')
plt.ylabel('Cross entropy loss')

# Test the model

In [None]:
#%pixie_debugger
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0  
    for i in range(0, x_test.shape[0], batch_size):
        x = torch.as_tensor(x_test[i:i+batch_size], dtype=torch.float).to(device)
        y = torch.as_tensor(y_test[i:i+batch_size], dtype=torch.long).to(device)
        
        outputs = model(x)
        #print(outputs)
        #print(len(outputs.data))
        if len(outputs.data) > 0:
            # max returns (value ,index)
            #print(torch.max(outputs.data, dim=1))
            _, predicted = torch.max(outputs.data, dim=1)
            
            n_samples += y.size(0)
            n_correct += (predicted == y).sum().item()
            #print("n_samples=", n_samples, "n_correct",n_correct )
        else:
            print("what???")
            print(x, outputs.data)
    acc = 100.0 * n_correct / (n_samples+1)
    print(f'Accuracy of the network: {acc} %')

### Recreate this paper's results
    https://www.researchgate.net/publication/332100759_Intrusion_Detection_Using_Big_Data_and_Deep_Learning_Techniques