# GCN Based Model

In [9]:
# Import packages

import os
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.function as fn
from dgl.nn.pytorch import GraphConv

import networkx as nx

## Load Data

In [5]:
# Label Function

def label(flag: str) -> int:
    return {
        '-': 0,
        'snmpgetattack': 1,
        'warez': 2,
        'portsweep': 3,
        'nmap': 4,
        'warezclient': 5,
        'ipsweep': 6,
        'dict': 7,
        'neptune': 8,
        'smurf': 9,
        'pod': 10,
        'snmpguess': 11,
        'teardrop': 12,
        'satan': 13,
        'httptunnel-e': 14,
        'ignore': 15,
        'mscan': 16,
        'guest': 17,
        'rootkit': 18,
        'back': 19,
        'apache2': 20,
        'processtable': 21,
        'mailbomb': 22,
        'smurfttl': 23,
        'saint': 24
    }.get(flag)

In [42]:
# Graph Generate

path = 'data/train/train_000.txt'
nodes = []
nodes_in = {}
nodes_out = {}

# Feature List
src_list = collections.OrderedDict()
dst_list = collections.OrderedDict()
ip_list_trans = collections.OrderedDict()
num_edge_list = collections.OrderedDict()
port_list = []
time_list = []
label_list = []

# ADD UP EDGE
# edges = np.empty([1, 4])
edge_num = 0

data = open(path)
for line in data:
    src_ip, dst_ip, port, time, connection_type = line.split()
    
    if (int(src_ip), int(dst_ip)) in ip_list_trans.keys():
        edge_num_temp = ip_list_trans[(int(src_ip), int(dst_ip))]
        num_edge_list[edge_num_temp] += 1
        port_list[edge_num_temp] += int(port)
        time_list[edge_num_temp] += int(time)
        label_list[edge_num_temp] += label(connection_type)
        
        # ADD UP EDGE
#         edges = np.vstack((edges, [int(src_ip), int(dst_ip), int(port), int(time)]))
        
        continue
    
    nodes.append(int(src_ip))
    nodes.append(int(dst_ip))
    
    ip_list_trans[(int(src_ip), int(dst_ip))] = edge_num
    src_list[edge_num] = int(src_ip)
    dst_list[edge_num] = int(dst_ip)
    num_edge_list[edge_num] = 1
    port_list.append(int(port))
    time_list.append(int(time))
    label_list.append(label(connection_type))
    
    # ADD UP EDGE
#     edges = np.vstack((edges, [int(src_ip), int(dst_ip), int(port), int(time)]))
    
    if int(dst_ip) in nodes_in.keys():
        nodes_in[dst_ip].append(edge_num)
    else:
        nodes_in[dst_ip] = [edge_num]
        
    if int(src_ip) in nodes_out.keys():
        nodes_out[src_ip].append(edge_num)
    else:
        nodes_out[src_ip] = [edge_num]
    edge_num += 1
        
nodes = list(set(nodes))
edge_trans_src = []
edge_trans_dst = []

# ADD UP EDGE
# edges = np.delete(edges, obj=0, axis=0)

nodes_cal = list(set(nodes_in.keys()).intersection(set(nodes_out.keys())))

for node in nodes_cal:
    for src_edge in nodes_in[node]:
        for dst_edge in nodes_out[node]:
            edge_trans_src.append(src_edge)
            edge_trans_dst.append(dst_edge)

In [43]:
graph = dgl.DGLGraph()
graph.add_nodes(len(ip_list_trans.keys()))
graph.add_edges(edge_trans_src, edge_trans_dst)

graph.ndata['src_ip'] = torch.tensor(list(src_list.values()))
graph.ndata['dst_ip'] = torch.tensor(list(dst_list.values()))
graph.ndata['port'] = torch.tensor(port_list)
graph.ndata['time'] = torch.tensor(time_list)
graph.ndata['label'] = torch.tensor(label_list)

graph = dgl.add_self_loop(graph)

In [44]:
edges = np.empty([1, 4]).astype(np.float32)
for i in range(len(ip_list_trans.keys())):
    edges = np.vstack((edges, [src_list[i], dst_list[i], port_list[i], time_list[i]]))
edges = np.delete(edges, obj=0, axis=0) 
edges = edges.astype(np.float32)

In [76]:
print(graph.number_of_nodes())
print(graph.number_of_edges())
print(edges.shape)
print(edges.dtype)

# labels = torch.tensor(label_list)
# print(labels + 1)

labels_2dim = torch.tensor(np.ones([113, 1]).astype(np.float32)) 

113
127
(113, 4)
float32


# GCN Model

In [28]:
class GCN(nn.Module):
    def __init__(self, in_feats, hidden_size, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, hidden_size)
        self.conv2 = GraphConv(hidden_size, num_classes)

    def forward(self, g, inputs):
        h = self.conv1(g, inputs)
        h = torch.relu(h)
        h = self.conv2(g, h)
        return h

## Train

In [80]:
print(outputs)

tensor([[ -156.4679],
        [  228.3700],
        [  224.4591],
        [  367.0169],
        [  -73.4662],
        [  132.0626],
        [   -3.8568],
        [  322.7796],
        [ 1183.0669],
        [  208.7407],
        [  233.0819],
        [  585.2012],
        [ -293.6375],
        [ -182.8051],
        [  509.3202],
        [    7.1576],
        [   34.1676],
        [  599.7781],
        [  -21.1442],
        [ -540.6116],
        [  275.5155],
        [ 1483.7281],
        [ -351.9604],
        [ -197.8349],
        [  602.0142],
        [ -145.3976],
        [  601.0655],
        [  182.0296],
        [  100.1661],
        [ -160.0551],
        [ -561.4727],
        [  602.0884],
        [ 2292.7505],
        [ -139.9772],
        [  379.0687],
        [  364.3945],
        [   37.3781],
        [  -34.0244],
        [  448.1956],
        [ -297.6210],
        [  604.7244],
        [ 2762.4048],
        [   87.2391],
        [ -276.1029],
        [  573.6793],
        [ 

In [79]:
net = GCN(4, 8, 1)

inputs = torch.tensor(edges)
labels = torch.tensor(label_list) + 1
# labels = torch.tensor(np.arange(113).astype(np.long))

# print(len(inputs))
# print(len(labels))

import itertools

optimizer = torch.optim.Adam(itertools.chain(net.parameters()), lr=0.01)
all_logits = []
loss_function = nn.MSELoss()

for epoch in range(500):
    outputs = net(graph, inputs)
    all_logits.append(outputs.detach())
    
#     print(outputs)
#     logp = F.log_softmax(outputs, 1)
    
#     print(logp)
#     print(labels)

#     print(outputs)
#     print(labels_2dim)

    loss = loss_function(outputs, labels_2dim)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))

Epoch 0 | Loss: 9898314752.0000
Epoch 1 | Loss: 5950507008.0000
Epoch 2 | Loss: 3063011584.0000
Epoch 3 | Loss: 1226925696.0000
Epoch 4 | Loss: 270196640.0000
Epoch 5 | Loss: 32156670.0000
Epoch 6 | Loss: 297984224.0000
Epoch 7 | Loss: 805716544.0000
Epoch 8 | Loss: 1301330560.0000
Epoch 9 | Loss: 1613597696.0000
Epoch 10 | Loss: 1682301952.0000
Epoch 11 | Loss: 1533569408.0000
Epoch 12 | Loss: 1238838912.0000
Epoch 13 | Loss: 881848256.0000
Epoch 14 | Loss: 538159296.0000
Epoch 15 | Loss: 263885168.0000
Epoch 16 | Loss: 90594224.0000
Epoch 17 | Loss: 24701400.0000
Epoch 18 | Loss: 50901732.0000
Epoch 19 | Loss: 138597376.0000
Epoch 20 | Loss: 250568144.0000
Epoch 21 | Loss: 351900512.0000
Epoch 22 | Loss: 417009440.0000
Epoch 23 | Loss: 433377984.0000
Epoch 24 | Loss: 401725952.0000
Epoch 25 | Loss: 333285760.0000
Epoch 26 | Loss: 245454720.0000
Epoch 27 | Loss: 157058176.0000
Epoch 28 | Loss: 84196160.0000
Epoch 29 | Loss: 37301144.0000
Epoch 30 | Loss: 19753086.0000
Epoch 31 | Loss: