In [None]:
import os
import random
import socket
import struct

import dgl
import dgl.function as fn
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight

In [None]:
# Read the flow records from the CSV export and show how many rows carry
# each value of the 'Attack' column.
data = pd.read_csv('./Dataset/NF-CSE-CIC-IDS2018.csv')
attack_counts = data['Attack'].value_counts()
print(attack_counts)

In [None]:
# Columns that are removed from the frame before any feature processing.
unused_columns = [
    'PROTOCOL',
    'L7_PROTO',
    'TCP_FLAGS',
    'CLIENT_TCP_FLAGS',
    'SERVER_TCP_FLAGS',
    'ICMP_TYPE',
    'ICMP_IPV4_TYPE',
    'DNS_QUERY_ID',
    'DNS_QUERY_TYPE',
    'DNS_TTL_ANSWER',
    'FTP_COMMAND_RET_CODE',
]
data.drop(columns=unused_columns, inplace=True)


In [None]:
# Row count per value of the 'Label' column.
label_counts = data['Label'].value_counts()
print(label_counts)

In [None]:
# Turn each address column into an "ip:port" endpoint string. The result is
# written back into the address column so the column position is unchanged.
for ip_col, port_col in (("IPV4_SRC_ADDR", "L4_SRC_PORT"), ("IPV4_DST_ADDR", "L4_DST_PORT")):
    # Ports go through int first so the text form has no decimal part.
    port_text = data[port_col].apply(int).apply(str)
    data[ip_col] = data[ip_col].apply(str) + ':' + port_text

# The endpoint columns become the graph's node identifiers; the raw port
# columns are no longer needed once they are folded into the endpoints.
data.rename(columns={"IPV4_SRC_ADDR": "saddr", "IPV4_DST_ADDR": "daddr"}, inplace=True)
data.drop(columns=['L4_SRC_PORT', 'L4_DST_PORT'], inplace=True)

In [None]:
# Show the first rows. `data.head` without parentheses is the bound method
# object, so it must be called to get the preview.
print(data.head())

In [None]:
# Keep the endpoint pair together with each target column so the targets can
# be looked up per flow later.
endpoint_cols = ["saddr", "daddr"]
label_ground_truth = data[endpoint_cols + ["Label"]]
class_ground_truth = data[endpoint_cols + ["Attack"]]
# No one-hot encoding is applied to the frame at this stage.

In [None]:
# Renumber rows from 0 without keeping the old index as a column.
data = data.reset_index(drop=True)

# Infinite values are first mapped to NaN so that a single fillna replaces
# both infinities and missing values with 0.
data.replace([np.inf, -np.inf], np.nan, inplace=True)
data.fillna(0, inplace=True)

# `head` has to be called; printing `data.head` shows the bound method instead.
print(data.head())

In [None]:
# Feature columns are selected by position (columns 2 through 29).
# NOTE(review): this assumes the first two columns are saddr/daddr and that the
# target columns sit after position 29 — confirm against the printed list.
cols_to_norm = data.columns[2:30].tolist()
print(cols_to_norm)

# Standardise the selected columns and write the scaled values back.
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split below, so test rows influence the fitted statistics — confirm intended.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[cols_to_norm])
data[cols_to_norm] = scaled_values

In [None]:
# 80/20 split of the full frame, stratified on 'Label' with a fixed seed.
# The frames keep the 'Label' column; y_train / y_test hold the same labels.
binary_labels = data['Label']
X_train, X_test, y_train, y_test = train_test_split(
    data,
    binary_labels,
    test_size=0.2,
    random_state=21,
    stratify=binary_labels,
)

In [None]:
# Build the training graph: each row of X_train becomes one directed edge from
# its 'saddr' endpoint to its 'daddr' endpoint.

# Work on an owned copy so the column assignment does not target a frame that
# pandas may treat as a slice of `data`.
X_train = X_train.copy()
X_train['h'] = X_train[cols_to_norm].values.tolist()

# The edge 'class' tensor below is th.long, so 'Attack' must be integer codes.
# If the column is not numeric it is label-encoded into a temporary frame;
# X_train itself keeps its original 'Attack' values.
# NOTE(review): these codes come from an encoder fitted on the training split
# only; they are not guaranteed to match an encoder fitted on another split.
attack_codes = X_train['Attack']
if not pd.api.types.is_numeric_dtype(attack_codes):
    train_le = LabelEncoder()
    attack_codes = train_le.fit_transform(attack_codes)
train_edges = X_train.assign(Attack=attack_codes)

G_nx = nx.from_pandas_edgelist(train_edges, "saddr", "daddr", ['h', 'Label', 'Attack'], create_using=nx.MultiDiGraph())

# 'h' is stored on edges, not nodes, so no node attributes are requested
# (asking for a node attribute that was never set cannot succeed).
G = dgl.from_networkx(G_nx, edge_attrs=['Label', 'Attack'])

num_nodes = G.num_nodes()
num_edges = G.num_edges()

# Nodes have no features of their own: use all-ones vectors with the same
# width as the edge feature vectors.
G.ndata['feat'] = th.ones(num_nodes, len(cols_to_norm))

edge_feats = []
edge_labels = []
edge_classes = []

# The loop variable is deliberately not called `data`: that name is the
# notebook's main DataFrame and must not be rebound here.
# NOTE(review): this relies on G_nx.edges() iterating in the same order DGL
# used for edge ids — confirm.
for u, v, edge_attrs in G_nx.edges(data=True):
    edge_feats.append(edge_attrs['h'])
    edge_labels.append(int(edge_attrs['Label']))
    edge_classes.append(int(edge_attrs['Attack']))

# One tensor construction per attribute instead of one tensor per edge.
# The reshape keeps a (0, n_features) shape when there are no edges.
G.edata['feat'] = th.tensor(edge_feats, dtype=th.float32).reshape(-1, len(cols_to_norm))
G.edata['label'] = th.tensor(edge_labels, dtype=th.long)
G.edata['class'] = th.tensor(edge_classes, dtype=th.long)

print("Number of edges in G:", G.num_edges())
print("Number of nodes in G:", G.num_nodes())
print("Shape of node features in G:", G.ndata['feat'].shape)
print("Shape of edge features in G:", G.edata['feat'].shape)
print("Shape of edge labels in G:", G.edata['label'].shape)
print("Shape of edge classes in G:", G.edata['class'].shape)

In [None]:
# Row count per 'Label' value in the held-out split.
test_label_counts = X_test['Label'].value_counts()
print(test_label_counts)

In [None]:
# Store each row's scaled feature values as one list in column 'h'.
# (The graph-construction cell performs the same assignment.)
feature_rows = X_train[cols_to_norm].values.tolist()
X_train['h'] = feature_rows

In [None]:
class EGraphSAGE(nn.Module):
    """Two stacked mean-aggregator ``SAGEConv`` layers with a linear output head.

    The first layer maps ``node_in_feats`` to ``hid_feats``. Its output is
    concatenated with edge features before the second layer, which is why the
    second layer is built with ``hid_feats + edge_in_feats`` inputs.

    Args:
        node_in_feats: Width of the input node feature vectors.
        edge_in_feats: Width of the edge feature vectors that are concatenated
            between the two layers.
        hid_feats: Output width of both ``SAGEConv`` layers.
        out_feats: Number of output classes.
    """

    def __init__(self, node_in_feats, edge_in_feats, hid_feats, out_feats):
        super().__init__()
        self.layers = nn.ModuleList()
        self.layers.append(dgl.nn.SAGEConv(in_feats=node_in_feats, out_feats=hid_feats, aggregator_type='mean'))
        self.layers.append(dgl.nn.SAGEConv(in_feats=hid_feats + edge_in_feats, out_feats=hid_feats, aggregator_type='mean'))
        self.hid2out = nn.Linear(hid_feats, out_feats)
        self.act = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, blocks, nfeat, efeat):
        """Run the layers over ``blocks`` and return log-probabilities.

        Args:
            blocks: Sequence of graphs/blocks. Layer ``l`` is applied to
                ``blocks[l]``; if fewer blocks than layers are supplied, the
                last block is reused for the remaining layers.
            nfeat: Input node features for the first layer.
            efeat: Edge features concatenated after every layer but the last.

        Returns:
            ``log_softmax`` over the last dimension of the linear head's
            output, one row per row produced by the final layer.
        """
        h = nfeat
        last_block = len(blocks) - 1
        for l, layer in enumerate(self.layers):
            # Each layer consumes its own block: the rows produced by layer l
            # are the inputs expected by block l + 1, not by block 0 again.
            # The min() keeps single-graph callers (blocks == [g]) working.
            h = layer(blocks[min(l, last_block)], h)
            if l != len(self.layers) - 1:
                # Take as many edge-feature rows as there are node rows.
                # NOTE(review): this pairs edge rows with node rows purely by
                # position, and the concat fails when efeat has fewer rows
                # than h — confirm this is the intended use of edge features.
                h = th.cat([h, efeat[:h.shape[0]]], dim=-1)
                h = self.act(h)
                h = self.dropout(h)
        h = self.hid2out(h)
        return F.log_softmax(h, dim=-1)

In [None]:
# Use the GPU when one is visible to torch, otherwise stay on the CPU.
if th.cuda.is_available():
    device = th.device('cuda')
else:
    device = th.device('cpu')

# Input widths are read from the graph's stored feature tensors.
node_feat_dim = G.ndata['feat'].shape[1]
edge_feat_dim = G.edata['feat'].shape[1]

# Binary classifier: two output classes, 128 hidden units.
model = EGraphSAGE(
    node_in_feats=node_feat_dim,
    edge_in_feats=edge_feat_dim,
    hid_feats=128,
    out_feats=2,
)
model = model.to(device)

# NOTE(review): the model already returns log_softmax output and
# CrossEntropyLoss applies log_softmax internally — confirm this pairing.
criterion = nn.CrossEntropyLoss()
optimizer = th.optim.Adam(model.parameters(), lr=0.001)

In [None]:
from dgl.dataloading import EdgeDataLoader, MultiLayerFullNeighborSampler

def compute_accuracy(pred, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        pred: 2-D tensor of per-class scores; the predicted class is the
            argmax along dimension 1.
        labels: Tensor of class indices compared element-wise with the
            predicted classes.

    Returns:
        The mean match rate as a Python float.
    """
    predicted_classes = th.argmax(pred, dim=1)
    matches = th.eq(predicted_classes, labels).float()
    return matches.mean().item()

# Mini-batch training over edges: every batch holds up to 64 edges plus the
# two sampled blocks for the two-layer model.
sampler = MultiLayerFullNeighborSampler(2)
dataloader = EdgeDataLoader(G, th.arange(G.num_edges()), sampler, batch_size=64)

for epoch in range(5):
    print(f'epoch : {epoch}')
    all_preds = []
    all_labels = []
    loss_total = 0.0
    ok_batches = 0
    failed_batches = 0
    model.train()
    for input_nodes, edge_subgraph, blocks in dataloader:
        try:
            edge_subgraph = edge_subgraph.to(device)
            blocks = [b.to(device) for b in blocks]
            nfeat = blocks[0].srcdata['feat']
            efeat = edge_subgraph.edata['feat']
            edge_label = edge_subgraph.edata['label']

            out = model(blocks, nfeat, efeat)
            loss = criterion(out, edge_label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Detach before storing: keeping `out` itself would hold every
            # batch's autograd graph alive for the whole epoch.
            all_preds.append(out.detach())
            all_labels.append(edge_label)
            loss_total += loss.item()
            ok_batches += 1
        except Exception as e:
            # Best-effort: report the failing batch and keep going, but count
            # failures so a fully failing epoch is not mistaken for success.
            failed_batches += 1
            print(f"Error during training: {e}")
            continue

    if ok_batches == 0:
        # Nothing to concatenate or report; continuing would only repeat the
        # same failures for the remaining epochs.
        raise RuntimeError(f"Epoch {epoch}: all {failed_batches} training batches failed")
    if failed_batches:
        print(f"Epoch {epoch}: {failed_batches} batches failed and were skipped")

    all_preds = th.cat(all_preds)
    all_labels = th.cat(all_labels)
    epoch_accuracy = compute_accuracy(all_preds, all_labels)
    # Report the mean loss over the successful batches rather than whichever
    # batch happened to run last.
    epoch_loss = loss_total / ok_batches
    print(f'Epoch {epoch}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')
    print(all_labels.shape)

print("Training is over")

In [None]:
# Persist the trained parameters. The target directory is created first so a
# missing folder does not make the save fail after training has finished.
weights_path = "./Weights/GNN_model_weights_CICIDS2018_subset_2.pth"
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
th.save(model.state_dict(), weights_path)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the 'Attack' names of the test split as integer codes.
# The encoder is only (re)fitted while the column is still non-numeric, or when
# no encoder exists yet. Re-running this cell after encoding would otherwise
# fit the encoder on the integer codes and lose the original class names.
attack_is_encoded = pd.api.types.is_numeric_dtype(X_test['Attack'])
if not attack_is_encoded or 'test_le' not in globals():
    test_le = LabelEncoder()
    # Owned copy so the assignment does not target a slice of `data`.
    X_test = X_test.copy()
    X_test['Attack'] = test_le.fit_transform(X_test['Attack'])

# classes_[i] is the original value that was mapped to code i.
test_class_map = test_le.classes_
print(test_class_map)
print("Attack label mapping:", dict(zip(test_class_map, range(len(test_class_map)))))

In [None]:
# Build the evaluation graph: each row of X_test becomes one directed edge
# from its 'saddr' endpoint to its 'daddr' endpoint.

# Owned copy so the column assignment does not target a slice of `data`.
X_test = X_test.copy()
X_test['h'] = X_test[cols_to_norm].values.tolist()

G_nx_test = nx.from_pandas_edgelist(X_test, "saddr", "daddr", ['h', 'Label', 'Attack'], create_using=nx.MultiDiGraph())

# 'h' is stored on edges, not nodes, so no node attributes are requested
# (asking for a node attribute that was never set cannot succeed).
G_test = dgl.from_networkx(G_nx_test, edge_attrs=['Label', 'Attack'])

test_num_nodes = G_test.num_nodes()
test_num_edges = G_test.num_edges()

# Nodes have no features of their own: use all-ones vectors with the same
# width as the edge feature vectors.
G_test.ndata['feat'] = th.ones(test_num_nodes, len(cols_to_norm))

test_edge_feats = []
test_edge_labels = []
test_edge_classes = []

# The loop variable is deliberately not called `data`: that name is the
# notebook's main DataFrame and must not be rebound here.
# 'Attack' is expected to hold integer codes by this point (th.long below).
# NOTE(review): this relies on G_nx_test.edges() iterating in the same order
# DGL used for edge ids — confirm.
for u, v, edge_attrs in G_nx_test.edges(data=True):
    test_edge_feats.append(edge_attrs['h'])
    test_edge_labels.append(int(edge_attrs['Label']))
    test_edge_classes.append(int(edge_attrs['Attack']))

# One tensor construction per attribute instead of one tensor per edge.
# The reshape keeps a (0, n_features) shape when there are no edges.
G_test.edata['feat'] = th.tensor(test_edge_feats, dtype=th.float32).reshape(-1, len(cols_to_norm))
G_test.edata['label'] = th.tensor(test_edge_labels, dtype=th.long)
G_test.edata['class'] = th.tensor(test_edge_classes, dtype=th.long)

print("Number of edges in G_test:", G_test.num_edges())
print("Number of nodes in G_test:", G_test.num_nodes())
print("Shape of node features in G_test:", G_test.ndata['feat'].shape)
print("Shape of edge features in G_test:", G_test.edata['feat'].shape)
print("Shape of edge labels in G_test:", G_test.edata['label'].shape)
print("Shape of edge classes in G_test:", G_test.edata['class'].shape)

In [None]:
def compute_accuracy(pred, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        pred: 2-D tensor of per-class scores; the predicted class is the
            argmax along dimension 1.
        labels: Tensor of class indices compared element-wise with the
            predicted classes.

    Returns:
        The mean match rate as a Python float.
    """
    predicted_classes = th.argmax(pred, dim=1)
    matches = th.eq(predicted_classes, labels).float()
    return matches.mean().item()

# Rebuild the architecture with the test graph's feature widths and load the
# saved parameters into it.
new_model_2 = EGraphSAGE(node_in_feats=G_test.ndata['feat'].shape[1], 
                         edge_in_feats=G_test.edata['feat'].shape[1],
                         hid_feats=128, 
                         out_feats=2).to(device)

# map_location lets weights saved on one device type load on another.
new_model_2.load_state_dict(
    th.load("./Weights/GNN_model_weights_CICIDS2018_subset_2.pth", map_location=device, weights_only=True)
)

sampler = MultiLayerFullNeighborSampler(2)
test_dataloader = EdgeDataLoader(G_test, th.arange(G_test.num_edges()), sampler, batch_size=64)

new_model_2.eval()

all_test_preds = []
all_test_labels = []
all_test_classes = []
# Per attack name: how many samples had their binary label predicted
# correctly / incorrectly.
attack_class_performance = {attack_type: {'correct': 0, 'incorrect': 0} for attack_type in test_class_map}
failed_test_batches = 0

print("inference start")
with th.no_grad():
    for input_nodes, edge_subgraph, blocks in test_dataloader:
        try:
            edge_subgraph = edge_subgraph.to(device)
            blocks = [b.to(device) for b in blocks]
            nfeat = blocks[0].srcdata['feat']
            efeat = edge_subgraph.edata['feat']
            edge_label = edge_subgraph.edata['label']
            edge_class = edge_subgraph.edata['class']
            
            out = new_model_2(blocks, nfeat, efeat)
            
            all_test_preds.append(out)
            all_test_labels.append(edge_label)
            all_test_classes.append(edge_class)

            # Pull the whole batch to Python once instead of calling .item()
            # and inverse_transform separately for every sample.
            predicted_labels = out.argmax(dim=1).tolist()
            true_labels = edge_label.tolist()
            attack_names = test_le.inverse_transform(edge_class.cpu().numpy())

            for i in range(len(predicted_labels)):
                true_label = true_labels[i]
                predicted_label = predicted_labels[i]
                attack_type = attack_names[i]

                # Consistency check between the binary label and the class name.
                if true_label == 0 and attack_type != 'Benign':
                    print('this sample is Benign but label is wrong')
                
                if true_label == predicted_label:
                    attack_class_performance[attack_type]['correct'] += 1
                else:
                    attack_class_performance[attack_type]['incorrect'] += 1
        except Exception as e:
            # Best-effort: report the failing batch and keep going, but count
            # failures so they are visible in the summary below.
            failed_test_batches += 1
            print(f"Error during inference: {e}")
            continue

print("inference done")
if not all_test_preds:
    # Nothing to concatenate: fail with a message that names the real cause.
    raise RuntimeError(f"All {failed_test_batches} inference batches failed; no predictions were produced")
if failed_test_batches:
    print(f"{failed_test_batches} inference batches failed and were skipped")

all_test_preds = th.cat(all_test_preds).to(device)
all_test_labels = th.cat(all_test_labels).to(device)
all_test_classes = th.cat(all_test_classes).to(device)

test_accuracy = compute_accuracy(all_test_preds, all_test_labels)
print(f'Test Accuracy: {test_accuracy:.4f}')

In [None]:
# Write the trained parameters of `model` to the same file once more.
# The target directory is created first so a missing folder cannot make the
# save fail.
weights_path = "./Weights/GNN_model_weights_CICIDS2018_subset_2.pth"
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
th.save(model.state_dict(), weights_path)

In [None]:
from sklearn.metrics import confusion_matrix

# Binary confusion-matrix metrics over all collected test predictions.
pred_labels = all_test_preds.argmax(dim=1)

pred_labels = pred_labels.cpu()
all_test_labels = all_test_labels.cpu()

# Fixing labels=[0, 1] guarantees a 2x2 matrix even when only one class shows
# up in the labels and predictions, so the four-way unpacking is always valid.
cm = confusion_matrix(all_test_labels.numpy(), pred_labels.numpy(), labels=[0, 1])

# Row = true class, column = predicted class, so ravel() yields TN, FP, FN, TP.
TN, FP, FN, TP = cm.ravel()

print(f'True Positives (TP): {TP}')
print(f'False Positives (FP): {FP}')
print(f'True Negatives (TN): {TN}')
print(f'False Negatives (FN): {FN}')

# Every ratio falls back to 0 when its denominator is 0.
total_samples = TP + TN + FP + FN
accuracy = (TP + TN) / total_samples if total_samples > 0 else 0
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1_score:.4f}')

In [None]:
# Per-attack breakdown of how often the binary label was predicted correctly.
# The running total is not named `sum`, which would shadow the builtin for the
# rest of the kernel session, and the per-class ratio is not named `accuracy`,
# which would overwrite the overall accuracy computed from the confusion matrix.
attack_sample_total = 0
for attack_type, performance in attack_class_performance.items():
    total_samples = performance['correct'] + performance['incorrect']
    if attack_type != 'Benign':
        attack_sample_total += total_samples
    class_accuracy = performance['correct'] / total_samples if total_samples > 0 else 0
    print(f"Attack Type: {attack_type}, Accuracy: {class_accuracy:.4f}, Total Samples: {total_samples}, Correct Samples: {performance['correct']}, Incorrect Samples: {performance['incorrect']}")

# Number of evaluated samples whose attack type is not 'Benign'.
print(attack_sample_total)