In [1]:
%load_ext autoreload
%autoreload 2

import os, sys, re, datetime, random, gzip, json, copy
import tqdm
import pandas as pd
import numpy as np
import glob
from pathlib import Path
from itertools import accumulate
import argparse
from time import time
from math import ceil
from collections import Counter

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn import Linear

import pytorch_lightning as pl
from pytorch_lightning.trainer.trainer import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.utilities.seed import seed_everything

from torch_geometric.data import Data, LightningLinkData
from torch_geometric.loader import DataLoader
from torch_geometric.nn import Sequential, HeteroConv, GINConv, GCNConv, SAGEConv, GATConv, TransformerConv

from sklearn.metrics import f1_score, accuracy_score, top_k_accuracy_score, roc_auc_score
from sklearn.utils import class_weight

# Locate the repository root: strip everything from the first "/TS-IDS" in the
# current working directory onwards, then re-append the directory name.
PROJ_PATH = Path(re.sub("/TS-IDS.*$", '', os.getcwd())) / 'TS-IDS'
print(f'PROJ_PATH={PROJ_PATH}')
# Both roots are inserted at position 1, so the one inserted last ('src')
# ends up ahead of the repository root on the import path.
for _import_root in (PROJ_PATH, PROJ_PATH / 'src'):
    sys.path.insert(1, str(_import_root))
import utils
from utils import *
from dataset import build_datamodule
from trainer import build_trainer
from model import TSIDS
from pipeline import TSIDSPipeline

PROJ_PATH=/home/hoang/github/TS-IDS


In [2]:
# For every dataset: load its pickled graph dictionary, build the tensors,
# split the edges by their 'tvt' tag, and print shapes, NaN checks and the
# balanced class weights of the training edge labels.
ds_names = ['nf_bot_binary', 'nf_bot_multi', 'nf_ton_binary', 'nf_ton_multi']
for name in ds_names:
    print(name)
    ### Load the dataset config and the pickle it points to.
    config_path = str(PROJ_PATH / f'src/config/{name}.json')
    data_config = utils.read_json(config_path)
    # NOTE(review): unpickling can execute arbitrary code -- only load files
    # from a trusted location.
    g_data = pd.read_pickle(
        os.path.join(data_config['root'], data_config['ds_name']+'.pkl'))
    x = torch.tensor(g_data['n_features'], dtype=torch.float)
    edge_index = torch.tensor(g_data['edge_index'], dtype=torch.long)
    edge_attr = torch.tensor(g_data['e_features'], dtype=torch.float)
    # `y` keeps the node labels; the training edge labels used for the class
    # weights below get their own name instead of overwriting it.
    y = torch.tensor(g_data['node_label'], dtype=torch.long)
    ### Positions of the edges belonging to each split, computed once.
    split_idx = {
        split: np.where(g_data['tvt'] == split)[0]
        for split in ('train', 'val', 'test')
    }
    input_train_edges = torch.tensor(g_data['edge_index'][:, split_idx['train']], dtype=torch.long)
    input_train_labels = torch.tensor(g_data['edge_label'][split_idx['train']], dtype=torch.long)
    input_val_edges = torch.tensor(g_data['edge_index'][:, split_idx['val']], dtype=torch.long)
    input_val_labels = torch.tensor(g_data['edge_label'][split_idx['val']], dtype=torch.long)
    input_test_edges = torch.tensor(g_data['edge_index'][:, split_idx['test']], dtype=torch.long)
    input_test_labels = torch.tensor(g_data['edge_label'][split_idx['test']], dtype=torch.long)
    ### Shapes and NaN sanity checks.
    print(x.shape, edge_index.shape, edge_attr.shape)
    # edge_index is an integer tensor, so its NaN check can never be True; it
    # is kept only so the printed summary stays in the same three-column form.
    print(torch.isnan(x).any(), torch.isnan(edge_index).any(), torch.isnan(edge_attr).any())
    ### Class balance of the training edge labels.
    train_labels_np = input_train_labels.cpu().numpy()
    classes = np.unique(train_labels_np)
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced', classes=classes, y=train_labels_np)
    print(classes)
    print(class_weights)
    print(Counter(train_labels_np))
    print('\n')

nf_bot_binary
torch.Size([77177, 8]) torch.Size([2, 600100]) torch.Size([600100, 8])
tensor(False) tensor(False) tensor(False)
[0 1]
[21.6517535   0.51181935]
Counter({1: 293121, 0: 6929})


nf_bot_multi
torch.Size([77177, 32]) torch.Size([2, 600100]) torch.Size([600100, 8])
tensor(False) tensor(False) tensor(False)
[0 1 2 3 4]
[ 8.6607014   2.10465402  2.12086941  0.25499386 61.61190965]
Counter({3: 235339, 1: 28513, 2: 28295, 0: 6929, 4: 974})


nf_ton_binary
torch.Size([169562, 8]) torch.Size([2, 1379274]) torch.Size([1379274, 8])
tensor(False) tensor(False) tensor(False)
[0 1]
[2.55158762 0.62185685]
Counter({1: 554499, 0: 135139})


nf_ton_multi
torch.Size([169562, 72]) torch.Size([2, 1379274]) torch.Size([1379274, 8])
tensor(False) tensor(False) tensor(False)
[0 1 2 3 4 5 6 7 8 9]
[5.10317525e-01 8.01531846e+00 4.23311543e-01 7.82612347e+00
 2.94324661e-01 1.08433648e+02 8.77558344e-01 8.51404938e+02
 6.40272955e+00 1.38531598e+00]
Counter({4: 234312, 2: 162915, 0: 135139, 6: 785

In [1]:
# Disabled example: build the TS-IDS pipeline for one dataset and train it.
# Uncomment the lines below to run it.
# ds_names = ['nf_bot_binary', 'nf_bot_multi', 'nf_ton_binary', 'nf_ton_multi']
# name = ds_names[0]
# config_path = str(PROJ_PATH / f'src/config/{name}.json')
# tsids = TSIDSPipeline(config_path=config_path)
# data_module, model_module, trainer = tsids.initialize()
# tsids.train(data_module, model_module, trainer)