<a href="https://colab.research.google.com/github/Zoro1092000/ZOZO/blob/master/CanTrain_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# I. Pip & Import.

In [1]:
import os
import os.path as osp
import torch
import sys

os.environ['TORCH'] = torch.__version__

!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install torch-geometric==1.4.3
!pip install torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install deepdish==0.3.5

import torch.nn as nn
from torch.nn import Parameter
from torch_geometric.utils import scatter_
from torch_geometric.nn.inits import glorot, zeros
from torch_scatter import scatter_add
from torch.utils.data import DataLoader, Dataset

from itertools import chain
import pickle
import h5py
import deepdish as dd
import numpy as np
from tqdm import tqdm
import inspect
import time
import math
import random

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.12.1+cu113.html
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# II. Data

## 2.1. Data Utils

In [2]:
def h5group_to_dict(h5group):
    """Flatten one HDF5 group into a plain dictionary.

    Every entry yielded by ``h5group.items()`` and then by
    ``h5group.attrs.items()`` is read with the ``[()]`` accessor and stored
    under its own name. Attributes are visited last, so an attribute replaces
    a dataset that shares its name.

    Args:
        h5group: Object exposing ``items()`` and ``attrs.items()``
            (e.g. an opened h5py group).

    Returns:
        dict: name -> value read from the group.
    """
    flattened = {}
    for name, value in chain(h5group.items(), h5group.attrs.items()):
        flattened[name] = value[()]
    return flattened

def sub_dict(full_dict, *keys, to_tensor):
    """Pick a subset of entries from ``full_dict``.

    Args:
        full_dict: Source mapping.
        *keys: Names to copy; names missing from ``full_dict`` are skipped.
        to_tensor: Keyword-only. When true each selected value is wrapped with
            ``torch.tensor``; otherwise the value is passed through untouched.

    Returns:
        dict: the selected entries, in the order the keys were given.
    """
    selected = {}
    for key in keys:
        if key not in full_dict:
            continue
        value = full_dict[key]
        selected[key] = torch.tensor(value) if to_tensor else value
    return selected

def build_graph_from_dict_pyg(graph_dict, to_tensor=True):
    """Build a ``torch_geometric.data.Data`` object from a graph dictionary.

    Only the entries named 'edge_index', 'x', 'y', 'edge_attr' and 'edge_y'
    are forwarded to ``Data``; entries absent from ``graph_dict`` are omitted.

    Args:
        graph_dict: Mapping holding the graph fields.
        to_tensor: When true (default) each forwarded value is converted with
            ``torch.tensor`` by ``sub_dict``.

    Returns:
        The constructed ``Data`` instance.
    """
    # Imported lazily so the module can be loaded without torch_geometric.
    from torch_geometric.data import Data

    graph_fields = ('edge_index', 'x', 'y', 'edge_attr', 'edge_y')
    data_kwargs = sub_dict(graph_dict, *graph_fields, to_tensor=to_tensor)
    return Data(**data_kwargs)



## 2.2. Data Loader

In [3]:
class GraphDataLoader(DataLoader):
    """``DataLoader`` that merges a list of PyG graphs into one ``Batch``.

    The collate function is a static method rather than a closure created in
    ``__init__``: functions local to another function cannot be pickled, which
    breaks ``num_workers > 0`` on platforms that start workers with ``spawn``.

    Args:
        dataset: Dataset yielding graph objects accepted by
            ``torch_geometric.data.Batch.from_data_list``.
        batch_size: Number of graphs merged into each batch.
        shuffle: Whether to reshuffle the dataset every epoch.
        num_workers: Number of loader worker processes (0 = load in the
            main process).
    """

    def __init__(self, dataset, batch_size=1, shuffle=False, num_workers=0):
        super().__init__(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=GraphDataLoader._collate_graph,
            num_workers=num_workers)

    @staticmethod
    def _collate_graph(graph_obj_list):
        """Merge the graphs of one mini-batch into a single ``Batch`` object."""
        # Imported lazily, matching the rest of the file.
        from torch_geometric.data import Batch
        return Batch.from_data_list(graph_obj_list)


## 2.3. Botnet Dataset

In [4]:
def files_exist(files):
    """Return True when every path in ``files`` exists (True for an empty input)."""
    existence_flags = list(map(osp.exists, files))
    return all(existence_flags)

def extract_tar(path, folder, mode='r:gz', log=True):
    """Unpack the tar archive at ``path`` into ``folder``.

    Args:
        path: Location of the archive.
        folder: Destination directory passed to ``TarFile.extractall``.
        mode: ``tarfile.open`` mode string (gzip-compressed read by default).
        log: When true (default) print a one-line progress message; pass
            ``False`` to extract silently.

    Returns:
        None.
    """
    import tarfile

    if log:
        print('Extracting', path)

    # NOTE(security): extractall() trusts the member paths stored in the
    # archive. Only use this on archives from a trusted source.
    with tarfile.open(path, mode) as archive:
        archive.extractall(folder)

def makedirs(path):
    """Create ``path`` (after ``normpath`` and ``~`` expansion) with all parents.

    An already existing directory is not an error, so the call is safe to
    repeat, e.g. when a previous processing run was interrupted.
    """
    os.makedirs(osp.expanduser(osp.normpath(path)), exist_ok=True)
# TODO: add degree calculation in preprocessing
class BotnetDataset(Dataset):
    """Map-style dataset of botnet graphs stored in per-split HDF5 files.

    On construction the raw data is, if necessary, unpacked and divided into
    ``<name>_train.hdf5``, ``<name>_val.hdf5`` and ``<name>_test.hdf5`` under
    ``<root>/processed``. Indexing returns one graph built by
    ``build_graph_from_dict_pyg``.

    Args:
        name: Topology name; one of 'chord', 'debru', 'kadem', 'leet', 'c2', 'p2p'.
        root: Directory holding the ``raw`` and ``processed`` sub-folders.
        split: Split served by this instance: 'train', 'val' or 'test'.
        graph_format: Stored and shown by ``repr``; items are always built in
            PyG format.
        split_idx: Optional mapping from split name to the raw graph ids of
            that split. When ``None`` the mapping pickled in
            ``<name>_split_idx.pkl`` is used.
        add_nfeat_ones: When true, processing writes an all-ones ``x`` dataset
            of shape (num_nodes, 1) into every graph.
        in_memory: When true the whole split is loaded with deepdish;
            otherwise the HDF5 file is opened lazily on first item access.
    """

    def __init__(self, name='chord', root='data/botnet', split='train', graph_format='pyg', split_idx=None, add_nfeat_ones=True,
                 in_memory=True):
        super().__init__()
        assert name in ['chord', 'debru', 'kadem', 'leet', 'c2', 'p2p']
        assert split in ['train', 'val', 'test']

        if isinstance(root, str):
            root = osp.expanduser(osp.normpath(root))

        self.name = name
        self.root = root
        self.split = split
        self.split_idx = split_idx
        self.add_nfeat_ones = add_nfeat_ones

        self.process()

        self.in_memory = in_memory
        self._graph_format = graph_format
        # processed_paths is ordered train / val / test.
        self.path = self.processed_paths[('train', 'val', 'test').index(split)]

        if in_memory:
            self.data = dd.io.load(self.path)  # dictionary
            self.data_type = 'dict'
            self.num_graphs = self.data['num_graphs']
        else:
            # Defer opening the file to each process to make multiprocessing work.
            self.data = None
            self.data_type = 'file'
            with h5py.File(self.path, 'r') as f:
                self.num_graphs = f.attrs['num_graphs']

    @property
    def graph_format(self):
        """Graph format requested at construction time."""
        return self._graph_format

    @property
    def raw_dir(self):
        """Directory that holds the raw archive and its extracted files."""
        return osp.join(self.root, 'raw')

    @property
    def processed_dir(self):
        """Directory that holds the per-split HDF5 files."""
        return osp.join(self.root, 'processed')

    @property
    def raw_file_names(self):
        """Raw file names: archive, raw HDF5 file, pickled default split."""
        return ['botnet_' + self.name + '.tar.gz', self.name + '_raw.hdf5', self.name + '_split_idx.pkl']

    @property
    def processed_file_names(self):
        """Processed file names in train / val / test order."""
        return [self.name + '_' + s + '.hdf5' for s in ('train', 'val', 'test')]

    @property
    def raw_paths(self):
        """``raw_file_names`` joined onto ``raw_dir``."""
        return [osp.join(self.raw_dir, f) for f in self.raw_file_names]

    @property
    def processed_paths(self):
        """``processed_file_names`` joined onto ``processed_dir``."""
        return [osp.join(self.processed_dir, f) for f in self.processed_file_names]

    def process(self):
        """Create the per-split HDF5 files unless all of them already exist.

        Extracts the raw archive when the raw HDF5 / split files are missing,
        then copies every graph of each split into the split's file and
        records summary attributes (graph count and per-graph averages).
        """
        if files_exist(self.processed_paths):
            return

        if not files_exist(self.raw_paths[1:3]):
            assert osp.exists(self.raw_paths[0]), \
                f'raw archive not found: {self.raw_paths[0]}'
            extract_tar(self.raw_paths[0], self.raw_dir)

        print('Processing...')
        # Tolerate an existing directory (e.g. left by an interrupted run).
        os.makedirs(osp.expanduser(osp.normpath(self.processed_dir)), exist_ok=True)

        if self.split_idx is None:
            # Default data split shipped with the raw data.
            # NOTE(security): pickle can execute arbitrary code; only load
            # split files from a trusted source.
            with open(self.raw_paths[2], 'rb') as split_file:
                split_idx = pickle.load(split_file)
        else:
            # Caller-supplied split.
            split_idx = self.split_idx

        with h5py.File(self.raw_paths[1], 'r') as f:
            num_evil_edges_flag = 'num_evil_edges_avg' in f.attrs
            for path, split in zip(self.processed_paths, ('train', 'val', 'test')):
                print(f'writing {split} set ' + '-' * 10)
                ori_graph_ids = split_idx[split]
                num_graphs = len(ori_graph_ids)
                # Avoid dividing by zero when a split is empty.
                denom = max(num_graphs, 1)
                with h5py.File(path, 'w') as g:
                    num_nodes_sum = 0
                    num_edges_sum = 0
                    num_evils_sum = 0
                    num_evil_edges_sum = 0 if num_evil_edges_flag else None

                    for n, i in tqdm(enumerate(ori_graph_ids)):
                        # Graphs are renumbered 0..num_graphs-1 inside the split file.
                        f.copy(str(i), g, name=str(n))
                        if self.add_nfeat_ones:
                            g[str(n)].create_dataset('x',
                                                     shape=(g[str(n)].attrs['num_nodes'], 1),
                                                     dtype='f4',
                                                     data=np.ones((g[str(n)].attrs['num_nodes'], 1)))

                        num_nodes_sum += f[str(i)].attrs['num_nodes']
                        num_edges_sum += f[str(i)].attrs['num_edges']
                        num_evils_sum += f[str(i)].attrs['num_evils']
                        if num_evil_edges_flag:
                            num_evil_edges_sum += f[str(i)].attrs['num_evil_edges']

                    g.attrs['num_graphs'] = num_graphs
                    g.attrs['num_nodes_avg'] = num_nodes_sum / denom
                    g.attrs['num_edges_avg'] = num_edges_sum / denom
                    g.attrs['num_evils_avg'] = num_evils_sum / denom
                    if num_evil_edges_flag:
                        g.attrs['num_evil_edges_avg'] = num_evil_edges_sum / denom
                    g.attrs['is_directed'] = f.attrs['is_directed']
                    g.attrs['contains_self_loops'] = f.attrs['contains_self_loops']
                    g.attrs['ori_graph_ids'] = ori_graph_ids

                print('{} split --- number of graphs: {}, data saved at {}.'.format(split, num_graphs, path))

        print('Done!')

    def __len__(self):
        return self.num_graphs

    def __getitem__(self, index):
        """Return graph number ``index`` of this split as a PyG ``Data`` object."""
        if self.data_type == 'dict':
            graph_dict = self.data[str(index)]
        elif self.data_type == 'file':
            if self.data is None:
                # Only open once in each process.
                self.data = h5py.File(self.path, 'r')
            graph_dict = h5group_to_dict(self.data[str(index)])
        else:
            raise ValueError

        # graph_format == 'pyg':
        return build_graph_from_dict_pyg(graph_dict)

    def __iter__(self):
        for i in range(self.num_graphs):
            yield self[i]

    def __repr__(self):
        return f'{self.__class__.__name__}(topology: {self.name} | split: {self.split} | ' \
               f'#graphs: {len(self)} | graph format: {self.graph_format})'


# III. Eval

## 3.1. Metrics

In [5]:
def f1(target, pred, label):
    """F1 score of one class: 2 * precision * recall / (precision + recall).

    Args:
        target: Array of ground-truth labels.
        pred: Array of predicted labels, same shape as ``target``.
        label: Class value treated as the positive class.

    Returns:
        The F1 score, or ``np.nan`` when the class is never predicted, never
        present in ``target``, or precision and recall are both zero.
    """
    target_hit = target == label
    pred_hit = pred == label

    tp = np.sum(target_hit & pred_hit)
    fp = np.sum((target != label) & pred_hit)
    fn = np.sum((pred != label) & target_hit)

    predicted_total = tp + fp
    actual_total = tp + fn
    if predicted_total == 0 or actual_total == 0:
        return np.nan

    precision = tp / predicted_total
    recall = tp / actual_total
    denominator = precision + recall
    if denominator == 0:
        return np.nan

    return 2 * (precision * recall) / denominator

def f1_macro(pred, target):
    """Unweighted mean of the per-class ``f1`` scores for labels 0 and 1.

    Note the argument order is (pred, target), the reverse of ``f1``. The
    result is NaN whenever either per-class score is NaN.
    """
    per_class_scores = []
    for label in (0, 1):
        per_class_scores.append(f1(target, pred, label))
    return np.mean(per_class_scores)


def accuracy(pred, target):
    """Fraction of positions where ``pred`` equals ``target``."""
    num_correct = (pred == target).sum().item()
    return num_correct / len(target)


def true_positive(pred, target):
    """Count positions predicted as 1 whose target is 1."""
    predicted_positive = pred == 1
    labels_of_predicted = target[predicted_positive]
    return (labels_of_predicted == 1).sum().item()


def false_positive(pred, target):
    """Count positions predicted as 1 whose target is 0."""
    predicted_positive = pred == 1
    labels_of_predicted = target[predicted_positive]
    return (labels_of_predicted == 0).sum().item()


def true_negative(pred, target):
    """Count positions predicted as 0 whose target is 0."""
    predicted_negative = pred == 0
    labels_of_predicted = target[predicted_negative]
    return (labels_of_predicted == 0).sum().item()


def false_negative(pred, target):
    """Count positions predicted as 0 whose target is 1."""
    predicted_negative = pred == 0
    labels_of_predicted = target[predicted_negative]
    return (labels_of_predicted == 1).sum().item()


def recall(pred, target):
    """Recall of class 1: true positives / number of targets equal to 1.

    Returns:
        The recall, or the sentinel ``-1`` when ``target`` contains no 1.
    """
    try:
        return true_positive(pred, target) / (target == 1).sum().item()
    except ZeroDivisionError:
        # Only the undefined-metric case maps to the sentinel; any other
        # error (wrong input type, shape mismatch, ...) is a real bug and
        # must surface instead of being reported as -1.
        return -1


def precision(pred, target):
    """Precision of class 1: true positives / number of predictions equal to 1.

    Returns:
        The precision, or the sentinel ``-1`` when nothing is predicted as 1.
    """
    try:
        return true_positive(pred, target) / (pred == 1).sum().item()
    except ZeroDivisionError:
        # Only the undefined-metric case maps to the sentinel; other errors
        # propagate so that genuine bugs are not hidden behind -1.
        return -1


def f1_score(pred, target):
    """F1 score of class 1 computed from ``precision`` and ``recall``.

    Returns:
        ``2 * prec * rec / (prec + rec)``, or ``0`` when precision and recall
        sum to zero. NOTE: ``precision`` / ``recall`` return the sentinel -1
        when undefined, and that sentinel is fed into the formula unchanged.
    """
    prec = precision(pred, target)
    rec = recall(pred, target)
    try:
        return 2 * (prec * rec) / (prec + rec)
    except ZeroDivisionError:
        # prec + rec == 0; any other exception is a real error and propagates.
        return 0


def false_positive_rate(pred, target):
    """False positive rate: false positives / number of targets equal to 0.

    Returns:
        The rate, or the sentinel ``-1`` when ``target`` contains no 0.
    """
    try:
        return false_positive(pred, target) / (target == 0).sum().item()
    except ZeroDivisionError:
        # Undefined metric only; other errors are real bugs and propagate.
        return -1


def false_negative_rate(pred, target):
    """False negative rate: false negatives / number of targets equal to 1.

    Returns:
        The rate, or the sentinel ``-1`` when ``target`` contains no 1.
    """
    try:
        return false_negative(pred, target) / (target == 1).sum().item()
    except ZeroDivisionError:
        # Undefined metric only; other errors are real bugs and propagate.
        return -1


## 3.2. Evaluation

In [6]:
def eval_metrics(target, pred_prob, threshold=0.5):
    """Compute the binary classification metrics for one set of predictions.

    Args:
        target: Ground-truth labels (array or ``torch.Tensor``).
        pred_prob: Predicted probability of class 1 (array or ``torch.Tensor``).
        threshold: Probabilities greater than or equal to this value are
            predicted as class 1.

    Returns:
        dict with keys 'acc', 'fpr', 'fnr', 'rec', 'prc', 'f1', 'f1_macro'.
    """
    # Tensors are moved to the CPU and converted so every metric sees NumPy arrays.
    target, pred_prob = (
        value.cpu().numpy() if isinstance(value, torch.Tensor) else value
        for value in (target, pred_prob)
    )

    pred = (pred_prob >= threshold).astype(int)

    return {
        'acc': accuracy(pred, target),
        'fpr': false_positive_rate(pred, target),
        'fnr': false_negative_rate(pred, target),
        'rec': recall(pred, target),
        'prc': precision(pred, target),
        'f1': f1_score(pred, target),
        'f1_macro': f1_macro(pred, target),
    }


def dict_value_add(dict1, dict2):
    """Add two dictionaries key by key; a key missing on one side counts as 0."""
    summed = {}
    for key in set(dict1) | set(dict2):
        summed[key] = dict1.get(key, 0) + dict2.get(key, 0)
    return summed


def dict_value_div(dict, n):
    """Return a new dictionary with every value of ``dict`` divided by ``n``."""
    quotients = {}
    for key in dict:
        quotients[key] = dict[key] / n
    return quotients


def eval_predictor(dataset, predictor):
    """Evaluate ``predictor`` on every graph of ``dataset`` and average the results.

    Args:
        dataset: Sized iterable of graph objects exposing the target as ``.y``.
        predictor: Callable taking one graph and returning either the
            predicted probabilities, or a 2-element tuple/list
            ``(pred_prob, loss)``.

    Returns:
        tuple: (dict of metrics averaged over the graphs, average loss). The
        loss stays 0 when the predictor never reports one.
    """
    result_dict_avg = {}
    loss_avg = 0

    for data in dataset:
        # Call the predictor exactly once and inspect the result. Unpacking
        # inside try/except ValueError would run the model a second time,
        # hide genuine ValueErrors raised by the model, and misread a
        # 2-element probability tensor as (pred_prob, loss).
        output = predictor(data)
        if isinstance(output, (tuple, list)) and len(output) == 2:
            pred_prob, loss = output
            loss_avg += loss
        else:
            pred_prob = output

        # get the ground truth target
        # graph_format == 'pyg':
        target = data.y

        # compute the evaluation metrics for this graph and accumulate them
        result_dict = eval_metrics(target, pred_prob)
        result_dict_avg = dict_value_add(result_dict_avg, result_dict)

    # average the metrics across all graphs in the dataset as final results
    result_dict_avg = dict_value_div(result_dict_avg, len(dataset))
    loss_avg = loss_avg / len(dataset)

    return result_dict_avg, loss_avg


# =================================================================================================================
# some examples of the 'predictor' model wrapper to be fed into the above evaluation function (for PyG Data format)
# =================================================================================================================
class PygRandomPredictor:
    """Baseline predictor that draws a uniform random probability per target entry."""

    def __init__(self):
        # No state is needed. Seed torch beforehand (e.g. torch.manual_seed(0))
        # if reproducible draws are required.
        pass

    def __call__(self, data):
        """Return ``torch.rand`` values, one for each element of ``data.y``."""
        num_targets = len(data.y)
        return torch.rand(num_targets)


class PygModelPredictor:
    """Wrap a model so it can be used as a predictor by ``eval_predictor``.

    Args:
        model: Module called as ``model(data.x, data.edge_index)`` and
            returning per-row class scores.
        loss_fcn: Loss applied to the scores and ``data.y.long()``.
    """

    def __init__(self, model, loss_fcn=torch.nn.CrossEntropyLoss()):
        self.model, self.loss_fcn = model, loss_fcn
        # Inputs are moved to wherever the first model parameter lives.
        self.device = next(model.parameters()).device

    def __call__(self, data):
        """Return ``(probability of class 1, loss)`` for one graph, without gradients."""
        self.model.eval()
        data = data.to(self.device)
        with torch.no_grad():
            # Adjust the call below if your model expects a different input format.
            logits = self.model(data.x, data.edge_index)
            loss = self.loss_fcn(logits, data.y.long())
            positive_prob = torch.softmax(logits, dim=1)[:, 1]
        return positive_prob, loss.float()


# IV. Optimization

## 4.1. Early stop

In [7]:
class EarlyStopping:
    """Track a validation metric and flag when it has stopped improving.

    Args:
        patience: Number of consecutive non-improving calls after which
            ``early_stop`` becomes True.
        mode: 'min' if a smaller metric is better, 'max' if larger is better.
        verbose: When true, report the counter on every non-improving call.
        logger: Optional logger; its ``info`` method is used for reporting,
            otherwise ``print``.

    Attributes updated by each call: ``best``, ``improved``, ``counter``,
    ``early_stop``.
    """

    def __init__(self, patience=7, mode='min', verbose=False, logger=None):

        assert mode in ['min', 'max']

        self.patience, self.mode, self.verbose = patience, mode, verbose
        self.logging = logger.info if logger else print
        self.counter = 0
        self.best = None
        self.improved = False
        self.early_stop = False

    def _beats_best(self, val_metric):
        """Whether ``val_metric`` is strictly better than the stored best."""
        if self.mode == 'min':
            return val_metric < self.best
        if self.mode == 'max':
            return val_metric > self.best
        return False

    def __call__(self, val_metric):
        first_call = self.best is None
        if first_call or self._beats_best(val_metric):
            self.best = val_metric
            self.improved = True
            if not first_call:
                self.counter = 0
            return

        # No improvement: count it and stop once patience is exhausted.
        self.improved = False
        self.counter += 1
        if self.verbose:
            self.logging(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True


## 4.2. Train Utils

In [8]:
def time_since(start):
    """Format the time elapsed since ``start`` (a ``time.time()`` value).

    Returns:
        'Ns', 'Nm Ns' or 'Nh Nm Ns'; leading units that are zero are omitted.
    """
    elapsed = time.time() - start
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    hours = math.floor(minutes / 60)
    minutes -= hours * 60

    if hours != 0:
        return '%dh %dm %ds' % (hours, minutes, seconds)
    if minutes != 0:
        return '%dm %ds' % (minutes, seconds)
    return '%ds' % seconds


# V. Model

## 5.1. Activation 

In [9]:
def activation(act, negative_slope=0.2):
    """Return a new activation module selected by name.

    Args:
        act: One of 'lrelu', 'relu', 'elu', 'none'.
        negative_slope: Slope used by the 'lrelu' (LeakyReLU) option.

    Raises:
        KeyError: if ``act`` is not one of the supported names.
    """
    factories = {
        'lrelu': lambda: nn.LeakyReLU(negative_slope),
        'relu': nn.ReLU,
        'elu': nn.ELU,
        'none': nn.Identity,
    }
    build = factories[act]
    return build()

## 5.2. GCN base model

In [10]:
class NodeModelBase(nn.Module):
    """Common base for node-update layers: stores the layer configuration and
    provides the degree-normalisation helper.

    Args:
        in_channels: Input feature size.
        out_channels: Output feature size.
        deg_norm: Name of the degree normalisation, stored for subclasses.
        aggr: Aggregation name; must be 'add', 'mean' or 'max'.
    """

    def __init__(self, in_channels, out_channels, deg_norm='none', aggr='add',
                 *args, **kwargs):
        assert aggr in ['add', 'mean', 'max']

        super(NodeModelBase, self).__init__()

        self.in_channels, self.out_channels = in_channels, out_channels
        self.deg_norm, self.aggr = deg_norm, aggr

    @staticmethod
    def degnorm_const(edge_index=None, num_nodes=None, deg=None, method='sm', device=None):
        """Per-edge normalisation constant ``deg[row]^-0.5 * deg[col]^-0.5``.

        The degree is always recomputed by counting, for every node, the edges
        whose first ``edge_index`` row points at it; the ``deg`` and ``method``
        arguments are accepted but not used.

        Returns:
            Tensor of size (E,), one factor per edge.
        """
        if device is None and edge_index is not None:
            device = edge_index.device

        unit_weights = torch.ones((edge_index.size(1),), device=device)

        row, col = edge_index

        node_degree = scatter_add(unit_weights, row, dim=0, dim_size=num_nodes)

        # 'sm' normalisation; nodes with zero degree yield inf, which is zeroed out.
        inv_sqrt_degree = node_degree.pow(-0.5)
        inv_sqrt_degree[inv_sqrt_degree == float('inf')] = 0

        return inv_sqrt_degree[row] * inv_sqrt_degree[col]  # size (E,)

    def forward(self, x, edge_index, deg=None):
        """Identity: the base class returns ``x`` unchanged."""
        return x

    def num_parameters(self):
        """Total number of parameter elements (computed once, then cached)."""
        if not hasattr(self, 'num_para'):
            self.num_para = sum(p.nelement() for p in self.parameters())
        return self.num_para

    def __repr__(self):
        template = '{} (in_channels: {}, out_channels: {}, deg_norm: {},' \
                   'aggr: {} | number of parameters: {})'
        return template.format(
            self.__class__.__name__, self.in_channels, self.out_channels,
            self.deg_norm, self.aggr, self.num_parameters())

class NodeModelMLP(NodeModelBase):
    """Node update layer: normalise source features, transform them with an
    MLP on every edge, then aggregate the results into the target nodes.

    Args:
        in_channels: Input feature size.
        out_channels: Output feature size.
        deg_norm: Degree normalisation name forwarded to ``degnorm_const``.
        aggr: Aggregation passed to ``scatter_`` ('add', 'mean' or 'max').
        bias: Whether the linear layers have a bias term.
        mlp_nlay: Number of linear layers in the MLP (>= 1).
        mlp_nhid: Hidden size used when ``mlp_nlay >= 2``.
        mlp_act: Name of the activation placed between linear layers.

    Raises:
        ValueError: if ``mlp_nlay`` is smaller than 1.
    """

    def __init__(self, in_channels, out_channels, deg_norm='sm', aggr='add',
                 bias=True, mlp_nlay=2, mlp_nhid=32, mlp_act='relu'):
        super(NodeModelMLP, self).__init__(in_channels, out_channels, deg_norm, aggr)

        if mlp_nlay < 1:
            # Without this check self.mlp would silently stay undefined and
            # the failure would only appear later, inside forward().
            raise ValueError(f'mlp_nlay must be >= 1, got {mlp_nlay}')

        if mlp_nlay == 1:
            self.mlp = nn.Linear(in_channels, out_channels, bias=bias)
        else:
            layers = [nn.Linear(in_channels, mlp_nhid, bias=bias)]
            for i in range(mlp_nlay - 1):
                layers.append(activation(mlp_act))
                # The last linear layer maps to out_channels and is not
                # followed by a non-linear activation.
                is_last = i == mlp_nlay - 2
                layers.append(nn.Linear(mlp_nhid, out_channels if is_last else mlp_nhid, bias=bias))
            self.mlp = nn.Sequential(*layers)

        # self.reset_parameters()

    def reset_parameters(self, initrange=0.1):
        """Re-initialise every linear layer of the MLP.

        Weights are drawn uniformly from [-initrange, initrange] and biases
        are set to 0. Works for both the single-layer and the multi-layer MLP.
        """
        for module in self.mlp.modules():
            if isinstance(module, nn.Linear):
                nn.init.uniform_(module.weight, -initrange, initrange)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    def forward(self, x, edge_index, edge_attr=None, deg=None, **kwargs):
        """Return the aggregated node features of size (N, C_out).

        ``edge_attr`` and extra keyword arguments are accepted but not used.
        """
        # calculate the degree normalization factors, of size (E,)
        norm = self.degnorm_const(edge_index, num_nodes=x.size(0), deg=deg,
                                  method=self.deg_norm, device=x.device)

        # 'sm' normalisation
        # lift the features to source nodes, resulting size (E, C_in)
        x_j = torch.index_select(x, 0, edge_index[0])
        x_j = x_j * norm.view(-1, 1)  # norm.view(-1, 1) second dim set to 1 for broadcasting

        x_j = self.mlp(x_j)  # size (E, C_out)

        # aggregate the features into nodes, resulting size (N, C_out)
        x = scatter_(self.aggr, x_j, edge_index[1], dim_size=x.size(0))

        return x


## 5.3. GCN model

In [11]:
import torch.nn as nn

class GCNModel(nn.Module):
    """Stack of GIN convolution layers with optional residual connections.

    One ``GINConv`` layer is built per consecutive pair of
    ``[in_channels, *enc_sizes]``. With ``in_channels=1`` and
    ``enc_sizes=[32] * 12`` this is exactly the original hand-written stack of
    twelve 32-unit layers; the layers stay reachable as ``conv0`` ...
    ``conv<N-1>`` as well as through ``gcn_net``.

    Args:
        in_channels: Size of the input node features.
        enc_sizes: Output size of every layer, in order.
        num_classes: Output size of the final projection (``final_type='proj'``).
        non_linear: Name of the activation applied between layers / after the
            residual sum (see ``activation``).
        non_linear_layer_wise: Stored for reference; not used by the GIN layers.
        residual_hop: Add a residual connection every ``residual_hop`` layers;
            ``None`` or 0 disables residuals.
        dropout: Dropout probability applied to every layer output.
        final_type: 'none' (identity) or 'proj' (linear projection to
            ``num_classes``).
    """

    def __init__(self, in_channels, enc_sizes, num_classes, non_linear='relu', non_linear_layer_wise='none',
                 residual_hop=None, dropout=0.0, final_type='none'):
        assert final_type in ['none', 'proj']
        super().__init__()

        # Imported locally: GINConv is not imported at the top of the file.
        from torch_geometric.nn import GINConv

        self.in_channels = in_channels
        self.enc_sizes = [in_channels, *enc_sizes]
        self.num_layers = len(self.enc_sizes) - 1
        self.num_classes = num_classes
        self.residual_hop = residual_hop
        self.non_linear_layer_wise = non_linear_layer_wise
        self.final_type = final_type

        convs = []
        for i in range(self.num_layers):
            conv = GINConv(self._gin_mlp(self.enc_sizes[i], self.enc_sizes[i + 1]))
            # Keep the historical attribute names so repr and state_dict keys match.
            setattr(self, f'conv{i}', conv)
            convs.append(conv)
        self.gcn_net = nn.ModuleList(convs)

        self.dropout = nn.Dropout(dropout)

        if residual_hop is not None and residual_hop > 0:
            self.residuals = nn.ModuleList([nn.Linear(self.enc_sizes[i], self.enc_sizes[j], bias=False)
                                            if self.enc_sizes[i] != self.enc_sizes[j]
                                            else
                                            nn.Identity()
                                            for i, j in zip(range(0, len(self.enc_sizes), residual_hop),
                                                            range(residual_hop, len(self.enc_sizes), residual_hop))])
            self.num_residuals = len(self.residuals)

        self.non_linear = activation(non_linear)

        if self.final_type == 'none':
            self.final = nn.Identity()
        elif self.final_type == 'proj':
            self.final = nn.Linear(self.enc_sizes[-1], num_classes)
        else:
            raise ValueError

    @staticmethod
    def _gin_mlp(in_dim, out_dim):
        """Two-layer MLP used inside every GIN convolution."""
        return nn.Sequential(nn.Linear(in_dim, out_dim),
                             nn.BatchNorm1d(out_dim), nn.ReLU(),
                             nn.Linear(out_dim, out_dim), nn.ReLU())

    def reset_parameters(self):
        """Re-initialise all layers that define ``reset_parameters``."""
        for net in self.gcn_net:
            net.reset_parameters()
        # `residuals` only exists when residual_hop > 0, and its nn.Identity
        # entries have no parameters to reset.
        for net in getattr(self, 'residuals', []):
            if hasattr(net, 'reset_parameters'):
                net.reset_parameters()
        if self.final_type != 'none':
            self.final.reset_parameters()

    def forward(self, x, edge_index, edge_attr=None, deg=None):
        """Run all layers on node features ``x`` and return the final output.

        ``edge_attr`` and ``deg`` are accepted but not used by the GIN layers.
        """
        xr = None
        add_xr_at = -1
        use_residual = self.residual_hop is not None and self.residual_hop > 0
        last_layer = self.num_layers - 1

        for n, net in enumerate(self.gcn_net):
            xo = net(x, edge_index)
            xo = self.dropout(xo)
            if use_residual:
                # Start of a residual span: remember the (projected) input and
                # the layer index at which it has to be added back.
                if n % self.residual_hop == 0 and (n // self.residual_hop) < self.num_residuals:
                    xr = self.residuals[n // self.residual_hop](x)
                    add_xr_at = n + self.residual_hop - 1
                if n == add_xr_at:
                    if n < last_layer or self.final_type != 'none':
                        xo = self.non_linear(xo + xr)
                    else:
                        # Last layer feeding the loss directly: no activation,
                        # since e.g. ReLU would remove all negative scores.
                        xo = xo + xr
            else:
                # Same rule without residuals: skip the activation only on the
                # last layer when there is no final projection.
                if n < last_layer or self.final_type != 'none':
                    xo = self.non_linear(xo)

            x = xo
        # size of x: (B * N, self.enc_sizes[-1]) -> (B * N, num_classes)
        x = self.final(x)

        return x


class GCNLayer(nn.Module):
    """A ``NodeModelMLP`` node update followed by a non-linear activation.

    Args:
        in_channels: Input feature size.
        out_channels: Output feature size.
        aggr: Aggregation forwarded to ``NodeModelMLP``.
        bias: Whether the linear layers have a bias term.
        non_linear: Name of the activation (see ``activation``).
    """

    def __init__(self, in_channels, out_channels, aggr='add',
                 bias=True, non_linear='relu'):
        super().__init__()
        self.gcn = NodeModelMLP(in_channels, out_channels, aggr=aggr, bias=bias)
        self.non_linear = activation(non_linear)

    def forward(self, x, edge_index, edge_attr=None, deg=None):
        """Apply the node model, then the activation."""
        aggregated = self.gcn(x, edge_index, edge_attr, deg)
        return self.non_linear(aggregated)



# VI. Split train/val/test

In [12]:
# Dataset settings read by the loading code below.
data_dir = './data/botnet'
data_name = 'chord' # 'chord', 'debru', 'kadem', 'leet', 'c2', 'p2p'
batch_size = 2
in_memory = False
shuffle = False

# ========== load the dataset
print('loading dataset...')

# One dataset per split, built in train / val / test order.
train_ds, val_ds, test_ds = [
    BotnetDataset(name=data_name, root=data_dir, split=split_name,
                  in_memory=bool(in_memory), graph_format='pyg')
    for split_name in ('train', 'val', 'test')
]

# Only the training split is batched; validation and test are consumed graph by graph.
train_loader = GraphDataLoader(train_ds, batch_size=batch_size,
                               shuffle=bool(shuffle), num_workers=0)

loading dataset...


# VII. Train

In [None]:
# ============== some default parameters =============
# Module-level settings read by name from train() and the __main__ block below.

# --- runtime
devid = 0  # CUDA device index; a value of -1 selects the CPU
seed = 0  # seed passed to random.seed and torch.manual_seed
logmode = 'w'  # only referenced by the commented-out logger setup
log_interval = 96  # training metrics are printed when the number of processed graphs is a multiple of this

# --- model architecture (passed to GCNModel)
in_channels = 1
enc_sizes = [32] * 12
act = 'relu' # 'none', 'lrelu', 'relu', 'elu'
layer_act = 'none' # 'none', 'lrelu', 'relu', 'elu'

residual_hop = 1
num_classes = 2
final = 'proj'    # 'none', 'proj'

# NOTE(review): deg_norm, aggr and bias are not passed to GCNModel in the visible code — confirm whether they are still needed
deg_norm = 'sm'
aggr = 'add' # 'add', 'mean', 'max'
dropout = 0.0
bias = True


# --- optimisation
lr = 0.005 # learning rate
weight_decay = 5e-4  # weight decay passed to the Adam optimizer
epochs = 20  # number of passes over train_loader
early_stop = True  # NOTE(review): presumably toggles early stopping on validation loss — confirm train() honours it
save_dir = './saved_models'  # directory for the model checkpoint
save_name = 'temp.pt'  # checkpoint file name inside save_dir

# ====================================================

def train(model, train_loader, val_dataset, test_dataset, optimizer, criterion,
          scheduler=None):
    """Train ``model``, keep the checkpoint with the lowest validation loss,
    then report test metrics for that checkpoint.

    Reads the module-level settings ``epochs``, ``log_interval``,
    ``early_stop``, ``save_dir`` and ``save_name``.

    Args:
        model: Module called as ``model(batch.x, batch.edge_index)``.
        train_loader: Loader yielding batched graphs for training.
        val_dataset: Dataset evaluated after every epoch.
        test_dataset: Dataset evaluated once with the best checkpoint.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss applied to the model output and ``batch.y.long()``;
            also used for the validation and test loss.
        scheduler: Optional scheduler stepped with the validation loss.
    """
    device = next(model.parameters()).device
    # Use the training criterion for evaluation too, so the validation loss
    # is comparable with the training loss.
    predictor = PygModelPredictor(model, loss_fcn=criterion)

    early_stopper = EarlyStopping(patience=5, mode='min', verbose=True)
    checkpoint_path = os.path.join(save_dir, save_name)

    best_epoch = 0
    min_avg_lost = np.inf
    start = time.time()
    for ep in range(epochs):
        loss_avg_train = 0
        num_train_graph = 0
        model.train()
        for n, batch in enumerate(train_loader):
            # Use the returned object instead of relying on in-place movement.
            batch = batch.to(device)

            optimizer.zero_grad()

            x = model(batch.x, batch.edge_index)
            loss = criterion(x, batch.y.long())

            loss_avg_train += float(loss)
            num_train_graph += batch.num_graphs

            loss.backward()
            optimizer.step()

            if num_train_graph % log_interval == 0 or n == len(train_loader) - 1:
                with torch.no_grad():
                    pred_prob = torch.softmax(x, dim=1)[:, 1]
                    y = batch.y.long()
                    result_dict = eval_metrics(y, pred_prob)
                print(f'epoch: {ep + 1}, passed number of graphs: {num_train_graph}, '
                        f'train running loss: {loss_avg_train / num_train_graph:.5f} (passed time: {time_since(start)})')
                print(' ' * 10 + ', '.join(['{}: {:.5f}'.format(k, v) for k, v in result_dict.items()]))

        result_dict_avg, loss_avg = eval_predictor(val_dataset, predictor)
        print(f'Validation --- epoch: {ep + 1}, loss: {loss_avg:.5f}')
        print(' ' * 10 + ', '.join(['{}: {:.5f}'.format(k, v) for k, v in result_dict_avg.items()]))

        if scheduler is not None:
            scheduler.step(loss_avg)

        if loss_avg < min_avg_lost:
            # NOTE: the whole model object is pickled, so loading it requires
            # the same class definitions to be importable.
            torch.save(model, checkpoint_path)
            print(f'Better model saved at {checkpoint_path}.')
            best_epoch = ep
            min_avg_lost = loss_avg

        if early_stop:
            # Stop once the validation loss has not improved for `patience` epochs.
            early_stopper(float(loss_avg))
            if early_stopper.early_stop:
                print(f'Early stopping triggered after epoch {ep + 1}.')
                break

    best_model = torch.load(checkpoint_path)
    print('*' * 12 + f' best model obtained after epoch {best_epoch + 1}, '
                       f'saved at {checkpoint_path} ' + '*' * 12)

    predictor = PygModelPredictor(best_model, loss_fcn=criterion)

    result_dict_avg, loss_avg = eval_predictor(test_dataset, predictor)
    print(f'Testing --- loss: {loss_avg:.5f}')
    print(' ' * 10 + ', '.join(['{}: {:.5f}'.format(k, v) for k, v in result_dict_avg.items()]))


if __name__ == '__main__':
    # Entry point: seed the RNGs, pick the device, build the model and train it.
    os.makedirs(save_dir, exist_ok=True)

    # ========== random seeds and device
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Fall back to the CPU when CUDA is requested but not available, instead
    # of failing later in model.to(device).
    if devid > -1 and torch.cuda.is_available():
        device = torch.device(f'cuda:{devid}')
    else:
        if devid > -1:
            print(f'CUDA is not available; using CPU instead of cuda:{devid}.')
        device = torch.device('cpu')

    # ========== logging setup
    # File logging is disabled; log_name is kept for when it is re-enabled.
    log_name = os.path.splitext(save_name)[0]

    print('python ' + ' '.join(sys.argv))
    print('-' * 30)
    print('-' * 30)
    print(time.ctime())
    print('-' * 30)

    # ========== define the model, optimizer, and loss

    model = GCNModel(in_channels,
                     enc_sizes,
                     num_classes,
                     non_linear=act,
                     non_linear_layer_wise=layer_act,
                     residual_hop=residual_hop,
                     dropout=dropout,
                     final_type=final,
                     )

    print('model ' + '-' * 10)
    print(repr(model))
    model.to(device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    # Reduce the learning rate to a quarter when the validation loss stalls.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.25, patience=1)

    # ========== train the model
    train(model, train_loader, val_ds, test_ds, optimizer, criterion,
          scheduler)


python /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-34393bdf-8a6f-4acf-8e6f-da0472dc9d5c.json
------------------------------
------------------------------
Mon Oct 17 16:29:45 2022
------------------------------
model ----------
GCNModel(
  (conv0): GINConv(nn=Sequential(
    (0): Linear(in_features=1, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=32, out_features=32, bias=True)
    (4): ReLU()
  ))
  (conv1): GINConv(nn=Sequential(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=32, out_features=32, bias=True)
    (4): ReLU()
  ))
  (conv2): GINConv(nn=Sequential(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.