In [None]:
import os
from glob import glob
from IPython.display import clear_output
import json
import numpy as np
import shutil
from sklearn import preprocessing
from matplotlib import pyplot as plt
%matplotlib inline
from GPUtil import showUtilization as gpu_usage
import re

from utils import read_graph_data, read_pickle, plot_histogram, process_graph_data, get_processed_addr

In [None]:
from torch_geometric.data import Data, Dataset
class CFG(Dataset):
    """On-disk PyTorch Geometric dataset built from a list of graph files.

    Each address in ``graph_addr_list`` is read with ``read_graph_data``, converted
    to a ``Data`` object whose node features are one-hot encoded in-degrees, and
    stored as ``data_<index>.pt`` inside ``self.processed_dir``.

    Args:
        graph_addr_list: Paths of the graph files. The class label is derived from
            the path, which must contain one of ``benign``, ``tsunami``, ``mirai``
            or ``gafgyt``.
        root: Dataset root directory handed to the base ``Dataset``.
        label_transformer: Object whose ``transform([name])`` maps a class name to
            its integer id (the notebook passes a fitted ``LabelEncoder``).
        transform: Forwarded unchanged to the base ``Dataset``.
        pre_transform: Forwarded to the base ``Dataset``; applied to every graph
            right before it is written to disk.
    """

    def __init__(self, graph_addr_list, root, label_transformer, transform=None, pre_transform=None):
        # These attributes are assigned before the base-class constructor runs
        # because process() reads them.
        # NOTE(review): presumably the base constructor is what triggers process()
        # when processed files are missing - confirm against the installed PyG version.
        self.graph_addr_list = graph_addr_list
        self.label_transformer = label_transformer
        super(CFG, self).__init__(root, transform, pre_transform)

    @property
    def processed_file_names(self):
        """One ``data_<index>.pt`` name per entry of ``graph_addr_list``."""
        return [f'data_{file_idx}.pt' for file_idx in range(len(self.graph_addr_list))]

    @staticmethod
    def _label_from_path(graph_addr):
        """Return the first known class name contained in ``graph_addr``.

        The lookup order (benign, tsunami, mirai, gafgyt) matches the precedence of
        the original ``if/elif`` chain.

        Raises:
            ValueError: If the path names none of the known classes. Previously this
                case left the label unbound or silently reused the value from the
                preceding graph.
        """
        for label in ('benign', 'tsunami', 'mirai', 'gafgyt'):
            if label in graph_addr:
                return label
        raise ValueError('no known class name (benign/tsunami/mirai/gafgyt) found in path')

    def process(self):
        """Convert every graph file into a saved ``Data`` object.

        A failure on one file is printed together with its path and does not stop
        the remaining files from being processed. The corresponding ``data_<index>.pt``
        is then simply not written, so a later ``get`` for that index will fail.
        """
        one_hot_transform = T.OneHotDegree(max_degree = 187, cat = False, in_degree = True)

        for graph_idx, graph_addr in enumerate(self.graph_addr_list):
            try:
                graph_data = read_graph_data(graph_addr)
                label = self._label_from_path(graph_addr)

                # Map every original node identifier to a dense 0-based index.
                node_mapping = {
                    str(node): node_idx
                    for node_idx, node in enumerate(graph_data['node_dict'].keys())
                }

                # Build the two rows (sources, targets) of the edge index in one pass.
                sources, targets = [], []
                for edge in graph_data['edge_list']:
                    sources.append(node_mapping[str(edge[0])])
                    targets.append(node_mapping[str(edge[1])])

                edge_idx = torch.tensor([sources, targets], dtype=torch.long)
                y = torch.tensor(self.label_transformer.transform([label]), dtype=torch.long)

                # No raw node features: the one-hot in-degree encoding becomes x.
                data = Data(x=None, edge_index=edge_idx, y=y)
                data = one_hot_transform(data)

                if self.pre_filter is not None and not self.pre_filter(data):
                    continue

                if self.pre_transform is not None:
                    data = self.pre_transform(data)

                torch.save(data, osp.join(self.processed_dir, 'data_{}.pt'.format(graph_idx)))
            except Exception as e:
                # Best effort: report the offending file and keep going.
                print(graph_addr, e)

    def len(self):
        """Number of expected processed files (one per input address)."""
        return len(self.processed_file_names)

    def get(self, idx):
        """Load the processed graph stored as ``data_<idx>.pt``."""
        data = torch.load(osp.join(self.processed_dir, 'data_{}.pt'.format(idx)))
        return data

In [None]:
import torch
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
# GPUtil's showUtilization, imported as gpu_usage at the top of the notebook.
gpu_usage()

In [None]:
# Collect the graph files of every class: one glob per (class, split) folder.
benign_train_graph_list, benign_test_graph_list = (
    glob("datasets/normal_dataset/benign/train/*"),
    glob("datasets/normal_dataset/benign/test/*"),
)
tsunami_train_graph_list, tsunami_test_graph_list = (
    glob("datasets/normal_dataset/tsunami/train/*"),
    glob("datasets/normal_dataset/tsunami/test/*"),
)
mirai_train_graph_list, mirai_test_graph_list = (
    glob("datasets/normal_dataset/mirai/train/*"),
    glob("datasets/normal_dataset/mirai/test/*"),
)
gafgyt_train_graph_list, gafgyt_test_graph_list = (
    glob("datasets/normal_dataset/gafgyt/train/*"),
    glob("datasets/normal_dataset/gafgyt/test/*"),
)

# Per-class (train, test) file counts.
for class_name, class_train_files, class_test_files in (
    ('benign', benign_train_graph_list, benign_test_graph_list),
    ('tsunami', tsunami_train_graph_list, tsunami_test_graph_list),
    ('mirai', mirai_train_graph_list, mirai_test_graph_list),
    ('gafgyt', gafgyt_train_graph_list, gafgyt_test_graph_list),
):
    print(class_name, len(class_train_files), len(class_test_files))

# Merged splits; the class order here is benign, tsunami, gafgyt, mirai.
all_train_graphs_list = [
    *benign_train_graph_list,
    *tsunami_train_graph_list,
    *gafgyt_train_graph_list,
    *mirai_train_graph_list,
]
all_test_graphs_list = [
    *benign_test_graph_list,
    *tsunami_test_graph_list,
    *gafgyt_test_graph_list,
    *mirai_test_graph_list,
]

print(f'Train: {len(all_train_graphs_list)}, Test: {len(all_test_graphs_list)}')

In [None]:
import os
import os.path as osp 
from torch_geometric.data import Data, Dataset, Batch

from torch_geometric.loader import DataLoader
from torch.nn import Linear, Sequential, ReLU
import torch.nn.functional as F
from torch_geometric.nn import GraphConv, GCNConv, SAGEConv, GATConv, GINConv, global_add_pool
from torch_geometric.utils import remove_self_loops, add_self_loops
from torch_geometric.data import Data
import torch_geometric.transforms as T

In [None]:
# Fit the label encoder on the four class names and show the resulting ids.
selected_classes = ['benign', 'tsunami', 'mirai', 'gafgyt'] # Label Encoder:  [0 3 2 1]
le = preprocessing.LabelEncoder().fit(selected_classes)
print('Label Encdoer: ', le.transform(selected_classes))

# Training set built from every training graph of all classes.
train_dataset = CFG(
    root='data/train',
    graph_addr_list=all_train_graphs_list,
    label_transformer=le,
)
print(f'Number of training graphs: {train_dataset.len()}')

# Combined test set, followed by one test set per class.
test_dataset = CFG(
    root='data/test',
    graph_addr_list=all_test_graphs_list,
    label_transformer=le,
)
benign_test_dataset = CFG(
    root='data/benign_test',
    graph_addr_list=benign_test_graph_list,
    label_transformer=le,
)
gafgyt_test_dataset = CFG(
    root='data/gafgyt_test',
    graph_addr_list=gafgyt_test_graph_list,
    label_transformer=le,
)
mirai_test_dataset = CFG(
    root='data/mirai_test',
    graph_addr_list=mirai_test_graph_list,
    label_transformer=le,
)
tsunami_test_dataset = CFG(
    root='data/tsunami_test',
    graph_addr_list=tsunami_test_graph_list,
    label_transformer=le,
)
print(
    f'Number of test graphs: {test_dataset.len()} - '
    f'(Number of benign test graphs: {benign_test_dataset.len()} - '
    f'Number of gafgyt test graphs: {gafgyt_test_dataset.len()} - '
    f'Number of mirai test graphs: {mirai_test_dataset.len()} - '
    f'Number of tsunami test graphs: {tsunami_test_dataset.len()})'
)

# Index the adversarial graph files by the family and the min/median/max tag
# that appear in their file names, e.g. "..._mirai_max_...".
adversarial_dir = r"datasets/adversarial_dataset"
adv_addr_dict = {
    f'adversarial_{family}': {'min': [], 'median': [], 'max': []}
    for family in ('benign', 'gafgyt', 'mirai', 'tsunami')
}
adv_name_pattern = re.compile(r"(benign|gafgyt|mirai|tsunami)_(max|median|min)")
for adv_path in glob(adversarial_dir + '/*'):
    # Only the file name is needed to classify a file, so the graph itself is not
    # loaded here (it is read later, when the file is actually evaluated).
    name_match = adv_name_pattern.search(os.path.basename(adv_path))
    if name_match is None:
        # Unrelated files in the folder are reported and skipped instead of
        # aborting the whole cell with an AttributeError on None.
        print(f'Skipping {adv_path}: name does not contain <family>_<min|median|max>')
        continue
    family, tag = name_match.groups()
    adv_addr_dict[f'adversarial_{family}'][tag].append(adv_path)

# Summary: number of files per family and tag.
for k, v in adv_addr_dict.items():
    print(k, end=': ')
    for sub_k, sub_v in v.items():
        print(sub_k, len(sub_v), end='  ')
    print()

In [None]:
def split_dataset(train_dataset, test_dataset, adversarial_dataset, batch_size):
    """Wrap the train and test datasets in data loaders.

    Args:
        train_dataset: Dataset served in shuffled batches under ``'train'``.
        test_dataset: Dataset served in order under ``'total_test'``.
        adversarial_dataset: Not used. Kept only so existing callers (which pass
            ``None``) keep working.
        batch_size: Batch size for both loaders.

    Returns:
        ``{'normal': {'train': DataLoader, 'total_test': DataLoader}}``.
    """
    dataset_dict = {
        'normal': {
            'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
            'total_test': DataLoader(test_dataset, batch_size=batch_size, shuffle=False),
        },
    }

    # 'normal' is the only group that is ever created, so the former loop with its
    # unreachable 'adversarial' branch is reduced to this single report line.
    normal_loaders = dataset_dict['normal']
    print(f"Number of normal samples in train-set: {len(normal_loaders['train'].dataset)}", f"Number of normal samples in test-set: {len(normal_loaders['total_test'].dataset)}")

    return dataset_dict

In [None]:
class GNN_CLASSIFIER(torch.nn.Module):
    """Graph classifier: a stack of graph convolutions, a pooling step and a linear head.

    ``config`` is a dict; the keys read by this class are:

    * ``conv_layer_type``: convolution class; its ``__name__`` must be one of
      ``GCNConv``, ``GINConv``, ``GATConv`` or ``SAGEConv``.
    * ``layer_size_list``: sequence of ``(in_width, out_width)`` pairs, one per
      convolution layer.
    * ``batch_normalization``: whether to apply ``BatchNorm1d`` after each
      convolution and after the first linear layer.
    * ``dropout``: dropout probability (also handed to ``GATConv``).
    * ``pooling_option``: callable ``(x, batch) -> pooled``; for ``SAGEConv`` the
      part of its ``__name__`` between the first and second underscore is also
      used as the aggregation name.
    * ``activation_func``: activation applied after every hidden layer.
    * ``virtual_edges``: when true, ``forward`` replaces existing self-loops by
      exactly one self-loop per node.

    The head is fixed to ``Linear(32, 32)`` followed by ``Linear(32, 4)``, so the
    last convolution has to produce 32 features.

    Raises:
        ValueError: If ``conv_layer_type`` is not one of the supported classes.
    """

    def __init__(self, config):
        super(GNN_CLASSIFIER, self).__init__()
        self.config = config

        conv_cls = self.config['conv_layer_type']
        conv_name = conv_cls.__name__
        layer_sizes = self.config['layer_size_list']
        # Feature width produced by each convolution; sizes the batch-norm layers.
        conv_out_widths = [layer[1] for layer in layer_sizes]

        if conv_name == 'GCNConv':
            self.graphConv_layer_list = torch.nn.ModuleList([conv_cls(*layer, bias = True) for layer in layer_sizes])
        elif conv_name == 'GINConv':
            self.graphConv_layer_list = torch.nn.ModuleList()
            for layer in layer_sizes:
                in_mlp = Sequential(
                    Linear(layer[0], layer[1], bias = True),
                    torch.nn.BatchNorm1d(layer[1]),
                    ReLU(),
                    Linear(layer[1], layer[1], bias = True))
                self.graphConv_layer_list.append(conv_cls(in_mlp))
        elif conv_name == 'GATConv':
            gat_heads = 8
            last_idx = len(layer_sizes) - 1
            self.graphConv_layer_list = torch.nn.ModuleList()
            conv_out_widths = []
            for layer_idx, layer in enumerate(layer_sizes):
                if layer_idx == 0:
                    heads = gat_heads
                    conv = conv_cls(*layer, heads = heads, bias = True, dropout = config['dropout'])
                else:
                    # Every layer after the first consumes the concatenated heads
                    # of its predecessor; only the final layer uses a single head.
                    heads = gat_heads if layer_idx < last_idx else 1
                    conv = conv_cls(in_channels = layer[0] * gat_heads, out_channels = layer[1], heads = heads, bias = True, dropout = config['dropout'])
                self.graphConv_layer_list.append(conv)
                # Heads are concatenated, so the layer emits out_channels * heads
                # features. Batch norm must match that width (it used to be sized
                # with out_channels only, which failed on the first forward pass).
                conv_out_widths.append(layer[1] * heads)
        elif conv_name == 'SAGEConv':
            aggr = config['pooling_option'].__name__.split('_')[1]
            self.graphConv_layer_list = torch.nn.ModuleList([conv_cls(*layer, aggr = aggr, bias = True) for layer in layer_sizes])
        else:
            # Fail at construction time instead of with a missing attribute in forward().
            raise ValueError(
                f"Unsupported conv_layer_type '{conv_name}'; expected GCNConv, GINConv, GATConv or SAGEConv"
            )

        if self.config['batch_normalization']:
            self.bn_list = torch.nn.ModuleList([torch.nn.BatchNorm1d(width) for width in conv_out_widths])

        self.linear_layer_list = torch.nn.ModuleList([Linear(32, 32, bias = True), Linear(32, 4, bias = True)])
        self.linear_bn_list = torch.nn.ModuleList([torch.nn.BatchNorm1d(32)])

    def forward(self, x_data, edge_index_data, batch):
        """Compute node embeddings and per-graph outputs.

        Args:
            x_data: Node feature matrix.
            edge_index_data: Edge index tensor handed to every convolution.
            batch: Node-to-graph assignment handed to the pooling function.

        Returns:
            Tuple ``(x, x_pooled)``: the node embeddings after the last
            convolution block and the output of the linear head on the pooled
            graph representation (4 values per graph, no softmax applied).
        """
        x, edge_index = x_data, edge_index_data

        if self.config['virtual_edges']:
            edge_index, _ = remove_self_loops(edge_index)
            edge_index, _ = add_self_loops(edge_index, num_nodes = x.size(0))

        use_bn = self.config['batch_normalization']
        activation = self.config['activation_func']
        dropout_p = self.config['dropout']

        # All four supported convolution types share the same post-processing:
        # optional batch norm -> activation -> optional dropout (training only).
        for layer_idx, layer in enumerate(self.graphConv_layer_list):
            x = layer(x, edge_index)
            if use_bn:
                x = self.bn_list[layer_idx](x)
            x = activation(x)
            if dropout_p > 0.0 and self.training:
                x = F.dropout(x, p = dropout_p, training = self.training)

        x_pooled = self.config['pooling_option'](x, batch)

        num_layers = len(self.linear_layer_list)
        for layer_idx, layer in enumerate(self.linear_layer_list):
            x_pooled = layer(x_pooled)
            # The last linear layer emits the raw outputs and is left untouched.
            if layer_idx < num_layers - 1:
                if use_bn:
                    x_pooled = self.linear_bn_list[layer_idx](x_pooled)
                x_pooled = activation(x_pooled)
                if dropout_p > 0.0 and self.training:
                    x_pooled = F.dropout(x_pooled, p = dropout_p, training = self.training)

        return x, x_pooled

In [None]:
def train(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader``.

    Every batch is moved to the notebook-level ``device``, run through the model,
    scored with ``criterion`` on the model's second output, and followed by one
    optimizer step.
    """
    model.train()
    for batch in loader:
        # Start every step from zeroed gradients.
        optimizer.zero_grad()
        batch_on_device = batch.to(device)
        # The model returns (node embeddings, per-graph outputs); only the latter is scored.
        _, graph_out = model(batch_on_device.x, batch_on_device.edge_index, batch_on_device.batch)
        batch_loss = criterion(graph_out, batch_on_device.y)
        batch_loss.backward()
        optimizer.step()

def test(model, loader, criterion):
    """Evaluate ``model`` on every batch of ``loader`` without tracking gradients.

    Args:
        model: Module returning ``(node_embeddings, graph_outputs)``.
        loader: Data loader; its ``dataset`` length is used as the sample count.
        criterion: Loss applied to ``(graph_outputs, labels)``.

    Returns:
        Tuple ``(average loss per sample, accuracy)``.
    """
    total_loss, correct = 0.0, 0

    model.eval()
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            data_gpu = data.to(device)
            _, out = model(data_gpu.x, data_gpu.edge_index, data_gpu.batch)
            pred = out.argmax(dim=1)  # Use the class with the highest output.

            batch_loss = criterion(out, data_gpu.y)
            if batch_loss.dim() == 0 and getattr(criterion, 'reduction', 'mean') == 'mean':
                # A mean-reduced loss is already averaged over the batch. Weight it
                # by the batch size so the final division by the dataset size gives
                # a true per-sample average (summing batch means and dividing by
                # the sample count under-reported the loss by about the batch size).
                total_loss += float(batch_loss) * data_gpu.y.size(0)
            else:
                # 'sum' or 'none' reductions already carry one term per sample.
                total_loss += float(batch_loss.sum())

            correct += int((pred == data_gpu.y).sum())  # Check against ground-truth labels.

    num_samples = len(loader.dataset)
    return total_loss / num_samples, correct / num_samples

In [None]:
def _to_single_graph_batch(graph_data, label, le):
    """Convert raw graph data into a one-graph batch with one-hot in-degree features."""
    one_hot_degree = T.OneHotDegree(max_degree = 187, cat = False, in_degree = True)
    model_data = one_hot_degree(process_graph_data(graph_data, label, le))
    return Batch.from_data_list([model_data])


def test_adversarial_robustness(model, le, criterion, adversarial_graph_addr_list):
    """Count how many adversarial graphs make ``model`` predict their target's class.

    For every adversarial file the graph's ``description`` entry names the original
    and the target sample it was derived from. A sample is only counted when the
    model classifies both the original and the target correctly; otherwise it is
    tallied as ineffective. Among counted samples, the attack succeeds when the
    adversarial prediction equals the target prediction, and the model is robust
    otherwise.

    Args:
        model: Classifier returning ``(node_embeddings, graph_outputs)``. The caller
            is responsible for putting it in eval mode and on ``device``.
        le: Label encoder handed to ``process_graph_data``.
        criterion: Accepted for interface compatibility; it is not used.
        adversarial_graph_addr_list: Paths of the adversarial graph files.

    Returns:
        Tuple ``(adversarial_graph_list, adversarial_score_dist,
        failed_adversarial_score_dist, adversarial_counter, robust_counter,
        adv_ratio)`` where the score lists hold the maximum softmax value of the
        successful / failed adversarial samples and ``adv_ratio`` is
        ``adversarial_counter / (adversarial_counter + robust_counter)`` (0 when
        no sample was counted).
    """
    adversarial_graph_list = []
    adversarial_score_dist, failed_adversarial_score_dist = [], []
    robust_counter, adversarial_counter, ineffective_counter = 0, 0, 0

    with torch.no_grad():
        for adversarial_addr in adversarial_graph_addr_list:
            adversarial_graph_data = read_graph_data(adversarial_addr)
            description = adversarial_graph_data['description']

            # Use the notebook-level device (as train/test do) instead of a
            # hard-coded 'cuda', so the evaluation also runs on CPU-only machines.
            adversarial_model_data = _to_single_graph_batch(adversarial_graph_data, 'adversarial', le).to(device)

            original_processed_path, original_label = get_processed_addr(description['original'][0])
            original_model_data = _to_single_graph_batch(read_graph_data(original_processed_path), original_label, le).to(device)

            target_processed_path, target_label = get_processed_addr(description['target'][0])
            target_model_data = _to_single_graph_batch(read_graph_data(target_processed_path), target_label, le).to(device)

            _, original_out = model(original_model_data.x, original_model_data.edge_index, original_model_data.batch)
            original_pred = original_out.argmax(dim = 1)

            _, target_out = model(target_model_data.x, target_model_data.edge_index, target_model_data.batch)
            target_pred = target_out.argmax(dim = 1)

            _, adversarial_out = model(adversarial_model_data.x, adversarial_model_data.edge_index, adversarial_model_data.batch)
            adversarial_pred = adversarial_out.argmax(dim = 1)

            original_is_correct = original_model_data.y.item() == original_pred.item()
            target_is_correct = target_model_data.y.item() == target_pred.item()
            if not (original_is_correct and target_is_correct):
                # The model already fails on the clean inputs, so this sample says
                # nothing about robustness.
                ineffective_counter += 1
                continue

            # Confidence of the adversarial prediction: maximum softmax value.
            max_softmax = F.softmax(adversarial_out, dim = 1).max(dim = 1).values.detach().to('cpu').item()
            if adversarial_pred.item() == target_pred.item():
                adversarial_counter += 1
                adversarial_score_dist.append(max_softmax)
                adversarial_graph_list.append(adversarial_model_data[0].detach().to('cpu'))
            else:
                robust_counter += 1
                failed_adversarial_score_dist.append(max_softmax)

    counted = adversarial_counter + robust_counter
    adv_ratio = adversarial_counter / counted if counted else 0

    print(f'adv counter: {adversarial_counter} - robust counter: {robust_counter} - Ineffective Samples: {ineffective_counter} - Total: {adversarial_counter + robust_counter + ineffective_counter} - adv ratio: {adv_ratio}')

    return adversarial_graph_list, adversarial_score_dist, failed_adversarial_score_dist, adversarial_counter, robust_counter, adv_ratio

In [None]:
def get_adversarial_confusion_matrix(model, le, adversarial_graph_addr_list):
    """Count the class predicted by ``model`` for each adversarial graph file.

    Args:
        model: Classifier returning ``(node_embeddings, graph_outputs)``.
        le: Label encoder; handed to ``process_graph_data`` and used to map the
            predicted class id back to its name.
        adversarial_graph_addr_list: Paths of the adversarial graph files.

    Returns:
        Dict mapping ``'benign'``, ``'gafgyt'``, ``'mirai'`` and ``'tsunami'`` to
        the number of files predicted as that class.
    """
    prediction_counts = {'benign': 0, 'gafgyt': 0, 'mirai': 0, 'tsunami': 0}

    with torch.no_grad():
        for adversarial_addr in adversarial_graph_addr_list:
            adversarial_graph_data = read_graph_data(adversarial_addr)
            one_hot_degree = T.OneHotDegree(max_degree = 187, cat = False, in_degree = True)
            adversarial_model_data = one_hot_degree(process_graph_data(adversarial_graph_data, 'adversarial', le))
            # Use the notebook-level device (as train/test do) instead of a
            # hard-coded 'cuda', so this also runs on CPU-only machines.
            adversarial_model_data = Batch.from_data_list([adversarial_model_data]).to(device)
            _, adversarial_out = model(adversarial_model_data.x, adversarial_model_data.edge_index, adversarial_model_data.batch)
            adversarial_pred = adversarial_out.argmax(dim = 1)
            predicted_label = le.inverse_transform(adversarial_pred.to('cpu'))[0]
            prediction_counts[predicted_label] += 1

    return prediction_counts

In [None]:
def get_model_setting(config):
    """Build the identifier string used to name a configuration's saved files.

    The identifier combines the batch size, the convolution class name, the output
    width of every convolution layer joined by ``-``, the fixed ``linear32-32``
    tag, the pooling kind (taken from the pooling function's name), the
    batch-norm flag, the dropout value and the learning rate.
    """
    layer_widths = '-'.join(str(sizes[-1]) for sizes in config['layer_size_list'])
    conv_name = config['conv_layer_type'].__name__
    if config['batch_normalization']:
        bn_tag = 'batchNorm'
    else:
        bn_tag = 'noBatchNorm'
    # e.g. 'global_add_pool' -> 'add'
    pool_kind = config['pooling_option'].__name__.split('_')[1]

    return (
        f"batchSize{config['batch_size']!s}_{conv_name}{layer_widths}_linear32-32_"
        f"{pool_kind}Pool_{bn_tag}_dropout{config['dropout']!s}_lr{config['lr']!s}"
    )

In [None]:
def _plot_epoch_curves(train_values, test_values, title, y_label, save_path, show, y_ticks = None):
    """Plot train/test values against 1-based epoch numbers, save the figure and optionally show it."""
    # Plot against 1..n so the points line up with the 1-based tick labels.
    epochs = range(1, len(train_values) + 1)
    plt.figure(figsize=(20, 5))
    plt.title(title)
    plt.plot(epochs, train_values, label = 'Train')
    plt.plot(epochs, test_values, label = 'Test')
    plt.xticks(epochs)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.grid()
    plt.legend()
    if y_ticks is not None:
        plt.yticks(y_ticks)
    plt.savefig(save_path)
    if show:
        plt.show()
    plt.close()


def _collect_max_softmax_scores(classifier, loader, class_ids):
    """Return ``{class_name: [max softmax value per sample]}`` for all samples served by ``loader``."""
    id_to_name = {class_id: name for name, class_id in class_ids.items()}
    scores = {name: [] for name in class_ids}

    with torch.no_grad():
        for data in loader:
            data_gpu = data.to(device)
            _, out = classifier(data_gpu.x, data_gpu.edge_index, data_gpu.batch)
            confidences = F.softmax(out, dim = 1).max(dim = 1).values.detach().to('cpu')

            for y, y_hat in zip(data_gpu.y.detach().to('cpu').tolist(), confidences.tolist()):
                name = id_to_name.get(y)
                if name is not None:
                    scores[name].append(y_hat)

    return scores


def train_test_classifier(config):
    """Train a ``GNN_CLASSIFIER`` for ``config``, then evaluate it on normal and adversarial data.

    The function relies on the notebook-level ``train_dataset``, ``test_dataset``,
    ``le``, ``adv_addr_dict`` and ``device``. Everything it writes goes below
    ``classifier/``: the best model (``models/``), accuracy, loss and score plots
    (``plots/``) and the adversarial report (``logs/``).

    Training runs for at most 150 epochs. The model is saved whenever the test
    accuracy beats every previous epoch. After 15 epochs without improvement the
    best weights are restored and the learning rate is multiplied by 0.1; the
    fourth time this happens training stops.

    Args:
        config: Configuration dict; see ``GNN_CLASSIFIER`` and ``get_model_setting``
            for the keys they read. This function additionally reads
            ``optimizer``, ``lr`` and ``batch_size``.
    """
    save_addr_base = 'classifier'
    # Create the output folders up front so saving cannot fail on a fresh checkout.
    for sub_dir in ('plots', 'models', 'logs'):
        os.makedirs(f"{save_addr_base}/{sub_dir}", exist_ok = True)

    classifier = GNN_CLASSIFIER(config).to(device)
    optimizer = config['optimizer'](classifier.parameters(), lr=config['lr'])
    criterion = torch.nn.CrossEntropyLoss()

    model_setting = get_model_setting(config)
    model_path = f"{save_addr_base}/models/{model_setting}"

    print('Model Config: {}'.format(model_setting))

    dataset_dict = split_dataset(train_dataset, test_dataset, None, config['batch_size'])
    normal_loaders = dataset_dict['normal']

    ########### training
    train_acc_list, test_acc_list = [], []
    train_err_list, test_err_list = [], []
    Epochs = 150
    early_stopping_counter = 0
    num_stop_hit = 0
    early_stopping_criteria = 15

    for epoch in range(Epochs):
        train(classifier, normal_loaders['train'], criterion, optimizer)
        train_err, train_acc = test(classifier, normal_loaders['train'], criterion)
        test_err, test_acc = test(classifier, normal_loaders['total_test'], criterion)

        train_acc_list.append(train_acc)
        train_err_list.append(train_err)
        test_acc_list.append(test_acc)
        test_err_list.append(test_err)

        print(f'=== Epoch: {epoch + 1} / {Epochs}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}, Train err: {train_err:.4f}, Test Err: {test_err:.4f} ===')

        # The plots are rewritten every epoch but only displayed every 10th epoch.
        show_plots = (epoch + 1) % 10 == 0
        _plot_epoch_curves(
            train_acc_list, test_acc_list,
            title = '{} Accuracy/Epoch\nMaximum Test Accuracy: {} at Epoch {}'.format(model_setting, np.max(test_acc_list), np.argmax(test_acc_list) + 1),
            y_label = 'Accuracy',
            save_path = f"{save_addr_base}/plots/{model_setting}_acc_plot.pdf",
            show = show_plots,
            y_ticks = np.arange(0, 1.05, 0.05),
        )
        _plot_epoch_curves(
            train_err_list, test_err_list,
            title = '{} Loss/Epoch'.format(model_setting),
            y_label = 'Loss',
            save_path = f"{save_addr_base}/plots/{model_setting}_err_plot.pdf",
            show = show_plots,
        )

        if epoch > 0:
            max_test_acc = max(test_acc_list[:-1])
            if test_acc > max_test_acc:
                print('Best test accuracy so far.')
                early_stopping_counter = 0
                torch.save(classifier, model_path)
            else:
                early_stopping_counter += 1
                print('No improvement.')

                if early_stopping_counter == early_stopping_criteria:
                    num_stop_hit += 1
                    if num_stop_hit == 4:
                        print('Stop training.')
                        break
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = 0.1 * param_group['lr']
                    # Restore the best weights INTO the existing model. Rebinding
                    # `classifier` to a freshly loaded object would leave the
                    # optimizer holding the old parameters, so later steps would
                    # no longer train the model being evaluated.
                    classifier.load_state_dict(torch.load(model_path).state_dict())
                    print(f'Reducing lr. Number of Stop hits: {num_stop_hit}')
                    early_stopping_counter = 0

        else:
            torch.save(classifier, model_path)

    clear_output()

    print('Model Config: {}'.format(model_setting))

    ########## load model
    # NOTE(review): the whole module is pickled, not just its state dict. Recent
    # PyTorch releases default torch.load to weights_only=True, which rejects
    # such files - confirm against the installed version.
    classifier = torch.load(model_path).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    classifier.eval()

    for k, v in normal_loaders.items():
        if 'test' in k:
            test_err, test_acc = test(classifier, v, criterion)
            print(f'Evaluation on Normal {k} Data ({len(v.dataset)}) --> Error: {test_err}, Accuracy: {test_acc}')

    ########## Find Normal Data Output Distribution
    class_ids = {name: int(le.transform([name])[0]) for name in ('benign', 'gafgyt', 'mirai', 'tsunami')}
    train_scores = _collect_max_softmax_scores(classifier, normal_loaders['train'], class_ids)
    # The test loader is stored under 'total_test'; the former check for a split
    # named 'test' never matched, which left every test distribution empty.
    test_scores = _collect_max_softmax_scores(classifier, normal_loaders['total_test'], class_ids)

    train_benign_score_dist, test_benign_score_dist = train_scores['benign'], test_scores['benign']
    train_gafgyt_score_dist, test_gafgyt_score_dist = train_scores['gafgyt'], test_scores['gafgyt']
    train_mirai_score_dist, test_mirai_score_dist = train_scores['mirai'], test_scores['mirai']
    train_tsunami_score_dist, test_tsunami_score_dist = train_scores['tsunami'], test_scores['tsunami']

    print(f"Train Benign Scores: {len(train_benign_score_dist)}, Test Benign Scores: {len(test_benign_score_dist)}")
    print(f"Train gafgyt Scores: {len(train_gafgyt_score_dist)}, Test gafgyt Scores: {len(test_gafgyt_score_dist)}")
    print(f"Train mirai Scores: {len(train_mirai_score_dist)}, Test mirai Scores: {len(test_mirai_score_dist)}")
    print(f"Train tsunami Scores: {len(train_tsunami_score_dist)}, Test tsunami Scores: {len(test_tsunami_score_dist)}")

    # Count the addresses of each family; len() of the per-family dict would only
    # count its three min/median/max keys.
    adv_addr_counts = {adv_k: sum(len(paths) for paths in adv_v.values()) for adv_k, adv_v in adv_addr_dict.items()}
    print(f"Number of Adversarial Addresses --> Benign: {adv_addr_counts['adversarial_benign']} - Gafgyt: {adv_addr_counts['adversarial_gafgyt']} - Mirai: {adv_addr_counts['adversarial_mirai']} - Tsunami: {adv_addr_counts['adversarial_tsunami']}")

    adv_metric_dict = {'adversarial_benign': None, 'adversarial_gafgyt': None, 'adversarial_mirai': None, 'adversarial_tsunami': None}
    # Scores of every family are accumulated so the histograms below cover all
    # adversarial samples rather than only the family processed last.
    all_adversarial_score_dist, all_failed_adversarial_score_dist = [], []
    for adv_k, adv_v in adv_addr_dict.items():
        _, adversarial_score_dist, failed_adversarial_score_dist, adversarial_counter, robust_counter, adv_ratio = test_adversarial_robustness(classifier, le, criterion, adv_v['min'] + adv_v['median'] + adv_v['max'])
        print(f'Adversarial Report for {adv_k}: ', adversarial_counter, robust_counter, adv_ratio, len(adversarial_score_dist + failed_adversarial_score_dist))
        adv_metric_dict[adv_k] = (adversarial_counter, robust_counter, adv_ratio)
        all_adversarial_score_dist.extend(adversarial_score_dist)
        all_failed_adversarial_score_dist.extend(failed_adversarial_score_dist)
    with open(f"{save_addr_base}/logs/adversarialReport_config-{model_setting}.txt", 'w') as f:
        f.write(json.dumps(adv_metric_dict))

    print('=================')
    for adv_k, adv_v in adv_addr_dict.items():
        for sub_adv_k, sub_adv_v in adv_v.items():
            cf = get_adversarial_confusion_matrix(classifier, le, sub_adv_v)
            print(adv_k, sub_adv_k, sum(cf.values()), cf)
        print('-----------')

    plot_histogram(
        data_list = [train_benign_score_dist, test_benign_score_dist, train_tsunami_score_dist, test_tsunami_score_dist, train_mirai_score_dist, test_mirai_score_dist, train_gafgyt_score_dist, test_gafgyt_score_dist, all_adversarial_score_dist],
        num_bins = 300,
        legend_list = ['Benign Train', 'Benign Test', 'Tsunami Train', 'Tsunami Test', 'Mirai Train', 'Mirai Test', 'Gafgyt Train', 'Gafgyt Test', 'Adversarial'],
        title = f'Max softmax distribution of all subsets for {model_setting}', x_label = 'Softmax Score', y_label = 'Number of Samples', save_path = f'{save_addr_base}/plots/all_scores_{model_setting}.pdf'
    )

    plot_histogram(
        data_list = [all_adversarial_score_dist, all_failed_adversarial_score_dist],
        num_bins = 300,
        legend_list = ['Adversarial', 'Failed Adversarial'],
        title = f'Max softmax distribution of adversarial samples for {model_setting}', x_label = 'Softmax Score', y_label = 'Number of Samples', save_path = f'{save_addr_base}/plots/adversrial_scores_{model_setting}.pdf'
    )


In [None]:
# Train and evaluate one classifier per configuration in the list below.
# NOTE(review): 'conv_normalize' is not read by any code visible in this notebook.
for config in [
    {
        'lr': 0.01,
        'batch_size': 128,
        'dropout': 0.0,
        'optimizer': torch.optim.Adam,
        'virtual_edges': False,
        'conv_layer_type': SAGEConv,
        'layer_size_list': [(188, 64), (64, 64), (64, 32)],
        'conv_normalize': False,
        'batch_normalization': True,
        'pooling_option': global_add_pool,
        'activation_func': F.relu,
    },
]:
    print('<<<<<<<<<<<<<<<< >>>>>>>>>>>>>>>>>')
    train_test_classifier(config)