In [None]:
# Pytorch
import torch
from torch.utils.data import Dataset

# EMBER
import ember

# SKLearn
from sklearn.preprocessing import StandardScaler

# Misc
import NonuniformRobustness
from pathlib import Path

# Fix the PyTorch RNG seed so that repeated runs start from the same state
torch.manual_seed(0)

# Base directory (the user's home directory, with a trailing slash) under
# which the EMBER data and the adversarial example sets are looked up
data_dir = f"{Path.home()}/"


# Create dataset
class MMDataset(torch.utils.data.Dataset):
    """
    Dataset that pairs each row of a feature array with its label.

    Args:
        x_tr: feature container exposing ``.shape`` and supporting
            ``x_tr[index, :]`` indexing (e.g. a 2-D numpy array); the first
            axis enumerates the samples.
        y_tr: label container supporting ``len()`` and ``y_tr[index]``; it
            must hold exactly one label per row of ``x_tr``.

    Raises:
        ValueError: if ``y_tr`` does not contain one label per row of ``x_tr``.
    """
    def __init__(self, x_tr, y_tr):
        num_samples = x_tr.shape[0]
        # Fail fast on misaligned inputs: otherwise the mismatch only surfaces
        # later as an IndexError during batching, or extra labels are
        # silently ignored.
        if len(y_tr) != num_samples:
            raise ValueError(
                "x_tr and y_tr must contain the same number of samples, "
                "got {} and {}".format(num_samples, len(y_tr))
            )
        self.sub_len = num_samples
        self.x = x_tr
        self.y = y_tr

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at position ``index``."""
        return self.x[index, :], self.y[index]

    def __len__(self):
        """Return the number of samples (rows of the feature array)."""
        return self.sub_len

EMBER dataset vectorized feature creation

In [None]:
# Vectorize the EMBER features stored under <data_dir>/ember2018/
ember.create_vectorized_features(f"{data_dir}ember2018/")


Retrieve the clean data and adversarial examples generated by problem space attacks

In [None]:
# Load the clean training and test splits
X_train, y_train, X_test, y_test = NonuniformRobustness.get_clean_data(data_dir)

# Feature count = size of the second axis of the training matrix
num_features = X_train.shape[1]

# Load the adversarial example test set that was generated beforehand by
# problem space attacks
AE = NonuniformRobustness.get_adversarial_examples(
    data_dir + "Code/Adversarial_example_sets/",
    num_features,
    attack_name="GNN",
)


Calculation of $\Omega$ for the input perturbation constraint $\|\Omega\delta\|\leq\epsilon$

In [None]:
# Transformation matrix Omega used in the perturbation constraint
Omega = NonuniformRobustness.omega(
    data_dir + "nonuniform_omega/",
    num_features,
    perturbation_constraint="Uniform",
)

Data scaling and dataset creation

In [None]:
# Standardize the features: the scaler is fitted on the training split only
# and the same fitted scaler is then applied to every split
sc = StandardScaler().fit(X_train)
X_train, X_test, AE = (sc.transform(split) for split in (X_train, X_test, AE))

# Wrap the scaled splits and their labels into datasets
train_data, test_data = MMDataset(X_train, y_train), MMDataset(X_test, y_test)

Training the model

In [None]:
# Training configuration handed to NonuniformRobustness.get_trained_model
config = {
    # Data: datasets, scaled adversarial examples and the attack they came from
    # (attack_name repeats the value used when the adversarial examples were loaded)
    'TrainData': train_data,
    'TestData': test_data,
    'AdversarialExamples': AE,
    'attack_name': 'GNN',

    # Perturbation constraint ||Omega * delta|| <= epsilon
    'omega': Omega,
    'adversarial_example_rate': 0.9,
    'epsilon': 1,

    # NOTE(review): these look like the step size, iteration count and restart
    # count of the inner attack -- confirm against NonuniformRobustness
    'alpha': 0.1,
    'num_iter': 10,
    'num_restarts': 3,

    # Optimisation settings
    'learning_rate': 0.001,
    'batch_size': 1024,
    'num_epochs': 500,

    # NOTE(review): presumably the decision threshold used for evaluation --
    # confirm where 0.8336 comes from
    'threshold': 0.8336,
}

# Train adversarially; per the notebook, the returned metrics cover the roc_auc
# score, the number of evading adversarial examples and the clean data accuracy
trained_model, performance_metrics = NonuniformRobustness.get_trained_model(
    config,
    num_features,
)