### Imports

In [None]:
# Misc
import random
import numpy as np
import pandas as pd
import math
import os
import time
import sys

In [None]:
# Sklearn
import sklearn
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Pytorch
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

import tensorflow
from tensorflow.keras.utils import to_categorical

Data Preprocessing

In [None]:

import os
import time
import importlib
import json
from collections import OrderedDict
import logging
import argparse
import numpy as np
import random

import torch
import torch.nn as nn
import torch.optim
import torch.utils.data
import torch.backends.cudnn
import torchvision.utils

from dataloader import get_loader

from numpy import loadtxt

from os import path


# Root logging setup: timestamped records, DEBUG verbosity.
_LOG_SETTINGS = {
    'level': logging.DEBUG,
    'datefmt': '%Y/%m/%d %H:%M:%S',
    'format': '[%(asctime)s %(name)s %(levelname)s] - %(message)s',
}
logging.basicConfig(**_LOG_SETTINGS)
logger = logging.getLogger(__name__)

# Count of optimisation steps taken so far; train() increments it per batch.
global_step = 0

# Module-wide verbosity flag (a later cell rebinds it to True).
debug = False

def str2bool(s):
    """Translate the text 'true' / 'false' (any letter case) into a bool.

    Args:
        s: string to interpret.

    Returns:
        True for 'true', False for 'false'.

    Raises:
        RuntimeError: if ``s`` is neither spelling.
    """
    recognised = {'true': True, 'false': False}
    key = s.lower()
    if key not in recognised:
        raise RuntimeError('Boolean value expected')
    return recognised[key]


def parse_args():
    """Build the default experiment configuration.

    Despite the name, no command-line arguments are read; every value is
    fixed here.

    Returns:
        OrderedDict with three OrderedDict sections, in this order:
        'model_config', 'optim_config' and 'run_config'.
    """
    model_config = OrderedDict(
        arch='resnet',
        block_type='basic',
        depth=8,  # depth=8 for ResNet8, and depth=50 for ResNet50
        base_channels=16,
        input_shape=(1, 3, 32, 32),
        n_classes=1,
    )

    optim_config = OrderedDict(
        epochs=5,
        batch_size=16,  # we also use this number for the evaluation
        base_lr=1e-3,
        weight_decay=1e-4,
        milestones=json.loads('[10, 20, 25]'),
        lr_decay=0.1,
    )

    run_config = OrderedDict(
        seed=17,
        outdir='result',
        networkdir='neural_network',
        debug=True,
    )

    return OrderedDict(
        model_config=model_config,
        optim_config=optim_config,
        run_config=run_config,
    )


def load_model(config):
    """Instantiate the network described by ``config``.

    The module named by ``config['arch']`` is imported and its ``Network``
    attribute is called with ``config`` as the only argument.

    Args:
        config: mapping holding at least the key 'arch'.

    Returns:
        Whatever ``Network(config)`` returns.
    """
    network_cls = importlib.import_module(config['arch']).Network
    return network_cls(config)


class AverageMeter(object):
    """Keep the latest value and the running weighted mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard everything accumulated so far."""
        self.val = 0    # most recent value passed to update()
        self.avg = 0    # weighted mean of all values seen
        self.sum = 0    # sum of val * num over all updates
        self.count = 0  # sum of num over all updates

    def update(self, val, num=1):
        """Record ``val`` observed over ``num`` samples.

        Args:
            val: the new measurement (e.g. a batch-mean loss).
            num: weight of the measurement, typically the batch size.
                Defaults to 1.
        """
        self.val = val
        self.sum += val * num
        self.count += num
        # A zero total weight (e.g. an empty first batch) leaves the mean at 0
        # instead of raising ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0

def data_parser(data_file='/content/u5k-r5k-auth12k.sample', eval_fraction=0.20):
    """Load the access-control sample file and split it into train/test sets.

    Each row is read as space-separated text. The first two columns (uid and
    rid) are dropped, leaving 20 columns: 16 attribute columns followed by 4
    further columns (operations, per the original comments).

    Args:
        data_file: path of the sample file. Defaults to the Colab location
            used by the notebook.
        eval_fraction: share of the rows, taken from the top of the file,
            reserved for evaluation. Defaults to 0.20.

    Returns:
        Tuple ``(X_Train, X_Test, Y_Train, Y_Test, metadata_target)``.
        ``metadata_target`` holds the 16 attribute columns plus the first
        operation column for every row (unsplit).
    """
    cols = 22  # columns consumed per row, including the two leading id columns

    # load the dataset; values stay strings until the encoders convert them
    raw_dataset = loadtxt(data_file, delimiter=' ', dtype=str)
    dataset = raw_dataset[:, 2:cols]  # skip uid and rid

    # split into user-resource attributes and operation columns
    feature = dataset.shape[1]
    attribs = feature - 4

    continuous_data = dataset[:, 0:attribs - 8]  # assume first eight attributes are continuous
    categorical_data = dataset[:, 8:attribs]     # assume second eight attributes are categorical
    target_data = dataset[:, attribs]            # target label -- permit or deny (column 16)
    metadata_target = dataset[:, 0:attribs + 1]  # all metadata and one operation

    # presumably yields one one-hot vector per categorical cell, i.e. a 3-D
    # array -- TODO confirm against the Keras documentation
    categorical_encoded = to_categorical(categorical_data)

    continuous_data = MinMaxScaler().fit_transform(continuous_data)

    # Give each continuous value a trailing axis so it can be appended to the
    # encoded categorical block along axis 2.
    continuous_data = continuous_data[..., np.newaxis]
    combined_data = np.concatenate((categorical_encoded, continuous_data), axis=2)

    # The evaluation set is the leading slice of the file; rows are not shuffled.
    eval_size = int(combined_data.shape[0] * eval_fraction)

    X_Test = combined_data[:eval_size, :]
    Y_Test = target_data[:eval_size]
    X_Train = combined_data[eval_size:, :]
    Y_Train = target_data[eval_size:]

    return X_Train, X_Test, Y_Train, Y_Test, metadata_target


Model Training and Testing Methods

In [None]:

def train(epoch, model, optimizer, criterion, train_loader, run_config):
    """Run one optimisation pass over ``train_loader``.

    The model is cast to float64 and put in training mode. For each batch the
    model output is passed through a sigmoid before ``criterion`` is applied,
    and the module-level ``global_step`` counter is incremented.

    Args:
        epoch: epoch number, used only in log messages.
        model: network to train.
        optimizer: optimizer stepping the model's parameters.
        criterion: loss applied to ``(sigmoid(output), targets)``.
        train_loader: iterable of ``(data, targets)`` batches.
        run_config: accepted for interface symmetry; not read here.
    """
    global global_step

    logger.info(f'Train {epoch}')

    model = model.double()
    model.train()

    began = time.time()
    for step, (batch, labels) in enumerate(train_loader):
        global_step += 1

        optimizer.zero_grad()
        # Targets get a trailing axis; the criterion then sees a column vector.
        labels = labels.double().unsqueeze(1)
        probabilities = torch.sigmoid(model(batch.double()))
        loss = criterion(probabilities, labels)

        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            logger.info(f'Epoch {epoch} Step {step}/{len(train_loader)}')

    logger.info('Elapsed {:.2f}'.format(time.time() - began))


def test(epoch, model, criterion, test_loader, run_config):
    """Evaluate ``model`` on ``test_loader`` and log the mean loss.

    The model is cast to float64 and switched to eval mode. Outputs are
    passed through a sigmoid before ``criterion`` is applied, mirroring
    ``train``. The per-batch loss is averaged over all samples and logged.

    Args:
        epoch: epoch number, used only in log messages.
        model: network to evaluate.
        criterion: loss applied to ``(sigmoid(output), targets)``.
        test_loader: iterable of ``(data, targets)`` batches.
        run_config: accepted for interface symmetry; not read here.

    Returns:
        None.
    """
    logger.info('Test {}'.format(epoch))
    model = model.double()
    model.eval()

    loss_sum = 0.0
    n_samples = 0

    start = time.time()
    # No gradients are needed anywhere in evaluation, so the whole loop runs
    # under no_grad rather than just the forward pass.
    with torch.no_grad():
        for data, targets in test_loader:
            targets = targets.double().unsqueeze(1)
            outputs = torch.sigmoid(model(data.double()))
            loss = criterion(outputs, targets)

            # .mean() is a no-op for an already-reduced loss and keeps this
            # working if the criterion returns per-element values.
            batch_size = targets.size(0)
            loss_sum += loss.mean().item() * batch_size
            n_samples += batch_size

    if n_samples:
        logger.info('Epoch {} Test loss {:.4f}'.format(epoch, loss_sum / n_samples))
    else:
        logger.warning('Test loader yielded no samples; no loss to report')

    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))

    return

Training ResNet

In [None]:
def main():
    """Train the model end to end, checkpointing after every epoch.

    Steps: build the default config, seed the RNGs, create the output and
    checkpoint directories, write ``config.json``, load the data, build the
    model, then alternate ``train`` / ``test`` for the configured number of
    epochs. After each epoch the checkpoint file ``mlbac_model.pth`` in the
    network directory is overwritten.

    Returns:
        The trained model.
    """
    # parse arguments
    config = parse_args()

    run_config = config['run_config']
    optim_config = config['optim_config']
    debug = run_config['debug']  # local flag; shadows the module-level ``debug``

    # set random seed
    seed = run_config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # create output directory
    outdir = run_config['outdir']
    os.makedirs(outdir, exist_ok=True)

    # create neural_network directory
    networkdir = run_config['networkdir']
    os.makedirs(networkdir, exist_ok=True)
    # Same file every epoch, so the path is computed once up front.
    model_path = os.path.join(networkdir, 'mlbac_model.pth')

    # save config as json file in output directory
    # (written before input_shape is replaced below, so the file records the default)
    outpath = os.path.join(outdir, 'config.json')
    with open(outpath, 'w') as fout:
        json.dump(config, fout, indent=2)

    x_train, x_test, y_train, y_test, metadata_target = data_parser()
    if debug:
        print('x_train shape after return:', x_train.shape)
        print('y_train shape after return:', y_train.shape)

    model_config = config['model_config']
    if debug:
        print('before assigning, default input shape', model_config['input_shape'])

    # One sample with two leading singleton axes defines the model's input shape.
    input_shape = x_train[0].reshape((1, 1,) + x_train[0].shape)
    model_config['input_shape'] = input_shape.shape
    if debug:
        print('model config input shape', model_config['input_shape'])

    train_loader, test_loader = get_loader(optim_config['batch_size'],
                                           x_train, x_test, y_train, y_test)

    if debug:
        print('train_loader len', len(train_loader), 'test_loader', len(test_loader))

    model = load_model(config['model_config'])
    n_params = sum(param.numel() for param in model.parameters())
    logger.info('n_params: {}'.format(n_params))

    criterion = torch.nn.BCELoss()

    # optimizer
    # NOTE(review): optim_config['weight_decay'] is defined but not passed to
    # Adam -- confirm whether that is intended.
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=optim_config['base_lr'])

    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=optim_config['milestones'],
        gamma=optim_config['lr_decay'])

    # Baseline evaluation of the untrained model.
    test(0, model, criterion, test_loader, run_config)

    for epoch in range(1, optim_config['epochs'] + 1):
        # NOTE(review): the model is cast to float32 here and back to float64
        # inside train(); confirm the round trip is intended.
        model = model.float()
        train(epoch, model, optimizer, criterion, train_loader, run_config)
        scheduler.step()
        test(epoch, model, criterion, test_loader, run_config)

        state = OrderedDict([
            ('config', config),
            ('state_dict', model.state_dict()),
            ('optimizer', optimizer.state_dict()),
            ('epoch', epoch),
            #('accuracy', accuracy),
        ])
        torch.save(state, model_path)

    print('End of model training. Trained model exported to: ', model_path)

    return model


Train the ML model from scratch. Once the model has been trained, there is **no need** to retrain it: the saved model is loaded for the adversarial attack simulation.

In [None]:
# Run the training pipeline defined in main(); it saves a checkpoint after
# every epoch and returns the trained model.
main()

### Generate adversarial examples

In [None]:
debug = True


def train_load_save_model(model_obj, model_path):
    """Load saved weights from ``model_path`` into ``model_obj``.

    The checkpoint is expected to be a mapping with a 'state_dict' entry.
    Despite the name, this function neither trains nor saves anything.

    Args:
        model_obj: module whose ``load_state_dict`` receives the weights.
        model_path: path of the checkpoint file.

    Returns:
        None. If ``model_path`` is not an existing file the model is left
        untouched and a ``RuntimeWarning`` is emitted, so that a later
        evaluation on un-restored weights does not go unnoticed.
    """
    import warnings

    if not path.isfile(model_path):
        warnings.warn(
            'No checkpoint found at {}; the model keeps its current '
            'weights'.format(model_path),
            RuntimeWarning,
            stacklevel=2)
        return

    if debug:
        print('Loading pre-trained model from: {}'.format(model_path))
    checkpoint = torch.load(model_path)
    model_obj.load_state_dict(checkpoint['state_dict'])
    if debug:
        print('model loading successful!')


def restore_trained_model_data():
    """Rebuild the data pipeline and model, then load the saved checkpoint.

    The same configuration, seeds and data split as in ``main`` are used.
    The checkpoint ``mlbac_model.pth`` is read from
    ``run_config['networkdir']``. As a side effect ``config.json`` is
    rewritten in the output directory.

    Returns:
        Tuple ``(metadata_target, testdata, targets, model)`` where
        ``testdata`` and ``targets`` are the first batch yielded by the test
        loader and ``model`` is in eval mode.
    """
    # parse arguments
    config = parse_args()

    run_config = config['run_config']
    optim_config = config['optim_config']
    # Diagnostics stay off here regardless of run_config['debug'].
    debug = False

    # set random seed
    seed = run_config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # create output directory
    outdir = run_config['outdir']
    os.makedirs(outdir, exist_ok=True)

    # save config as json file in output directory
    outpath = os.path.join(outdir, 'config.json')
    with open(outpath, 'w') as fout:
        json.dump(config, fout, indent=2)

    x_train, x_test, y_train, y_test, metadata_target = data_parser()
    if debug:
        print('x_train shape after return:', x_train.shape)
        print('y_train shape after return:', y_train.shape)

    model_config = config['model_config']
    if debug:
        print('before assigning, default input shape', model_config['input_shape'])

    # One sample with two leading singleton axes defines the model's input shape.
    input_shape = x_train[0].reshape((1, 1,) + x_train[0].shape)
    model_config['input_shape'] = input_shape.shape
    if debug:
        print('model config input shape', model_config['input_shape'])

    train_loader, test_loader = get_loader(optim_config['batch_size'],
                                           x_train, x_test, y_train, y_test)

    if debug:
        print('train_loader len', len(train_loader), 'test_loader', len(test_loader))

    model = load_model(config['model_config'])
    n_params = sum(param.numel() for param in model.parameters())
    logger.info('n_params: {}'.format(n_params))

    # Read the checkpoint from the directory main() writes it to.
    model_path = os.path.join(run_config['networkdir'], 'mlbac_model.pth')
    train_load_save_model(model, model_path)
    model.eval()

    # Only the first test batch is handed to the attack evaluation.
    testdata, targets = next(iter(test_loader))

    return metadata_target, testdata, targets, model

**Determine Accessibility Constraint in terms of security levels**

In [None]:

def get_security_levels(metadatadata_target):
    """Derive a per-feature security level from correlation with the target.

    The input is min-max scaled, the correlation matrix of its columns is
    computed, and the absolute correlation of each feature column with the
    last ('access') column is taken. The resulting vector is divided by its
    norm.

    Args:
        metadatadata_target: 2-D array with 17 columns -- the 16 metadata
            features named in ``feature_names`` followed by the access
            decision. (The parameter name carries a historical typo; it is
            kept so existing callers are unaffected.)

    Returns:
        1-D numpy array with one normalised security level per feature, in
        ``feature_names`` order.
    """
    # Work on the argument itself rather than a same-named notebook global.
    metadata = metadatadata_target

    print(metadata.shape)
    target = 'access'
    feature_names = ['umeta0','umeta1','umeta2','umeta3','rmeta0','rmeta1','rmeta2','rmeta3','umeta4','umeta5','umeta6','umeta7','rmeta4','rmeta5','rmeta6','rmeta7']

    scaler = MinMaxScaler()
    scaler.fit(metadata)
    df_norm = scaler.transform(metadata)

    df = pd.DataFrame(df_norm, columns=feature_names + [target])
    cor = df.corr()
    cor_target = abs(cor[target])
    print('correlation with respect to target is successful!')

    # Drop the target's correlation with itself (last entry) before normalising.
    security_levels = cor_target[:-1]
    security_levels = security_levels / np.linalg.norm(security_levels)

    return security_levels.values

In [None]:
def get_accessibility_constraint(security_levels, rows, cols):
    """Lay the security levels out as a ``(rows, cols)`` matrix.

    ``security_levels`` is cut into two equal halves. The second half
    (categorical features) fills every column except the last; the first
    half (continuous features) fills the last column.

    Args:
        security_levels: array whose halves each hold ``rows`` values.
        rows: number of matrix rows.
        cols: number of matrix columns.

    Returns:
        Float numpy array of shape ``(rows, cols)``.
    """
    continuous_half, categorical_half = np.split(security_levels, 2)

    constraint = np.ones((rows, cols), dtype=float)

    # Rows of the transposed view are the columns of the matrix; writing into
    # them updates ``constraint`` in place.
    for column_view in constraint.T[:-1]:
        column_view[:] = categorical_half

    constraint[:, cols - 1] = continuous_half

    return constraint

**Load** the **trained model** for the adversarial attack simulation, and generate the **accessibility constraint**.

In [None]:
# Rebuild the data pipeline and model, load the saved checkpoint, and keep the
# first test batch for the attack evaluation.
metadata_target, testdata, targets, model = restore_trained_model_data()
# Cast the model to float64; the evaluation loop casts each sample with .double() too.
model = model.double()

# One security level per metadata feature, derived from correlation with the target.
sec_levels = get_security_levels(metadata_target)

# Dimensions 1 and 2 of a single test sample give the constraint matrix's
# row and column counts.
r = int(testdata[0].shape[1])
c = int(testdata[0].shape[2])
accessibility_constraint = get_accessibility_constraint(sec_levels, r, c)


***Evaluate the adversarial attack performance***

The **number of samples** used in an evaluation is controlled by "**batch_size**" in the configuration; e.g. to evaluate on 200 samples, set batch_size=200.

In [None]:
# import CustomLowProFool approach
# Attack every sample of the loaded test batch and count how often the
# model's prediction changes.
# import CustomLowProFool approach
from CustomLowProFool import lowProFoolWithAccessibilityConstraint, lowProFoolWithNoAccessibilityConstraint

# Tallies: total prediction changes, plus a breakdown by attack target
# (deny = 0, grant = 1) and outcome. Only successful_adv is printed; the
# breakdown appears only in the commented-out print at the end.
successful_adv = 0
success_deny = 0
success_grant = 0
fail_deny = 0
fail_grant = 0

# Attack hyper-parameters forwarded unchanged to the LowProFool helper.
maxiters = 25
alpha = 0.2
omega = 6.0

print('Evaluating for sample count: ', testdata.shape[0])

for row in range(testdata.shape[0]):
    data = testdata[row]
    data = data.double()
    # Add a leading axis so the single sample is passed as a batch of one.
    x_tensor = data.reshape((1,)+data.shape)
    
    orig_pred, adv_pred, x_adv = lowProFoolWithAccessibilityConstraint(x_tensor, model, accessibility_constraint, maxiters, alpha, omega)
    #orig_pred, adv_pred, x_adv = lowProFoolWithNoAccessibilityConstraint(x_tensor, model, maxiters, alpha, omega)
    # The attack counts as successful when the prediction changed.
    if orig_pred != adv_pred:
      successful_adv += 1
    # The attack aims for the opposite of the original prediction
    # (assumes predictions are 0 or 1 -- TODO confirm against CustomLowProFool).
    target_pred = np.abs(1 - orig_pred)
    
    if target_pred == 0.0 and adv_pred == target_pred:
      success_deny += 1
    elif target_pred == 1.0 and adv_pred == target_pred:
      success_grant += 1
    elif target_pred == 0.0 and adv_pred != target_pred:
      fail_deny += 1
    elif target_pred == 1.0 and adv_pred != target_pred:
      fail_grant += 1

print('successful adversarial samples: ', successful_adv)
# NOTE(review): divides by the batch size; an empty test batch would raise
# ZeroDivisionError here.
print('Success Rate', successful_adv/ testdata.shape[0])
#print('succss_deny', success_deny, 'success_grant', success_grant, 'fail_deny', fail_deny, 'fail_grant', fail_grant)


Evaluating for sample count:  16
successful adversarial samples:  12
Success Rate 0.75
