In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
import matplotlib.pyplot as plt

import torch
import numpy as np
import seaborn as sns
import torch.nn.functional as F
from torch import nn, optim

sys.path.append('scripts/')
import myutils
import gpu_utils as gu
import ptb_utils as au
import misc_utils as mu
import synth_models as sm
from synthetic_data import SemiRandomSignalCoordinate

## Train Models on Synthetic Data

In [11]:
def get_dataset():
    """Create the synthetic train/test data objects and their dataloaders.

    Returns:
        dict with two keys:
          'obj'     -- tuple (train, test) of SemiRandomSignalCoordinate objects
          'loaders' -- tuple (train, test) of the values returned by
                       ``get_dataloader`` for 10_000 / 1_000 points, batch size 250
    """
    input_dim = 10
    num_train, num_test = 10_000, 1_000
    batch_size = 250

    # Train and test objects are built with identical arguments, train first.
    # NOTE(review): the meaning of the 2nd/3rd constructor arguments
    # (1 and 1/input_dim) is defined in synthetic_data, not visible here.
    sd_tr, sd_te = (
        SemiRandomSignalCoordinate(input_dim, 1, 1./input_dim) for _ in range(2)
    )

    tr_dl = sd_tr.get_dataloader(num_train, batch_size)
    te_dl = sd_te.get_dataloader(num_test, batch_size)

    return dict(obj=(sd_tr, sd_te), loaders=(tr_dl, te_dl))

def get_model(device):
    """Build a fresh network together with its optimizer and LR scheduler.

    Args:
        device: target passed to ``model.to(...)``.

    Returns:
        (model, opt, sched): the network from ``sm.get_fcn`` moved to
        ``device``, an SGD optimizer over its parameters (lr 0.1, no weight
        decay, no momentum) and a ``LambdaLR`` scheduler wrapping that optimizer.
    """
    hparams = dict(
        input_dim=10,
        num_classes=2,
        width=25_000,
        depth=1,  # kept for reference; not forwarded to sm.get_fcn below
        activation=nn.ReLU,
        lr=0.1,
        weight_decay=0.0,
        momentum=0.0,
        decay_gap=50,
        decay_factor=0.75,
    )

    net = sm.get_fcn(hparams['input_dim'], hparams['width'], hparams['num_classes'],
                     activation=hparams['activation']).to(device)

    sgd = optim.SGD(net.parameters(),
                    **{key: hparams[key] for key in ('lr', 'weight_decay', 'momentum')})

    def lr_multiplier(epoch):
        # step decay: one factor of decay_factor per completed block of
        # decay_gap epochs, never dropping below a multiplier of 0.01
        n_decays = epoch // hparams['decay_gap']
        return max(0.01, hparams['decay_factor'] ** n_decays)

    return net, sgd, optim.lr_scheduler.LambdaLR(sgd, lr_multiplier)


# Upper bound on the number of epochs, forwarded as ``max_epochs`` to every fit routine.
train_max_epochs = 200
# ``num_iter`` handed to the L1 / L2 attacks (the Linf variants always use 0).
n_iter = 0


def _fit_with_attack(attack_cls, fit_fn, epsilon, num_iter,
                     train_data, test_data, data_bounds, device):
    """Shared body of all training functions below.

    Builds the attack, creates a fresh model/optimizer/scheduler via
    ``get_model`` and runs ``fit_fn`` for at most ``train_max_epochs`` epochs.

    Args:
        attack_cls: attack class from ``au`` (L1 / L2 / Linf PGD attack).
        fit_fn: fit routine from ``myutils`` (``pgd_adv_fit_model`` or one of
            its ``_l1`` / ``_l2`` / ``_linf`` variants).
        epsilon: attack budget; the attack step size is ``0.25 * epsilon``.
        num_iter: number of attack iterations.
        train_data, test_data: dataloaders forwarded to ``fit_fn``.
        data_bounds: forwarded to the attack as ``bounds``.
        device: device for the attack, the model and the fit routine.

    Returns:
        (stats, model): whatever ``fit_fn`` returns, and the trained model
        moved back to the CPU.
    """
    # the attack is created before the model, as in every original function
    attack = attack_cls(epsilon, lr=0.25*epsilon, num_iter=num_iter, loss_type='untargeted',
                        rand_eps=0.0, num_classes=2, bounds=data_bounds, device=device)

    model, opt, sched = get_model(device)

    stats = fit_fn(model, opt, train_data, test_data, attack,
                   sch=sched, min_loss=0.0, max_epochs=train_max_epochs,
                   device=device, epoch_gap=10)

    return stats, model.cpu()


# NOTE: in every function below the ``epsilon`` argument is ignored on purpose.
# Each function pins its own value so that a stale epsilon left over from a
# previous run cannot leak in; the parameter only keeps the signatures uniform.

# Train without regularisation
def standard_training(train_data, test_data, epsilon, data_bounds, device):
    """Linf attack, epsilon 0.0, 0 iterations, fitted with ``pgd_adv_fit_model``."""
    return _fit_with_attack(au.Linf_PGD_Attack, myutils.pgd_adv_fit_model,
                            epsilon=0.0, num_iter=0,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L1 gradient
def l1_gradient_training(train_data, test_data, epsilon, data_bounds, device):
    """L1 attack, epsilon 0.45, ``n_iter`` iterations, fitted with ``pgd_adv_fit_model``."""
    return _fit_with_attack(au.L1_PGD_Attack, myutils.pgd_adv_fit_model,
                            epsilon=0.45, num_iter=n_iter,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L1 weight
def l1_weight_training(train_data, test_data, epsilon, data_bounds, device):
    """L1 attack, epsilon 0.0, ``n_iter`` iterations, fitted with ``pgd_adv_fit_model_l1``."""
    return _fit_with_attack(au.L1_PGD_Attack, myutils.pgd_adv_fit_model_l1,
                            epsilon=0.0, num_iter=n_iter,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L1 gradient and L1 weight
def l1_gradient_l1_weight_training(train_data, test_data, epsilon, data_bounds, device):
    """L1 attack, epsilon 0.45, ``n_iter`` iterations, fitted with ``pgd_adv_fit_model_l1``."""
    return _fit_with_attack(au.L1_PGD_Attack, myutils.pgd_adv_fit_model_l1,
                            epsilon=0.45, num_iter=n_iter,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L2 gradient
def l2_gradient_training(train_data, test_data, epsilon, data_bounds, device):
    """L2 attack, epsilon 0.45, ``n_iter`` iterations, fitted with ``pgd_adv_fit_model``."""
    return _fit_with_attack(au.L2_PGD_Attack, myutils.pgd_adv_fit_model,
                            epsilon=0.45, num_iter=n_iter,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L2 weight
def l2_weight_training(train_data, test_data, epsilon, data_bounds, device):
    """L2 attack, epsilon 0.0, ``n_iter`` iterations, fitted with ``pgd_adv_fit_model_l2``."""
    return _fit_with_attack(au.L2_PGD_Attack, myutils.pgd_adv_fit_model_l2,
                            epsilon=0.0, num_iter=n_iter,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L2 gradient and L2 weight
def l2_gradient_l2_weight_training(train_data, test_data, epsilon, data_bounds, device):
    """L2 attack, epsilon 0.45, ``n_iter`` iterations, fitted with ``pgd_adv_fit_model_l2``."""
    return _fit_with_attack(au.L2_PGD_Attack, myutils.pgd_adv_fit_model_l2,
                            epsilon=0.45, num_iter=n_iter,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L1 gradient and L2 weight
def l1_gradient_l2_weight_training(train_data, test_data, epsilon, data_bounds, device):
    """L1 attack, epsilon 0.45, ``n_iter`` iterations, fitted with ``pgd_adv_fit_model_l2``."""
    return _fit_with_attack(au.L1_PGD_Attack, myutils.pgd_adv_fit_model_l2,
                            epsilon=0.45, num_iter=n_iter,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L2 gradient and L1 weight
def l2_gradient_l1_weight_training(train_data, test_data, epsilon, data_bounds, device):
    """L2 attack, epsilon 0.45, ``n_iter`` iterations, fitted with ``pgd_adv_fit_model_l1``."""
    return _fit_with_attack(au.L2_PGD_Attack, myutils.pgd_adv_fit_model_l1,
                            epsilon=0.45, num_iter=n_iter,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L_inf gradient
def linf_gradient_training(train_data, test_data, epsilon, data_bounds, device):
    """Linf attack, epsilon 0.45, 0 iterations, fitted with ``pgd_adv_fit_model``."""
    return _fit_with_attack(au.Linf_PGD_Attack, myutils.pgd_adv_fit_model,
                            epsilon=0.45, num_iter=0,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


# Train with L_inf weight
def linf_weight_training(train_data, test_data, epsilon, data_bounds, device):
    """Linf attack, epsilon 0.0, 0 iterations, fitted with ``pgd_adv_fit_model_linf``."""
    return _fit_with_attack(au.Linf_PGD_Attack, myutils.pgd_adv_fit_model_linf,
                            epsilon=0.0, num_iter=0,
                            train_data=train_data, test_data=test_data,
                            data_bounds=data_bounds, device=device)


### Dataset

In [None]:
# Create a new dataset and visualize a subsample (the first 40 test points).
dataset = get_dataset()

# attack bounds come from the training data object; loaders are (train, test)
bounds = dataset['obj'][0].bounds
train_data, test_data = dataset['loaders']

fig, ax = plt.subplots(1,1,figsize=(6,4))
# draw on the axes created above explicitly instead of relying on the
# implicit "current axes" state of pyplot
ax = sns.heatmap(dataset['obj'][1].X[:40], linewidth=0.1, ax=ax)
myutils.update_ax(ax, r'Synthetic Dataset: $\{(y\cdot e_j, y)\}^{n}_1$', 'Coordinates', 'Data points',  
                legend_loc=False, despine=False, title_fs=20, label_fs=18)

# label coordinates 1..10; the trailing ';' keeps the list of Text objects
# out of the cell output
ax.set_xticklabels(np.arange(1,11));

#fig.savefig('../plots/data.pdf', dpi=fig.dpi, bbox_inches='tight')

### Train Standard Model

In [None]:
# Standard training: epsilon is 0.0 (change the 0 in get_device to None if cpu).
device, epsilon = gu.get_device(0), 0.0

std_stats, std_model = standard_training(
    train_data=train_data,
    test_data=test_data,
    epsilon=epsilon,
    data_bounds=bounds,
    device=device,
)
# persist only the weights; the model has already been moved back to the CPU
torch.save(std_model.state_dict(), f="../pretrained_models/std_model.t7")

### Train $L_1$ Models

In [None]:
# change the 0 in get_device to None if cpu; epsilon is only a placeholder,
# it gets changed accordingly inside the training functions
device, epsilon = gu.get_device(0), None

# L1 "weight" variant
l1_weight_stats, l1_weight_model = l1_weight_training(
    train_data=train_data, test_data=test_data,
    epsilon=epsilon, data_bounds=bounds, device=device,
)
torch.save(l1_weight_model.state_dict(), f="../pretrained_models/l1_weight.t7")

# L1 "gradient" variant
l1_gradient_stats, l1_gradient_model = l1_gradient_training(
    train_data=train_data, test_data=test_data,
    epsilon=epsilon, data_bounds=bounds, device=device,
)
torch.save(l1_gradient_model.state_dict(), f="../pretrained_models/l1_gradient.t7")

### Train $L_2$ Models

In [None]:
device = gu.get_device(0) # change 0 to None if cpu
# Define epsilon in this cell so it does not depend on an earlier cell having
# been run; the value is only a placeholder, each training function sets its own.
epsilon = None

l2_weight_stats, l2_weight_model = l2_weight_training(train_data, test_data, epsilon, bounds, device)
torch.save(l2_weight_model.state_dict(), "../pretrained_models/l2_weight.t7")

l2_gradient_stats, l2_gradient_model = l2_gradient_training(train_data, test_data, epsilon, bounds, device)
torch.save(l2_gradient_model.state_dict(), "../pretrained_models/l2_gradient.t7")

### Train $L_\infty$ Models

In [None]:
device = gu.get_device(0) # change 0 to None if cpu
# Define epsilon in this cell so it does not depend on an earlier cell having
# been run; the value is only a placeholder, each training function sets its own.
epsilon = None

linf_gradient_stats, linf_gradient_model = linf_gradient_training(train_data, test_data, epsilon, bounds, device)
torch.save(linf_gradient_model.state_dict(), "../pretrained_models/l_inf_gradient.t7")

linf_weight_stats, linf_weight_model = linf_weight_training(train_data, test_data, epsilon, bounds, device)
torch.save(linf_weight_model.state_dict(), "../pretrained_models/l_inf_weight.t7")

## Load Models

In [None]:
device = gu.get_device(0) # change 0 to None if cpu

models_path = "../pretrained_models"


def _load_pretrained(file_name):
    """Build a fresh model on ``device`` and load the weights stored in ``file_name``.

    Args:
        file_name: checkpoint file name inside ``models_path``.

    Returns:
        The model with the stored state dict loaded, switched to eval mode.
    """
    # optimizer and scheduler returned by get_model are not needed for inference
    model, _, _ = get_model(device)
    model.load_state_dict(torch.load(models_path + "/" + file_name))
    return model.eval()


std_model = _load_pretrained("std_model.t7")

l1_weight_model = _load_pretrained("l1_weight.t7")
l1_gradient_model = _load_pretrained("l1_gradient.t7")

l2_weight_model = _load_pretrained("l2_weight.t7")
l2_gradient_model = _load_pretrained("l2_gradient.t7")

linf_weight_model = _load_pretrained("l_inf_weight.t7")
linf_gradient_model = _load_pretrained("l_inf_gradient.t7")


### Evaluating Standard and Robust Models

In [None]:
device = gu.get_device(None)
accs = {}
# perturbation budgets at which every model is evaluated
epsilons = np.linspace(0, 0.2, 20)

# models to evaluate, keyed by the short name used in `accs` and `labels`
models = {
    'Standard': std_model,
    'Weights L1': l1_weight_model,
    'Gradients L1': l1_gradient_model,
    'Weights L2': l2_weight_model,
    'Gradients L2': l2_gradient_model,
    'Weights Linf': linf_weight_model,
    'Gradients Linf': linf_gradient_model
}

# legend text for each model name
labels = {
    'Standard': r'standard model',
    'Weights L1': r'weights $\ell_1$ regularized',
    'Gradients L1': r'gradients $\ell_1$ regularized',
    'Weights L2': r'weights $\ell_2$ regularized',
    'Gradients L2': r'gradients $\ell_2$ regularized',
    'Weights Linf': r'weights $\ell_{\infty}$ regularized',
    'Gradients Linf': r'gradients $\ell_{\infty}$ regularized'
}


def pgd_attack(eps):
    """Return an 8-iteration untargeted Linf PGD attack with budget ``eps`` and step ``0.25*eps``."""
    return au.Linf_PGD_Attack(eps, 0.25*eps, 8, 'untargeted',
                              rand_eps=0., num_classes=2,
                              bounds=bounds, device=device)


for model_name, model in models.items():
    model = model.to(device)
    accs[model_name] = mu.evaluate_robustness([model], test_data, pgd_attack, 
                                              epsilons, device, print_info=False)[0]
    # move the model back to the CPU; `models` keeps the reference, so there
    # is nothing to gain from deleting the loop variable
    model.cpu()

fig, ax = plt.subplots(1,1,figsize=(10,4))
for model_name, model_accs in accs.items():
    # each entry maps epsilon -> accuracy; sort by epsilon before plotting
    x, y = map(np.array, zip(*sorted(model_accs.items())))
    ax.plot(x, y, ms=5, mfc='w', mew=2, lw=2, label=labels[model_name])
myutils.update_ax(ax, r'Evaluating Accuracy and Robustness', 
                r'Perturbation Budget $\epsilon$', r'Accuracy', ticks_fs=14, label_fs=18, title_fs=20)
ax.grid()
# `prop` alone sets the legend font size; a separate `fontsize` argument is
# ignored by matplotlib whenever `prop` is given
ax.legend(loc='best', ncol=1, frameon=True, fancybox=True, prop={'size': 10})

ax.set_ylim(-0.05,1.05)

fig.savefig('../plots/synthetic_data_robustness.pdf', dpi=fig.dpi, bbox_inches='tight')

## Evaluating Input Gradients


In [19]:
# Compute, for every model, the loss gradients w.r.t. the test inputs.
device = gu.get_device(0)
test_dataset = dataset['obj'][1]

# pre-seed the result dict so it keeps the key order of `models`
grads = dict.fromkeys(models)
for model_name in grads:
    model = models[model_name].to(device)
    grads[model_name] = test_dataset.get_input_gradients(model, device)
    # free the device again before moving on to the next model
    model = model.cpu()

In [None]:
# FIG: for each model, the fraction of test points whose smallest-magnitude
# input-gradient coordinate coincides with the largest-magnitude (signal)
# coordinate of the data point
print ("Pr[bottom-most gradient attribution higlights signal]")

for model_name, G in grads.items():
    hits = [
        # argsort is ascending: [-1] is the largest |x|, [0] the smallest |g|
        float(np.argsort(np.abs(x))[-1] == np.argsort(np.abs(g))[0])
        for x, g in zip(test_dataset.X, G)
    ]
    print ("{} Model: {:.3f}".format(model_name, np.mean(hits)))

In [None]:
# RG-FIG: for each model, the fraction of test points whose largest-magnitude
# input-gradient coordinate coincides with the largest-magnitude (signal)
# coordinate of the data point
print ("Pr[top-most gradient attribution higlights signal]")

for model_name, G in grads.items():
    hits = [
        # argsort is ascending: [-1] picks the largest magnitude in both cases
        float(np.argsort(np.abs(x))[-1] == np.argsort(np.abs(g))[-1])
        for x, g in zip(test_dataset.X, G)
    ]
    print ("{} Model: {:.3f}".format(model_name, np.mean(hits)))

In [None]:
# Single-panel figure for visualizing input gradients; the x tick labels are
# the coordinate numbers 1..10.
xticks = np.arange(1, 11)
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(6, 5))


def filter_dict(d, keys_list):
    """Return a new dict with the entries of ``d`` whose keys appear in ``keys_list``.

    Keys missing from ``d`` are skipped; the result follows the order of
    ``keys_list``.
    """
    selected = {}
    for key in keys_list:
        if key not in d:
            continue
        selected[key] = d[key]
    return selected


# restrict the plot to the standard model
my_grads = filter_dict(grads, ["Standard"])

for ax, (model_name, G) in zip((axs,), my_grads.items()):
    # normalize every row by its largest magnitude to improve visualization
    G = np.abs(G[:40])
    row_max = G.max(axis=1)
    G /= row_max[:, np.newaxis]

    # plot gradients; the color scale only covers the range 0.8 .. 1
    ax = sns.heatmap(G, ax=ax, vmin=0.8, vmax=1, linewidth=0.01,
                     xticklabels=xticks, yticklabels=False)
    myutils.update_ax(ax, title=labels[model_name],
                      xlabel=r'Coordinates', ylabel=r'Input Gradients',
                      ticks_fs=15, label_fs=18, title_fs=22,
                      despine=False, legend_loc=False)

    ax.tick_params(axis='both', which='both', length=3)
    ax.set_xticklabels(xticks, fontsize=15)


#fig.savefig('../plots/feature_inversion_synthetic_data.pdf', dpi=fig.dpi, bbox_inches='tight')

In [None]:
# Creates all 8 plots next to each other: the input data followed by the
# input gradients of the standard and robust models.
fig, axs = plt.subplots(3,3,figsize=(10, 10))
xticks = np.arange(1,11)

# flatten the 3x3 grid row by row into a plain list of axes
axs = list(axs.flat)

# first panel: the input data itself (first 40 test points), so the gradient
# panels can be compared against it
data_ax, grad_axs = axs[0], axs[1:]
sns.heatmap(test_dataset.X[:40], ax=data_ax, linewidth=0.01, yticklabels=False, xticklabels=xticks)
myutils.update_ax(data_ax, title=r'Input Data', xlabel=r'Coordinates', ylabel=r'Data Points',
                ticks_fs=10, label_fs=10, title_fs=14, despine=False, legend_loc=False)
data_ax.tick_params(which='both', axis='both', length=3)
data_ax.set_xticklabels(xticks, fontsize=10)

# remaining panels: one per model, starting AFTER the input-data panel so
# that panel is not overwritten
for ax, (model_name, G) in zip(grad_axs, grads.items()):
    # normalize gradients to improve visualization
    G = np.abs(G[:40])
    G /= G.max(axis=1).reshape(-1, 1)

    # plot gradients
    ax = sns.heatmap(G, ax=ax, vmax=1, vmin=0.8, linewidth=0.01, yticklabels=False, xticklabels=xticks)
    myutils.update_ax(ax, title=labels[model_name], xlabel=r'Coordinates', ylabel=r'Input Gradients', 
                    ticks_fs=10, label_fs=10, title_fs=14, despine=False, legend_loc=False)

    ax.tick_params(which='both', axis='both', length=3)
    ax.set_xticklabels(xticks, fontsize=10)

# hide grid cells that did not receive a plot
for ax in grad_axs[len(grads):]:
    ax.set_axis_off()

sup = fig.suptitle('Input Gradients on Synthetic Data', y=1.04, fontsize=20)
fig.tight_layout(pad=1.5)
#fig.savefig('../plots/feature_inversion_synthetic_data_all.pdf', dpi=fig.dpi, bbox_inches='tight', bbox_extra_artists=[sup])