In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import baselines as b
import torch
from torch_models import load_models, BinaryClassifier, MultiClassifier, try_region_multi
from attacks import try_region_binary, distributional_oracle_multi, distributional_oracle_binary
%matplotlib inline

In [2]:
# Norm budget for the adversarial noise. The baselines below either rescale
# their noise vectors to this norm or use it to filter models/solutions.
# Toggle the two lines together with the other binary/multiclass switches.
noise_budget = 1.3 # Multiclass noise budget
# noise_budget =2.3 # Binary noise budget

In [None]:
# exp_dirs = {'binary_oracle': 'experiment_results/binary_oracle_1/',
#             'binary_pgd': 'experiment_results/binary_pgd_1/' }

In [3]:
# Maps an experiment name to the directory holding its saved results; each
# directory is read by `load_experiment_results` (noise_vectors.pt,
# expected_losses.npy, minimum_losses.npy).
exp_dirs = {'multi_oracle': 'experiment_results/multi_oracle_1/',
            'multi_pgd': 'experiment_results/multi_pgd_1/' }

In [4]:
def load_experiment_results(exp_dirs):
    """Load the saved artefacts of every experiment directory.

    Args:
        exp_dirs: dict mapping an experiment name to the directory that holds
            its results. The directory may be given with or without a
            trailing path separator.

    Returns:
        dict mapping each experiment name to a dict with the keys
        'noise_vectors' (loaded with ``torch.load``), 'expected_losses' and
        'minimum_losses' (both loaded with ``np.load``).
    """
    exp_results = dict()
    for name, exp_dir in exp_dirs.items():
        # os.path.join instead of string concatenation: a directory written
        # without a trailing slash must not turn into '<dir>noise_vectors.pt'.
        # NOTE: torch.load unpickles its input; only point this at result
        # files you produced yourself.
        exp_results[name] = {
            'noise_vectors': torch.load(os.path.join(exp_dir, 'noise_vectors.pt')),
            'expected_losses': np.load(os.path.join(exp_dir, 'expected_losses.npy')),
            'minimum_losses': np.load(os.path.join(exp_dir, 'minimum_losses.npy')),
        }
    return exp_results

In [5]:
# Saved MWU results (noise vectors and loss curves) for every entry of `exp_dirs`.
experiment_results = load_experiment_results(exp_dirs)

In [6]:
# Evaluation data for the active setting (switch the folder together with the
# other binary/multiclass toggles). The paths are built by plain string
# concatenation, so `data_folder` must keep its trailing slash.
# data_folder = 'experiment_data/linear/binary/'
data_folder = 'experiment_data/linear/multi/'
images = torch.load(data_folder + 'mnist_images.pt')
labels = torch.load(data_folder + 'mnist_labels.pt')

In [7]:
# Classifiers under attack, loaded through `torch_models.load_models`; keep the
# chosen name consistent with `data_folder` and `noise_budget`.
# models = load_models('mnist_binary')
models = load_models('mnist_multi')



In [11]:
# sanity checks: accuracy of every individual model, and of their averaged
# ensemble, on the unperturbed images.
# NOTE(review): `ensemble_linear_models` is defined in a cell that appears
# further down in this notebook, so this cell raises NameError under
# Restart & Run All unless that definition cell is moved above it.
print([model.accuracy(images, labels) for model in models])
ensemble = ensemble_linear_models(models)
print(ensemble.accuracy(images, labels))

[1.0, 1.0, 1.0, 1.0, 1.0]
1.0


### Ensemble Baseline

In [9]:
def subset_feasible_models(models, x, noise_budget):
    """Keep the models whose distance to ``x`` is strictly below the budget.

    Args:
        models: sequence of models exposing ``distance(x)``, whose result
            provides ``.item()``.
        x: the input point handed to every ``distance`` call.
        noise_budget: exclusive upper bound on the distance.

    Returns:
        A new list with the qualifying models, in their original order.
    """
    # Query every model first, then filter on the collected scalars.
    distances = [model.distance(x).item() for model in models]
    return [model for model, dist in zip(models, distances) if dist < noise_budget]

In [8]:
def ensemble_linear_models(models):
    """Average several linear classifiers into a single classifier.

    Every model contributes with the same weight ``1 / len(models)``. The
    class of the first model decides the result type: ``BinaryClassifier``
    (weights are flattened to one row before averaging) or, for anything
    else, ``MultiClassifier`` (weights are averaged as they are).

    Args:
        models: non-empty sequence of models exposing ``weights`` and ``bias``.

    Returns:
        A new classifier built from the averaged weights and bias, both
        converted to float tensors.
    """
    count = len(models)
    is_binary = type(models[0]) is BinaryClassifier
    share = 1.0 / count

    if is_binary:
        TorchModel = BinaryClassifier
        weight_terms = [share * model.weights.reshape(1, -1) for model in models]
    else:
        TorchModel = MultiClassifier
        weight_terms = [share * model.weights for model in models]
    bias_terms = [share * model.bias for model in models]

    averaged_weights = torch.tensor(sum(weight_terms), dtype=torch.float)
    averaged_bias = torch.tensor(sum(bias_terms), dtype=torch.float)
    return TorchModel(averaged_weights, averaged_bias)


def compute_linear_ensemble_baseline(models, images, labels, noise_budget):
    """Attack, per image, the averaged ensemble of the feasible models.

    For every image the models returned by ``subset_feasible_models`` are
    averaged with ``ensemble_linear_models``; the matching distributional
    oracle is queried for that single ensemble (weight 1, budget
    ``sys.maxsize``) and its answer is rescaled to norm ``noise_budget``.

    If no model is feasible for an image, the noise for that image is all
    zeros. This mirrors ``coordinate_ascent``, which also returns zero noise
    in that situation, and avoids the IndexError that averaging an empty
    model list would raise.

    Args:
        models: non-empty sequence of classifiers; the type of the first one
            selects the binary or the multiclass oracle.
        images: tensor of inputs, indexed along its first dimension.
        labels: labels aligned with ``images``.
        noise_budget: norm every non-zero noise vector is rescaled to.

    Returns:
        Tensor of noise vectors with the same size as ``images``.
    """
    model_type = type(models[0])
    if model_type is BinaryClassifier:
        oracle = distributional_oracle_binary
        out_dim = 1
    else:
        oracle = distributional_oracle_multi
        out_dim = 3  # the multiclass path reshapes the weights to 3 rows

    noise_vectors = []
    for i in range(len(images)):
        x = images[i].unsqueeze(0)
        y = labels[i]

        feasible = subset_feasible_models(models, x, noise_budget)
        if not feasible:
            # Nothing to ensemble for this point: leave it unperturbed.
            noise_vectors.append(torch.zeros(x.size(), dtype=torch.float))
            continue

        ensemble = ensemble_linear_models(feasible)
        ensemble_array = [(torch.tensor(ensemble.weights.reshape(out_dim, -1),
                                        dtype=torch.float),
                           torch.tensor(ensemble.bias, dtype=torch.float))]

        v = oracle(np.ones(1), ensemble_array, x, y, sys.maxsize)
        v = v / v.norm() * noise_budget
        # Normalise to the per-image shape so zero fallbacks and oracle
        # answers can be stacked together.
        noise_vectors.append(v.reshape(x.size()))
    return torch.stack(noise_vectors).reshape(images.size())

In [12]:
# Ensemble-baseline noise for every loaded image (same size as `images`).
ensemble_noise_vectors = compute_linear_ensemble_baseline(models, images, labels, noise_budget)

In [13]:
# Sanity check: largest norm among the ensemble noise vectors. The vectors are
# rescaled to `noise_budget`, so this should equal it up to float rounding
# (the recorded output is marginally above 1.3).
max([v.norm().item() for v in ensemble_noise_vectors])

1.3000001907348633

In [14]:
# Accuracy of the models on the perturbed images, computed by the `baselines`
# helper. NOTE(review): presumably one row per image and one column per model
# (the summary below reduces over axis=1) — confirm against baselines.py.
ensemble_accs_per_point = b.compute_model_accs_per_point(models, images, ensemble_noise_vectors, labels)

In [15]:
# "Mean": axis-1 mean, averaged over axis 0; "Max": axis-1 maximum, averaged
# over axis 0. NOTE(review): axis 1 is assumed to index the models — confirm.
print("Ensemble Baseline")
print("Mean ", np.mean(np.mean(ensemble_accs_per_point, axis=1)), 
      "Max ", np.mean(np.max(ensemble_accs_per_point, axis=1)))

Ensemble Baseline
Mean  0.314 Max  0.55


### Best Individual Baseline

In [16]:
def compute_individual_baseline(models, images, labels, noise_budget):
    """Attack every model on its own and rescale each attack to the budget.

    The type of the first model picks the oracle (binary or multiclass) and
    the number of rows the weights are reshaped to (1 or 3). For each model
    the oracle is queried once per image with that single model (weight 1)
    and budget ``sys.maxsize``; the first element of its answer is rescaled
    so that its norm equals ``noise_budget``.

    Args:
        models: non-empty sequence of classifiers exposing ``weights`` and
            ``bias``.
        images: tensor of inputs, indexed along its first dimension.
        labels: labels aligned with ``images``.
        noise_budget: norm every noise vector is rescaled to.

    Returns:
        Tensor stacking, for each model, the stack of its per-image noise
        vectors.
    """
    if type(models[0]) is BinaryClassifier:
        oracle, out_dim = distributional_oracle_binary, 1
    else:
        oracle, out_dim = distributional_oracle_multi, 3

    def rescaled_attack(single_model, idx):
        # One-element batch and its label, attacked against one model only.
        x = images[idx].unsqueeze(0)
        y = labels[idx]
        raw = oracle(np.ones(1), single_model, x, y, sys.maxsize)[0]
        return raw / raw.norm() * noise_budget

    per_model = []
    for model in models:
        # Built once per model and reused for all of its images.
        single_model = [(torch.tensor(model.weights.reshape(out_dim, -1),
                                      dtype=torch.float),
                         torch.tensor(model.bias, dtype=torch.float))]
        attacks = [rescaled_attack(single_model, idx) for idx in range(len(images))]
        per_model.append(torch.stack(attacks))

    return torch.stack(per_model)


In [17]:
# One stack of per-image noise vectors for every individual model.
individual_noise_vectors = compute_individual_baseline(models, images, labels, noise_budget)

In [18]:
# Outer dimension: attacked model; next dimension: image (stacking order used
# by `compute_individual_baseline`).
individual_noise_vectors.shape

torch.Size([5, 100, 784])

In [19]:
# Sanity check: per attacked model, the largest noise norm; each should equal
# `noise_budget` up to float rounding.
[max([v.norm().item() for v in t]) for t in individual_noise_vectors]

[1.3000003099441528,
 1.3000001907348633,
 1.3000001907348633,
 1.3000001907348633,
 1.3000001907348633]

In [20]:
# Summary produced by the `baselines` helper. NOTE(review): the two printed
# values are presumably the mean and max statistics, in that order — confirm
# against baselines.compute_best_individual_baseline.
print("Best Individual Baseline")
print("Mean Max", b.compute_best_individual_baseline(models, images, individual_noise_vectors, labels))

Best Individual Baseline
Mean Max [0.7999999999999998, 1.0]


### Coordinate Ascent Baseline

In [21]:
def coordinate_ascent(models, x, y, noise_budget):
    """Randomised coordinate ascent over the label assignment of the models.

    Only the models kept by ``subset_feasible_models`` take part. Starting
    from the assignment where every model predicts ``y``, one randomly chosen
    model at a time (each model at most once) is switched to a random label
    other than ``y``, and the region solver is asked for a perturbation that
    realises the whole assignment. The search stops at the first assignment
    for which the solver returns ``None`` or a vector whose norm is not
    within ``noise_budget``.

    Args:
        models: sequence of classifiers; the type of the first feasible one
            selects the binary (labels -1/1) or multiclass (labels 0..2)
            solver.
        x: input tensor; converted with ``.numpy()`` and flattened for the
            solver.
        y: label tensor; read with ``.item()``.
        noise_budget: maximum norm accepted for a perturbation.

    Returns:
        The last accepted perturbation as a float32 tensor with one row, or
        zeros shaped like ``x`` if nothing was accepted.
    """
    feasible = subset_feasible_models(models, x, noise_budget)
    model_count = len(feasible)

    best = torch.zeros(x.size())
    if model_count == 0:
        # can't trick anything
        return torch.zeros(x.size())

    if type(feasible[0]) is BinaryClassifier:
        try_region, labels = try_region_binary, [-1, 1]
    else:
        try_region, labels = try_region_multi, range(3)

    flat_x = x.numpy().reshape(-1,)
    true_label = y.item()

    # Start from the clean prediction for every feasible model.
    label_vector = [true_label] * model_count
    label_options = list(set(labels).difference(set([true_label])))
    model_options = list(range(model_count))

    for _ in range(model_count):
        coord = np.random.choice(model_options)
        model_options = list(set(model_options).difference([coord]))

        label_vector[coord] = np.random.choice(label_options)
        v = try_region(feasible, label_vector, flat_x)

        if v is None:
            break
        if not (np.linalg.norm(v) <= noise_budget):
            break
        best = torch.tensor(v, dtype=torch.float32).reshape(1, -1)

    return best

In [22]:
def compute_linear_coordinate_ascent_baseline(models, images, labels, noise_budget):
    """Run ``coordinate_ascent`` on every image and collect the noise vectors.

    Args:
        models: classifiers handed unchanged to ``coordinate_ascent``.
        images: tensor of inputs, indexed along its first dimension; each
            image is passed as a one-element batch.
        labels: labels aligned with ``images``.
        noise_budget: budget forwarded to ``coordinate_ascent``.

    Returns:
        Tensor of noise vectors with the same size as ``images``.
    """
    per_image = [
        coordinate_ascent(models, images[idx].unsqueeze(0), labels[idx], noise_budget)
        for idx in range(len(images))
    ]
    return torch.stack(per_image).reshape(images.size())

In [23]:
# Coordinate-ascent noise for every loaded image. The search draws from
# np.random, and no seed is set in the visible cells, so reruns may differ.
coordinate_ascent_noise_vectors = compute_linear_coordinate_ascent_baseline(models, images, labels, noise_budget)

In [24]:
# Sanity check: coordinate ascent only keeps perturbations whose norm is at
# most `noise_budget`, so the largest norm must not exceed it.
max([v.norm().item() for v in coordinate_ascent_noise_vectors])

1.2962597608566284

In [25]:
# Accuracy of the models on the images perturbed by coordinate ascent, computed
# by the `baselines` helper. NOTE(review): presumably one row per image and one
# column per model — confirm against baselines.py.
coordinate_ascent_accs_per_point = b.compute_model_accs_per_point(models, images, 
                                                                  coordinate_ascent_noise_vectors, labels)

In [26]:
# "Mean": axis-1 mean, averaged over axis 0; "Max": axis-1 maximum, averaged
# over axis 0. NOTE(review): axis 1 is assumed to index the models — confirm.
print("Coordinate Ascent Baseline")
print("Mean ", np.mean(np.mean(coordinate_ascent_accs_per_point, axis=1)), 
      "Max ", np.mean(np.max(coordinate_ascent_accs_per_point, axis=1)))

Coordinate Ascent Baseline
Mean  0.494 Max  0.72


In [27]:
def compute_mwu_accuracies(models, noise_vectors, images, labels):
    """Accuracy curves of the averaged MWU attack as iterations accumulate.

    ``noise_vectors`` is indexed as ``noise_vectors[t][i]`` (iteration, then
    point). For every point, each model's accuracy on ``images[i]`` plus that
    noise is recorded for every iteration. For each horizon ``t`` the
    per-model accuracies are averaged over iterations ``0..t``; the mean and
    the maximum over models are then averaged over all points.

    Args:
        models: sequence of models exposing ``accuracy(x, y)``.
        noise_vectors: array-like whose first two dimensions are the number
            of MWU iterations and the number of points.
        images: inputs, indexed by point.
        labels: labels, indexed by point.

    Returns:
        dict with the keys 'max' and 'mean', each a list with one value per
        MWU iteration.
    """
    mwu_iters, num_points = noise_vectors.shape[:2]

    def accuracy_history(point):
        # Rows: MWU iterations, columns: models.
        x, y = images[point], labels[point]
        rows = []
        for step in range(mwu_iters):
            noise = noise_vectors[step][point]
            rows.append([model.accuracy(x + noise, y) for model in models])
        return np.array(rows)

    histories = [accuracy_history(point) for point in range(num_points)]

    curves = {'max': [], 'mean': []}
    for horizon in range(1, mwu_iters + 1):
        # Per-model accuracy averaged over the first `horizon` iterations.
        running = [np.mean(history[:horizon], axis=0) for history in histories]
        mean_acc = np.mean([np.mean(avg) for avg in running])
        max_acc = np.mean([np.max(avg) for avg in running])
        curves['max'].append(max_acc)
        curves['mean'].append(mean_acc)
    return curves

In [30]:
# Accuracy curves for the saved MWU runs (oracle-based and PGD-based). The
# commented lines are the binary counterparts; switch them together with
# `exp_dirs`, `data_folder`, `models` and `noise_budget`.
# mwu_oracle = compute_mwu_accuracies(models, experiment_results['binary_oracle']['noise_vectors'],
#                                     images, labels)
# mwu_pgd = compute_mwu_accuracies(models, experiment_results['binary_pgd']['noise_vectors'],
#                                  images, labels)
mwu_oracle = compute_mwu_accuracies(models, experiment_results['multi_oracle']['noise_vectors'],
                                    images, labels)
mwu_pgd = compute_mwu_accuracies(models, experiment_results['multi_pgd']['noise_vectors'],
                                 images, labels)

In [31]:
# Final-iteration mean/max accuracy of both MWU variants.
# NOTE(review): the commented-out prints refer to names
# (`individual_baselines_accs`, `ensemble_baselines_accs`,
# `coordinate_ascent_baseline_accs`) that no visible cell defines; they cannot
# simply be re-enabled.
print('Baseline', 'Mean', 'Max')
# print('Individual Baselines', np.mean(individual_baselines_accs), np.max(individual_baselines_accs))
# print('Ensemble Baselines', np.mean(ensemble_baselines_accs), np.max(ensemble_baselines_accs))
# print('Coordinate Ascent Baselines', np.mean(coordinate_ascent_baseline_accs), np.max(coordinate_ascent_baseline_accs))
print('MWU Oracle ', mwu_oracle['mean'][-1], mwu_oracle['max'][-1])
print('MWU PGD ', mwu_pgd['mean'][-1], mwu_pgd['max'][-1])

Baseline Mean Max
MWU Oracle  0.13386666666666666 0.16766666666666663
MWU PGD  0.34606666666666663 0.5213333333333332


In [None]:
# Max-model-accuracy curve of both MWU variants over the MWU iterations.
plt.plot(mwu_oracle['max'], label='MWU-Oracle')
plt.plot(mwu_pgd['max'], label='MWU-PGD')
# The notebook is currently switched to the multiclass experiment (see
# `noise_budget`, `exp_dirs`, `data_folder`, `models`), so the figure must not
# be titled "Binary" nor overwrite binary.pdf. Toggle these lines together
# with the other binary/multiclass switches.
# plt.title('Linear Binary')
plt.title('Linear Multiclass')
plt.xlabel('MWU Iteration')
plt.ylabel('Max Model Accuracy')
plt.legend(fontsize=12)
# plt.savefig('binary.pdf')
plt.savefig('multi.pdf')


In [None]:
# `compute_mwu_accuracies` returns a dict keyed by 'max' and 'mean', so the
# former positional access `mwu_pgd[1]` raises KeyError.
# NOTE(review): 'mean' is assumed to be the curve that index 1 referred to
# (it is the second entry of the returned dict) — confirm.
plt.plot(mwu_pgd['mean'])

In [None]:
# NOTE(review): `acc_plot` is not defined in any visible cell, so this cell
# raises NameError on a fresh kernel. Either define it above or drop this cell.
for p in acc_plot:
    plt.plot(p)