In [57]:
import common

In [58]:
import torch, torchvision
import numpy as np
import matplotlib.pyplot as plt
import pyro
import tqdm
import os,sys
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
from torch.optim import Adam, lr_scheduler


import re, pickle
from torch.utils.data import DataLoader, random_split


In [59]:
# Reproducibility
common.set_seed(1)

In [60]:
# Notebook-wide configuration. Everything below is a plain module-level constant
# read by later cells.

# Selects which Bayesian layer classes BBBLeNet picks from common.layers.
layer_type = 'lrt'  # 'bbb' or 'lrt'
# Activation instantiated after each hidden layer of BBBLeNet / LeNet.
activation_type = 'softplus'  # 'softplus' or 'relu'
# Forwarded unchanged to every Bayesian layer through its `priors=` keyword.
priors={
    'prior_mu': 0,
    'prior_sigma': 0.1,
    'posterior_mu_initial': (0, 0.1),  # (mean, std) normal_
    'posterior_rho_initial': (-5, 0.1),  # (mean, std) normal_
}
# NOTE(review): lr_start, train_ens, valid_ens and beta_type are not referenced in
# the visible part of the notebook — presumably training settings; confirm.
lr_start = 0.001
num_workers = 1  # worker processes used by the train / validation DataLoaders
valid_size = 0.2  # fraction of the MNIST training split held out for validation
batch_size = 256  # batch size of the train / validation DataLoaders
train_ens = 1
valid_ens = 1
beta_type = 0.1  
# Every MNIST image is resized to 32x32 before being converted to a tensor;
# the networks' 5 * 5 * 16 flatten size relies on this input resolution.
transform_mnist = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])
outputs = 10  # width of the final linear layer (passed as `outputs`)
inputs = 1  # input channels of the first convolution (passed as `inputs`)

In [61]:
# MNIST training split (downloaded into '.' if missing); images pass through transform_mnist.
trainset = torchvision.datasets.MNIST(root='.', train=True, download=True, transform=transform_mnist)
# Shuffle all training indices and hold out the first `valid_size` fraction for validation.
# np.random.shuffle uses NumPy's global RNG, so the split depends on that RNG's state
# (presumably seeded by common.set_seed(1) in the cell above — TODO confirm).
num_train = len(trainset)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]
# Both loaders wrap the same dataset object; each sampler restricts its loader
# to one of the two disjoint index subsets.
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, 
                                           sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, 
                                           sampler=valid_sampler, num_workers=num_workers)

In [62]:
class BBBLeNet(common.ModuleWrapper):
    '''LeNet-style classifier assembled from Bayesian conv / linear layers.'''

    def __init__(self, outputs, inputs, priors, layer_type='lrt', activation_type='softplus'):
        '''
        Args:
            outputs: width of the last linear layer.
            inputs: input channels of the first convolution.
            priors: dict handed to every Bayesian layer via `priors=`.
            layer_type: 'lrt' or 'bbb'; selects the layer classes in common.layers.
            activation_type: 'softplus' or 'relu'.

        Raises:
            ValueError: for an unknown layer_type or activation_type.
        '''
        super().__init__()

        self.num_classes = outputs
        self.layer_type = layer_type
        self.priors = priors

        # Resolve the layer classes by name so only the selected pair is looked up.
        for known_type, linear_name, conv_name in (
            ('lrt', 'BBB_LRT_Linear', 'BBB_LRT_Conv2d'),
            ('bbb', 'BBB_Linear', 'BBB_Conv2d'),
        ):
            if layer_type == known_type:
                BBBLinear = getattr(common.layers, linear_name)
                BBBConv2d = getattr(common.layers, conv_name)
                break
        else:
            raise ValueError("Undefined layer_type")

        # self.act holds the activation *class*; one instance is created per layer.
        for known_act, act_cls in (('softplus', nn.Softplus), ('relu', nn.ReLU)):
            if activation_type == known_act:
                self.act = act_cls
                break
        else:
            raise ValueError("Only softplus or relu supported")

        bayes_kwargs = {'bias': True, 'priors': self.priors}
        flat_features = 5 * 5 * 16

        # NOTE(review): sub-modules are registered in forward order; presumably
        # common.ModuleWrapper relies on that order — do not reorder.
        self.conv1 = BBBConv2d(inputs, 6, 5, padding=0, **bayes_kwargs)
        self.act1 = self.act()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = BBBConv2d(6, 16, 5, padding=0, **bayes_kwargs)
        self.act2 = self.act()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.flatten = common.layers.FlattenLayer(flat_features)
        self.fc1 = BBBLinear(flat_features, 120, **bayes_kwargs)
        self.act3 = self.act()

        self.fc2 = BBBLinear(120, 84, **bayes_kwargs)
        self.act4 = self.act()

        self.fc3 = BBBLinear(84, outputs, **bayes_kwargs)
        
        
class LeNet(nn.Module):
    '''
    Deterministic LeNet with the same layer layout as BBBLeNet.

    A new instance carries ordinary randomly initialised nn.Conv2d / nn.Linear
    weights. Call `sample` to overwrite them with one draw from a BBBLeNet's
    weight posterior, or `load_state_dict` to restore a stored sample.
    '''

    def __init__(self, outputs, inputs, layer_type='lrt', activation_type='softplus'):
        '''
        Args:
            outputs: width of the final linear layer.
            inputs: input channels of the first convolution.
            layer_type: unused here; kept so the signature parallels BBBLeNet.
            activation_type: 'softplus' or 'relu'.

        Raises:
            ValueError: if activation_type is neither 'softplus' nor 'relu'.
        '''
        super(LeNet, self).__init__()

        # self.act stores the activation *class*; instances are created per layer.
        if activation_type == 'softplus':
            self.act = nn.Softplus
        elif activation_type == 'relu':
            self.act = nn.ReLU
        else:
            # Previously fell through and failed later with an AttributeError on
            # self.act(); raise the same error BBBLeNet raises instead.
            raise ValueError("Only softplus or relu supported")

        self.conv1 = nn.Conv2d(inputs, 6, 5, padding=0, bias=True)
        self.act1 = self.act()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, 5, padding=0, bias=True)
        self.act2 = self.act()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(5 * 5 * 16, 120, bias=True)
        self.act3 = self.act()
        self.fc2 = nn.Linear(120, 84, bias=True)
        self.act4 = self.act()
        self.fc3 = nn.Linear(84, outputs, bias=True)

    @staticmethod
    def _draw_gaussian(mu, rho):
        '''Return one detached draw of mu + softplus(rho) * eps, eps ~ N(0, 1).'''
        # eps is allocated on mu's device/dtype so a posterior stored on an
        # accelerator can be sampled without a device-mismatch error.
        eps = torch.empty(mu.size(), dtype=mu.dtype, device=mu.device).normal_(0, 1)
        sigma = torch.log1p(torch.exp(rho))
        return (mu + eps * sigma).detach()

    @classmethod
    def _fill_from_posterior(cls, layer, bayes_layer):
        '''Overwrite layer.weight (and layer.bias when present) with posterior draws.'''
        layer.weight.data = cls._draw_gaussian(bayes_layer.W_mu, bayes_layer.W_rho)
        # A bias-free Bayesian layer has nothing to sample; the deterministic
        # layer was created without a bias in that case.
        if bayes_layer.use_bias:
            layer.bias.data = cls._draw_gaussian(bayes_layer.bias_mu, bayes_layer.bias_rho)

    @staticmethod
    def _conv_like(bayes_conv):
        '''Build an nn.Conv2d with the same geometry (and bias flag) as bayes_conv.'''
        return nn.Conv2d(bayes_conv.in_channels, bayes_conv.out_channels, bayes_conv.kernel_size,
                         stride=bayes_conv.stride, padding=bayes_conv.padding,
                         dilation=bayes_conv.dilation, groups=bayes_conv.groups,
                         bias=bayes_conv.use_bias)

    def sample(self, bbbnet):
        '''
        Replace this model's layers with one weight sample drawn from `bbbnet`.

        Activations and pooling layers are re-created from `bbbnet`'s settings;
        every conv / linear layer is rebuilt with matching geometry and its
        weight (and bias, if the Bayesian layer uses one) is set to
        mu + softplus(rho) * eps with fresh standard-normal eps.

        Args:
            bbbnet: a BBBLeNet-like object exposing act, pool1/pool2,
                conv1/conv2 and fc1..fc3 with W_mu, W_rho, use_bias and, when
                use_bias is true, bias_mu / bias_rho.
        '''
        ### activation function used by the BNN; only relu and softplus are supported
        self.act1 = bbbnet.act()
        self.act2 = bbbnet.act()
        self.act3 = bbbnet.act()
        self.act4 = bbbnet.act()

        ### maxpool
        self.pool1 = nn.MaxPool2d(kernel_size=bbbnet.pool1.kernel_size, stride=bbbnet.pool1.stride)
        self.pool2 = nn.MaxPool2d(kernel_size=bbbnet.pool2.kernel_size, stride=bbbnet.pool2.stride)

        # Construction and sampling order (conv layers built, conv sampled,
        # linear layers built, linear sampled) is kept as before so a seeded
        # run consumes the random stream in the same sequence.
        self.conv1 = self._conv_like(bbbnet.conv1)
        self.conv2 = self._conv_like(bbbnet.conv2)
        self._fill_from_posterior(self.conv1, bbbnet.conv1)
        self._fill_from_posterior(self.conv2, bbbnet.conv2)

        ### Create Linear Layers
        self.fc1 = nn.Linear(bbbnet.fc1.in_features, bbbnet.fc1.out_features, bbbnet.fc1.use_bias)
        self.fc2 = nn.Linear(bbbnet.fc2.in_features, bbbnet.fc2.out_features, bbbnet.fc2.use_bias)
        self.fc3 = nn.Linear(bbbnet.fc3.in_features, bbbnet.fc3.out_features, bbbnet.fc3.use_bias)
        self._fill_from_posterior(self.fc1, bbbnet.fc1)
        self._fill_from_posterior(self.fc2, bbbnet.fc2)
        self._fill_from_posterior(self.fc3, bbbnet.fc3)

    def forward(self, x):
        '''
        Apply conv -> act -> pool twice, flatten to 5 * 5 * 16 features, then
        three linear layers with activations between them. Returns the raw
        outputs of the last linear layer.
        '''
        x = self.conv1(x)
        x = self.act1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.act2(x)
        x = self.pool2(x)

        x = x.view(-1, 5 * 5 * 16)
        x = self.fc1(x)
        x = self.act3(x)
        x = self.fc2(x)
        x = self.act4(x)
        x = self.fc3(x)
        return x

In [63]:
# Bayesian LeNet built from the configuration cell's settings.
net = BBBLeNet(outputs, inputs, priors, layer_type, activation_type)

In [15]:
def load_models(K = 100, modelname="models/model-cnn.pt"):
    '''
    Load the first K sampled LeNet models from a checkpoint file.

    The file is expected to hold a sequence of state dicts, one per sampled
    model. Entries beyond the first K are ignored.

    Args:
        K: number of models to return.
        modelname: path of the checkpoint created with torch.save.

    Returns:
        List of K LeNet instances with the stored weights loaded.

    Raises:
        ValueError: if the file holds fewer than K state dicts. Previously the
            missing models were silently left randomly initialised.
    '''
    # map_location="cpu" lets checkpoints written on a GPU machine load on a
    # CPU-only one; load_state_dict copies into the model's own parameters.
    state_dicts = list(torch.load(modelname, map_location="cpu"))
    if len(state_dicts) < K:
        raise ValueError(
            "%s holds %d state dicts, but K=%d models were requested"
            % (modelname, len(state_dicts), K)
        )

    sampled_models = []
    for state_dict in state_dicts[:K]:
        model = LeNet(outputs, inputs, layer_type, activation_type)
        model.load_state_dict(state_dict)
        sampled_models.append(model)
    print("Loaded %d sample models" % len(sampled_models))
    return sampled_models

## Evaluate Test Accuracy (eps=0.05)

### Load Test Sets

In [16]:
# Resizes images to 28x28; applied to the stacked adversarial images in the cells below.
transform_back = transforms.Compose([transforms.Resize((28,28))])

In [68]:
# Pickled adversarial batches from "mnist_adv_CNN_0.05/" whose name contains "test_images_med".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.05/") if "test_images_med" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.05/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]

targets = torch.hstack(targets)

# Clean MNIST test split
test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)

# Clean test loader: batch_size 128, shuffle order fixed by the seeded generator
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_test_dataset.data    = images
adv_test_dataset.targets = targets

# Adversarial test loader: batch_size 128, same shuffle seed as the clean loader
adv_test_loader = torch.utils.data.DataLoader(adv_test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [69]:
# Pickled adversarial batches from "mnist_adv_CNN_0.05/" whose name contains "test_images_champ".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.05/") if "test_images_champ" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.05/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]
targets = torch.hstack(targets)

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_champ_test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_champ_test_dataset.data    = images
adv_champ_test_dataset.targets = targets

# Adversarial test loader: batch_size 128, shuffle order fixed by the seeded generator
adv_champ_test_loader = torch.utils.data.DataLoader(adv_champ_test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [70]:
# Pickled adversarial batches from "mnist_adv_CNN_0.05/" whose name contains "test_images_mean".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.05/") if "test_images_mean" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.05/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]
targets = torch.hstack(targets)

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_mean_test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_mean_test_dataset.data    = images
adv_mean_test_dataset.targets = targets

# Adversarial test loader: batch_size 128, shuffle order fixed by the seeded generator
adv_mean_test_loader = torch.utils.data.DataLoader(adv_mean_test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [71]:
# Pickled adversarial batches from "mnist_adv_CNN_0.18/" whose name contains "test_images_med".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.18/") if "test_images_med" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.18/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]

targets = torch.hstack(targets)

# Clean MNIST test split (rebuilt in this cell)
test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)

# Clean test loader: batch_size 128, shuffle order fixed by the seeded generator
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_test_18_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_test_18_dataset.data    = images
adv_test_18_dataset.targets = targets

# Adversarial test loader: batch_size 128, same shuffle seed as the clean loader
adv_test_18_loader = torch.utils.data.DataLoader(adv_test_18_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [72]:
# Pickled adversarial batches from "mnist_adv_CNN_0.18/" whose name contains "test_images_champ".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.18/") if "test_images_champ" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.18/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]

targets = torch.hstack(targets)

# Clean MNIST test split (rebuilt in this cell)
test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)

# Clean test loader: batch_size 128, shuffle order fixed by the seeded generator
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_champ_test_18_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_champ_test_18_dataset.data    = images
adv_champ_test_18_dataset.targets = targets

# Adversarial test loader: batch_size 128, same shuffle seed as the clean loader
adv_champ_test_18_loader = torch.utils.data.DataLoader(adv_champ_test_18_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [73]:
# Pickled adversarial batches from "mnist_adv_CNN_0.18/" whose name contains "test_images_mean".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.18/") if "test_images_mean" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.18/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]

targets = torch.hstack(targets)

# Clean MNIST test split (rebuilt in this cell)
test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)

# Clean test loader: batch_size 128, shuffle order fixed by the seeded generator
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_mean_test_18_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_mean_test_18_dataset.data    = images
adv_mean_test_18_dataset.targets = targets

# Adversarial test loader: batch_size 128, same shuffle seed as the clean loader
adv_mean_test_18_loader = torch.utils.data.DataLoader(adv_mean_test_18_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [74]:
# Pickled adversarial batches from "mnist_adv_CNN_0.50/" whose name contains "test_images_med".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.50/") if "test_images_med" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.50/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]

targets = torch.hstack(targets)

# Clean MNIST test split (rebuilt in this cell)
test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)

# Clean test loader: batch_size 128, shuffle order fixed by the seeded generator
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_test_50_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_test_50_dataset.data    = images
adv_test_50_dataset.targets = targets

# Adversarial test loader: batch_size 128, same shuffle seed as the clean loader
adv_test_50_loader = torch.utils.data.DataLoader(adv_test_50_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [75]:
# Pickled adversarial batches from "mnist_adv_CNN_0.50/" whose name contains "test_images_champ".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.50/") if "test_images_champ" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.50/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]

targets = torch.hstack(targets)

# Clean MNIST test split (rebuilt in this cell)
test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)

# Clean test loader: batch_size 128, shuffle order fixed by the seeded generator
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_champ_test_50_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_champ_test_50_dataset.data    = images
adv_champ_test_50_dataset.targets = targets

# Adversarial test loader: batch_size 128, same shuffle seed as the clean loader
adv_champ_test_50_loader = torch.utils.data.DataLoader(adv_champ_test_50_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [76]:
# Pickled adversarial batches from "mnist_adv_CNN_0.50/" whose name contains "test_images_mean".
# os.listdir returns entries in arbitrary order, so sort for a reproducible dataset order.
dirs = sorted(d for d in os.listdir("mnist_adv_CNN_0.50/") if "test_images_mean" in d)

images  = []
targets = []
for d in dirs:
    # NOTE: pickle.load can execute arbitrary code — only read trusted files.
    with open("mnist_adv_CNN_0.50/" + d, 'rb') as handle:
        temp = pickle.load(handle)
    images.append(temp["images"])
    targets.append(temp["labels"])

# Stack all files, resize to 28x28 and keep index 0 of dim 1.
# assumes each temp["images"] is (N, C, H, W) — TODO confirm
images  = torch.vstack(images)
images  = transform_back(images)
images  = images[:,0,:,:]

targets = torch.hstack(targets)

# Clean MNIST test split (rebuilt in this cell)
test_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)

# Clean test loader: batch_size 128, shuffle order fixed by the seeded generator
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

# Adversarial dataset: an MNIST test-split wrapper (same transform) whose
# images and labels are replaced by the adversarial ones loaded above.
adv_mean_test_50_dataset = torchvision.datasets.MNIST('.', train=False, download=True,
                       transform=transform_mnist)
adv_mean_test_50_dataset.data    = images
adv_mean_test_50_dataset.targets = targets

# Adversarial test loader: batch_size 128, same shuffle seed as the clean loader
adv_mean_test_50_loader = torch.utils.data.DataLoader(adv_mean_test_50_dataset, batch_size=128, shuffle=True,
                                         generator=torch.Generator().manual_seed(156))

In [26]:
# Pull one batch from the clean test loader to inspect it; img is whatever the
# loader yields per batch (indexed with [0] for the images in the next cell).
img = next(iter(test_loader))

In [27]:
# Shape of the image part of the batch; the recorded output is [128, 1, 32, 32].
img[0].shape

torch.Size([128, 1, 32, 32])

In [28]:
def evaluate_accuracy(model, loader):
    '''
    Fraction of samples yielded by `loader` that `model` classifies correctly.

    Args:
        model: callable mapping an image batch to per-class scores of shape
            (batch, classes); the predicted class is the argmax over dim 1.
        loader: iterable of (images, labels) batches.

    Returns:
        Correct predictions divided by the number of samples actually
        evaluated, as a float.

    Raises:
        ValueError: if the loader yields no samples.

    NOTE(review): the model is not switched to eval mode here; callers whose
    model contains dropout / batch-norm should call model.eval() first.
    '''
    total_correct = 0
    total_seen = 0
    # Evaluation only: skip autograd bookkeeping for every forward pass.
    with torch.no_grad():
        for images, labels in loader:
            scores = model(images)
            pred = torch.argmax(scores, dim=1)
            total_correct += torch.sum(pred == labels).item()
            # Count what was really evaluated instead of len(dataset.targets),
            # which is wrong for sampler / drop_last loaders and unavailable on
            # datasets without a `.targets` attribute.
            total_seen += labels.numel()
    if total_seen == 0:
        raise ValueError("loader yielded no samples")
    return total_correct / total_seen

### AdvBNN vs BNN

In [52]:
# 50 sampled models from each of the three "0.50" checkpoints (med / champ / mean variants).
sampled_adv_models_50       = load_models(K = 50,modelname="models/model-cnn-adv-med0.50.pt")
sampled_adv_models_50_champ = load_models(K = 50,modelname="models/model-cnn-adv-champ0.50.pt")
sampled_adv_models_50_mean  = load_models(K = 50,modelname="models/model-cnn-adv-mean0.50.pt")

Loaded 50 sample models
Loaded 50 sample models
Loaded 50 sample models


In [29]:
# 50 sampled models from each of the three "0.18" checkpoints (med / champ / mean variants).
sampled_adv_models_18       = load_models(K = 50,modelname="models/model-cnn-adv-med0.18.pt")
sampled_adv_models_18_champ = load_models(K = 50,modelname="models/model-cnn-adv-champ0.18.pt")
sampled_adv_models_18_mean  = load_models(K = 50,modelname="models/model-cnn-adv-mean0.18.pt")

Loaded 50 sample models
Loaded 50 sample models
Loaded 50 sample models


In [31]:
# 50 sampled models from each of the three "0.05" checkpoints (med / champ / mean variants).
sampled_adv_models_05       = load_models(K = 50,modelname="models/model-cnn-adv-med0.05.pt")
sampled_adv_models_05_champ = load_models(K = 50,modelname="models/model-cnn-adv-champ0.05.pt")
sampled_adv_models_05_mean  = load_models(K = 50,modelname="models/model-cnn-adv-mean0.05.pt")

# 50 sampled models from the plain checkpoint "models/model-cnn.pt" (the "BNN" rows below).
sampled_models        = load_models(K = 50,modelname="models/model-cnn.pt")

Loaded 50 sample models
Loaded 50 sample models
Loaded 50 sample models
Loaded 50 sample models


In [32]:
# The loaded object is used directly as a model below, so the file presumably stores a
# whole pickled model rather than a state dict — TODO confirm. Unpickling requires the
# model's class to be importable and should only be done with trusted files.
cnn_model = torch.load("models/CNN.pt")

#### CNN vs Test

In [44]:
# cnn_model accuracy on the clean MNIST test loader.
evaluate_accuracy(cnn_model, test_loader)

0.9827

In [45]:
# cnn_model accuracy on the "test_images_med" set from mnist_adv_CNN_0.05/.
evaluate_accuracy(cnn_model, adv_test_loader)

0.901447451227187

In [46]:
# cnn_model accuracy on the "test_images_champ" set from mnist_adv_CNN_0.05/.
evaluate_accuracy(cnn_model, adv_champ_test_loader)

0.897293895531781

In [47]:
# cnn_model accuracy on the "test_images_mean" set from mnist_adv_CNN_0.05/.
evaluate_accuracy(cnn_model, adv_mean_test_loader)

0.9092511013215859

In [78]:
# cnn_model accuracy on the "test_images_med" set from mnist_adv_CNN_0.18/.
evaluate_accuracy(cnn_model, adv_test_18_loader)

0.5607749192792417

In [77]:
# cnn_model accuracy on the "test_images_med" set from mnist_adv_CNN_0.50/.
evaluate_accuracy(cnn_model, adv_test_50_loader)

0.025356576862123614

#### AdvBNN(eps=0.05) vs Test

In [40]:
# Accuracy of each model in sampled_adv_models_05 (model-cnn-adv-med0.05.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_05]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.9887
mean:  0.9841300000000001 , sd:  0.0027
min:   0.9765


#### AdvBNN_champ(eps=0.05) vs Test

In [63]:
# Accuracy of each model in sampled_adv_models_05_champ (model-cnn-adv-champ0.05.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_05_champ]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.9886
mean:  0.985552 , sd:  0.003
min:   0.9686


#### AdvBNN_mean(eps=0.05) vs Test

In [48]:
# Accuracy of each model in sampled_adv_models_05_mean (model-cnn-adv-mean0.05.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_05_mean]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.9903
mean:  0.9864219999999999 , sd:  0.0021
min:   0.9778


#### AdvBNN(eps=0.18) vs Test

In [98]:
# Accuracy of each model in sampled_adv_models_18 (model-cnn-adv-med0.18.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_18]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.9889
mean:  0.984654 , sd:  0.0036
min:  0.9647


#### AdvBNN_champ(eps=0.18) vs Test

In [102]:
# Accuracy of each model in sampled_adv_models_18_champ (model-cnn-adv-champ0.18.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_18_champ]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.9884
mean:  0.98449 , sd:  0.0025
min:  0.9777


#### AdvBNN_mean(eps=0.18) vs Test

In [49]:
# Accuracy of each model in sampled_adv_models_18_mean (model-cnn-adv-mean0.18.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_18_mean]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.9885
mean:  0.9850899999999999 , sd:  0.0026
min:  0.9771


#### AdvBNN(eps=0.50) vs Test

In [53]:
# Accuracy of each model in sampled_adv_models_50 (model-cnn-adv-med0.50.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_50]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.9844
mean:  0.979686 , sd:  0.003
min:  0.9686


#### AdvBNN_champ(eps=0.50) vs Test

In [55]:
# Accuracy of each model in sampled_adv_models_50_champ (model-cnn-adv-champ0.50.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_50_champ]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.9863
mean:  0.9817400000000001 , sd:  0.0028
min:  0.9746


#### AdvBNN_mean(eps=0.50) vs Test

In [56]:
# Accuracy of each model in sampled_adv_models_50_mean (model-cnn-adv-mean0.50.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_adv_models_50_mean]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.9882
mean:  0.9829079999999999 , sd:  0.003
min:  0.9752


#### BNN vs Test

In [41]:
# Accuracy of each model in sampled_models (model-cnn.pt) on the clean test loader.
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, test_loader) for model in sampled_models]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.9824
mean:  0.975064 , sd:  0.0083
min:  0.938


#### AdvBNN(eps=0.05) vs Adv Test

In [53]:
# Accuracy of each model in sampled_adv_models_05 on adv_test_loader (mnist_adv_CNN_0.05/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_loader) for model in  sampled_adv_models_05]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:  0.9448709880427942
mean:  0.925802391441158 , sd:  0.0118
min:  0.8974197608558842


#### AdvBNN(eps=0.05) vs Adv Test (Champ)

In [57]:
# Accuracy of each model in sampled_adv_models_05 on adv_champ_test_loader (mnist_adv_CNN_0.05/, "test_images_champ").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_champ_test_loader) for model in  sampled_adv_models_05]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:  0.9422278162366268
mean:  0.9207023285084958 , sd:  0.0123
min:  0.891881686595343


#### AdvBNN_champ(eps=0.05) vs AdvTest

In [64]:
# Accuracy of each model in sampled_adv_models_05_champ on adv_test_loader (mnist_adv_CNN_0.05/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_loader) for model in sampled_adv_models_05_champ]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.9553178099433606
mean:  0.9404581497797356 , sd:  0.0131
min:   0.8658275645059786


#### AdvBNN(eps=0.18) vs Adv Test

In [100]:
# Accuracy of each model in sampled_adv_models_18 on adv_test_loader (mnist_adv_CNN_0.05/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_loader) for model in  sampled_adv_models_18]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.9666456891126495
mean:  0.958431718061674 , sd:  0.0061
min:   0.938703587161737


#### AdvBNN_champ(eps=0.18) vs Adv Test

In [101]:
# Accuracy of each model in sampled_adv_models_18_champ on adv_test_loader (mnist_adv_CNN_0.05/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_loader) for model in  sampled_adv_models_18_champ]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.9676526117054751
mean:  0.956860918816866 , sd:  0.0072
min:   0.9384518565135305


#### AdvBNN(eps=0.50) vs AdvTest

In [90]:
# Accuracy of each model in sampled_adv_models_50 on adv_test_loader (mnist_adv_CNN_0.05/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_loader) for model in sampled_adv_models_50]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.9149150409062303
mean:  0.8933593455003145 , sd:  0.0145
min:  0.8488357457520453


#### AdvBNN_champ(eps=0.50) vs AdvTest

In [91]:
# Accuracy of each model in sampled_adv_models_50_champ on adv_test_loader (mnist_adv_CNN_0.05/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_loader) for model in sampled_adv_models_50_champ]
print("max: ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min: ",min(accs))

max:  0.917684078036501
mean:  0.8943687853996224 , sd:  0.017
min:  0.8586532410320956


#### BNN vs. Adv Test

In [54]:
# Accuracy of each model in sampled_models (model-cnn.pt) on adv_test_loader (mnist_adv_CNN_0.05/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_loader) for model in  sampled_models]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:  0.8837004405286344
mean:  0.8339332913782251 , sd:  0.0426
min:  0.6415355569540592


In [58]:
# Accuracy of each model in sampled_models (model-cnn.pt) on adv_champ_test_loader (mnist_adv_CNN_0.05/, "test_images_champ").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_champ_test_loader) for model in  sampled_models]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:  0.8764002517306482
mean:  0.8200956576463185 , sd:  0.0436
min:  0.62039018250472


### Adversarial Test (eps=0.18)

#### CNN vs Adv Test (eps=0.18)

In [41]:
# cnn_model accuracy on the "test_images_med" set from mnist_adv_CNN_0.18/.
evaluate_accuracy(cnn_model, adv_test_18_loader)

0.5607749192792417

#### AdvBNN(eps=0.05) vs Adv Test (eps=0.18)

In [103]:
# Accuracy of each model in sampled_adv_models_05 on adv_test_18_loader (mnist_adv_CNN_0.18/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_18_loader) for model in  sampled_adv_models_05]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.7860639516717008
mean:  0.7239787522133111 , sd:  0.0357
min:   0.6259764607853349


#### AdvBNN_champ(eps=0.05) vs AdvTest

In [104]:
# Accuracy of each model in sampled_adv_models_05_champ on adv_test_18_loader (mnist_adv_CNN_0.18/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_18_loader) for model in sampled_adv_models_05_champ]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.8353296531611291
mean:  0.7690990521820644 , sd:  0.0488
min:   0.5811894594313093


#### AdvBNN(eps=0.18) vs Adv Test

In [105]:
# Accuracy of each model in sampled_adv_models_18 on adv_test_18_loader (mnist_adv_CNN_0.18/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_18_loader) for model in  sampled_adv_models_18]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.9731277991875846
mean:  0.961314446411832 , sd:  0.0061
min:   0.947817935631705


#### AdvBNN_champ(eps=0.18) vs Adv Test

In [106]:
# Accuracy of each model in sampled_adv_models_18_champ on adv_test_18_loader (mnist_adv_CNN_0.18/, "test_images_med").
# Prints max, mean, population SD (np.std default, ddof=0) and min across the sampled models.
accs = [evaluate_accuracy(model, adv_test_18_loader) for model in  sampled_adv_models_18_champ]
print("max:  ",max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs),4))
print("min:  ",min(accs))

max:   0.9737527340901989
mean:  0.9638162691386314 , sd:  0.0069
min:   0.9361524841162379


#### AdvBNN(eps=0.50) vs Adv Test

In [92]:
# Run evaluate_accuracy once per model in sampled_adv_models_50 against
# adv_test_18_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_18_loader)
    for net in sampled_adv_models_50
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.6363920424955734
mean:  0.5598500156233726 , sd:  0.0431
min:   0.4446411832100823


#### AdvBNN_champ(eps=0.50) vs Adv Test

In [93]:
# Run evaluate_accuracy once per model in sampled_adv_models_50_champ against
# adv_test_18_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_18_loader)
    for net in sampled_adv_models_50_champ
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.695760858243933
mean:  0.5993146547234663 , sd:  0.0431
min:   0.4961983126757629


#### BNN vs. Adv Test

In [107]:
# Run evaluate_accuracy once per model in sampled_models against
# adv_test_18_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_18_loader)
    for net in sampled_models
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.4770336423289241
mean:  0.3323757941881054 , sd:  0.05
min:   0.2345589001145714


In [58]:
# NOTE(review): this cell sits in the eps=0.18 section but scores
# sampled_models on adv_champ_test_loader, and both its code and its recorded
# output match the earlier In [58] cell -- looks like a leftover copy;
# confirm which loader was intended, or remove the cell.
accs = [
    evaluate_accuracy(net, adv_champ_test_loader)
    for net in sampled_models
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:  0.8764002517306482
mean:  0.8200956576463185 , sd:  0.0436
min:  0.62039018250472


### Adversarial Test (eps=0.50)

#### CNN vs Adv Test (eps=0.50)

In [79]:
# Evaluate cnn_model on adv_test_50_loader; as the cell's last expression,
# the value returned by evaluate_accuracy is displayed as the cell output.
evaluate_accuracy(cnn_model, adv_test_50_loader)

0.025356576862123614

#### AdvBNN(eps=0.05) vs Adv Test (eps=0.50)

In [80]:
# Run evaluate_accuracy once per model in sampled_adv_models_05 against
# adv_test_50_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_50_loader)
    for net in sampled_adv_models_05
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.05852388498981209
mean:  0.044496264432872995 , sd:  0.0063
min:   0.03305410912383971


#### AdvBNN_champ(eps=0.05) vs Adv Test (eps=0.50)

In [82]:
# Run evaluate_accuracy once per model in sampled_adv_models_05_champ against
# adv_test_50_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_50_loader)
    for net in sampled_adv_models_05_champ
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.06282544713606521
mean:  0.05027620556939099 , sd:  0.0062
min:   0.035884084220058864


In [104]:
# NOTE(review): this cell sits in the eps=0.50 section but scores
# sampled_adv_models_05_champ on adv_test_18_loader, and both its code and its
# recorded output match the In [104] cell in the eps=0.18 section -- looks
# like a leftover duplicate; confirm whether it should be removed.
accs = [
    evaluate_accuracy(net, adv_test_18_loader)
    for net in sampled_adv_models_05_champ
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.8353296531611291
mean:  0.7690990521820644 , sd:  0.0488
min:   0.5811894594313093


#### AdvBNN(eps=0.18) vs Adv Test

In [83]:
# Run evaluate_accuracy once per model in sampled_adv_models_18 against
# adv_test_50_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_50_loader)
    for net in sampled_adv_models_18
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.22866198777450758
mean:  0.12820466379895856 , sd:  0.0327
min:   0.08852162100973511


#### AdvBNN_champ(eps=0.18) vs Adv Test

In [84]:
# Run evaluate_accuracy once per model in sampled_adv_models_18_champ against
# adv_test_50_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_50_loader)
    for net in sampled_adv_models_18_champ
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.3239755490151687
mean:  0.14503282771111617 , sd:  0.0527
min:   0.0775413176364048


#### AdvBNN(eps=0.50) vs Adv Test

In [86]:
# Run evaluate_accuracy once per model in sampled_adv_models_50 against
# adv_test_50_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_50_loader)
    for net in sampled_adv_models_50
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.9450984831333484
mean:  0.9181118406158026 , sd:  0.0137
min:   0.8784242698664252


#### AdvBNN_champ(eps=0.50) vs Adv Test

In [87]:
# Run evaluate_accuracy once per model in sampled_adv_models_50_champ against
# adv_test_50_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_50_loader)
    for net in sampled_adv_models_50_champ
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.9307222096445551
mean:  0.9089472492642066 , sd:  0.0188
min:   0.8162780167534526


#### BNN vs. Adv Test

In [85]:
# Run evaluate_accuracy once per model in sampled_models against
# adv_test_50_loader and keep the per-model results in `accs`.
accs = [
    evaluate_accuracy(net, adv_test_50_loader)
    for net in sampled_models
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:   0.05467511885895404
mean:  0.027729227982793755 , sd:  0.0094
min:   0.010640706361784016


In [58]:
# NOTE(review): this cell sits in the eps=0.50 section but scores
# sampled_models on adv_champ_test_loader, and both its code and its recorded
# output match the earlier In [58] cell -- looks like a leftover copy;
# confirm which loader was intended, or remove the cell.
accs = [
    evaluate_accuracy(net, adv_champ_test_loader)
    for net in sampled_models
]
# Summary across the sampled models: largest value, mean with standard
# deviation (np.std default, i.e. ddof=0, rounded to 4 decimals), smallest value.
print("max:  ", max(accs))
print("mean: ", np.mean(accs), ", sd: ", round(np.std(accs), 4))
print("min:  ", min(accs))

max:  0.8764002517306482
mean:  0.8200956576463185 , sd:  0.0436
min:  0.62039018250472
