<center><h1>WRN: Cifar10</h1></center>

## Imports

In [1]:
from __future__ import division,print_function

%matplotlib inline
%load_ext autoreload
%autoreload 2

import sys
from tqdm import tqdm_notebook as tqdm

import random
import matplotlib.pyplot as plt
import math

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init
from torch.autograd import Variable, grad
from torchvision import datasets, transforms
from torch.nn.parameter import Parameter

import utils.calculate_log as callog
from utils.wrn import WideResNet

from utils.detector import Detector, gram_margin_loss
import utils.attacks as attacks

import warnings
warnings.filterwarnings('ignore')

In [2]:
torch.cuda.set_device(3)

## Model definition

In [3]:
torch_model = WideResNet(depth=40, widen_factor=2, num_classes=10)

torch_model.load(path="benchmark_ckpts/cifar10_reg_training_99.pt")
torch_model.cuda()
torch_model.params = list(torch_model.parameters())
torch_model.eval()
print("Done")    

Done


## Datasets

<b>In-distribution Datasets</b>

In [4]:
batch_size = 256
# mean = np.array([[125.3/255, 123.0/255, 113.9/255]]).T

# std = np.array([[63.0/255, 62.1/255.0, 66.7/255.0]]).T
# normalize = transforms.Normalize((125.3/255, 123.0/255, 113.9/255), (63.0/255, 62.1/255.0, 66.7/255.0))

normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
        
    ])
transform_test = transforms.Compose([
        transforms.CenterCrop(size=(32, 32)),
        transforms.ToTensor(),
        normalize
    ])

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('~/datasets/cifarpy', train=True, download=True,
                   transform=transform_train),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('~/datasets/cifarpy', train=False, transform=transform_test),
    batch_size=batch_size)


detector_data_transform = transforms.Compose([transforms.ToTensor(), normalize])
data_train = list(torch.utils.data.DataLoader(
        datasets.CIFAR10('~/datasets/cifarpy', 
                     train=True, 
                     transform=detector_data_transform, 
                     download=True),
        batch_size=1, shuffle=False))

data_test = list(torch.utils.data.DataLoader(
        datasets.CIFAR10('~/datasets/cifarpy', 
                     train=False, 
                     transform=detector_data_transform, 
                     download=True),
        batch_size=1, shuffle=False))

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [5]:
def pipeline_batch(bxs):
    pil = transforms.ToPILImage()
    return torch.squeeze(torch.stack([transform_test(pil(bx)) for bx in bxs]), dim=1)

def get_batches(d, batch_size=32):
    bx = []
    by = []
    tens = transforms.ToTensor()
    for idx in range(0,len(d),batch_size):
        bx_batch = torch.squeeze(torch.stack([tens(x[0]) for x in d[idx:idx+batch_size]]),dim=1)
        bx.append(bx_batch)
        by.append(torch.Tensor([x[1] for x in d[idx:idx+batch_size]]).type(torch.LongTensor))
    
    return bx, by

def advs_p(p, bxs, bys, nrof_batches=None):
    if nrof_batches is None:
        nrof_batches = len(bxs)
        
    advs = []
    for i in tqdm(range(len(bxs))):
        if i >= nrof_batches:
            break
        
        _, feats_reg = torch_model.gram_forward((bxs[i]*2 - 1).cuda())
        advs_batch = p(torch_model, bxs[i].cuda(), bys[i].cuda(), feats_reg)

        advs.append(advs_batch)

    torch.cuda.empty_cache()
    
    return advs

def adversarial_acc(advs, bys):
    torch_model.eval()
    correct = 0
    total = 0

    for i in range(len(advs)):
        pipelined = pipeline_batch(advs[i].cpu())

        x = pipelined.cuda()
        y = bys[i].numpy()

        correct += (y==np.argmax(torch_model(x).detach().cpu().numpy(),axis=1)).sum()
        total += y.shape[0]


    print("Adversarial Test Accuracy: ", correct/total)
    
def ds_grouped(bxs, bys):
    ds = []
    for i in range(len(bxs)):
        pipelined = pipeline_batch(bxs[i].cpu())
        for j in range(len(bxs[i])):
            ds.append((pipelined[j], bys[i][j]))
    return ds

def adversarial_scores(detector, advs_batches, pbar = lambda x, total=None: x):
    auroc = []
    for batch in pbar(advs_batches):
        auroc.append(detector.compute_ood_deviations_batch(batch*2 - 1)["AUROC"])
    
    return np.mean(auroc)
    
    
def model_accuracy():
    torch_model.eval()
    correct = 0
    total = 0
    for x,y in test_loader:
        x = x.cuda()
        y = y.numpy()
        correct += (y==np.argmax(torch_model(x).detach().cpu().numpy(),axis=1)).sum()
        total += y.shape[0]
        
    return correct/total

<center><h1> Results </h1></center>

In [6]:
model_accuracy()

0.9462

In [7]:
adversary = attacks.PGD(epsilon=8./255, num_steps=10, step_size=2./255).cuda()

In [8]:
detector = Detector(torch_model, data_train, data_test, 512, pbar=None)

In [12]:
cifar10 = list(datasets.CIFAR10('~/datasets/cifarpy', train=False))

print("Calculating L_Inf")
xs, ys = get_batches(cifar10, batch_size=128)
# pinf = PGD()
# pinf = adversary
pinf = PGD_margin().cuda()
advs_inf = advs_p(pinf, xs, ys)

adversarial_acc(advs_inf, ys)

adversarial_scores(detector, advs_inf, pbar=tqdm)

Calculating L_Inf


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))


Adversarial Test Accuracy:  0.088


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




0.9464791337025317

In [32]:
cifar10 = list(datasets.CIFAR10('~/datasets/cifarpy', train=False))
xs, ys = get_batches(cifar10, batch_size=32)

In [38]:
attacker = PGD_margin().cuda()
def process_batch(x, y):
    x, y = x.cuda(), y.cuda()
    
    ouptut_reg, feats_reg = torch_model.gram_forward(x*2 - 1)
    adv_x = attacker(torch_model, x, y, feats_reg)
    
    output_adv, feats_adv = torch_model.gram_forward(adv_x * 2 - 1)

    margin_loss = gram_margin_loss(feats_reg, feats_adv, 20)
    
    return float(margin_loss), float(output_adv.max(dim=1)[1].eq(y).sum())

In [39]:
losses = []
for i, x in tqdm(enumerate(xs), total=len(xs)):
    if i % 5 != 0:
        continue
        
    loss, accuracy = process_batch(x, ys[i])
    
    print(loss, accuracy/32)

HBox(children=(FloatProgress(value=0.0, max=313.0), HTML(value='')))

0.017739875242114067 0.15625
1.718410611152649 0.15625
6.364144325256348 0.15625
1.626102328300476 0.09375
0.0 0.03125
0.0 0.03125
2.3059892654418945 0.15625
0.0 0.09375
0.9669406414031982 0.03125


KeyboardInterrupt: 

In [40]:
np.mean(losses)

0.19279666803777218

In [10]:
def G_p(temp):
    temp = temp.reshape(temp.shape[0],temp.shape[1],-1)
    temp = ((torch.matmul(temp,temp.transpose(dim0=2,dim1=1)))).sum(dim=2)
    return temp.reshape(temp.shape[0],-1)

class PGD_margin(nn.Module):
    def __init__(self, epsilon=8./255, num_steps=10, step_size=2./255, grad_sign=True):
        super().__init__()
        self.epsilon = epsilon
        self.num_steps = num_steps
        self.step_size = step_size
        self.grad_sign = grad_sign

    def forward(self, model, bx, by, feats_reg):
        """
        :param model: the classifier's forward method
        :param bx: batch of images
        :param by: true labels
        :return: perturbed batch of images
        """
        adv_bx = bx.detach()
        adv_bx += torch.zeros_like(adv_bx).uniform_(-self.epsilon, self.epsilon)

        for i in range(self.num_steps):
            adv_bx.requires_grad_()
            with torch.enable_grad():
                logits, feats_adv = model.gram_forward(adv_bx * 2 - 1)
                margin_loss = gram_margin_loss(feats_adv,feats_reg, margin=200).cuda()
                
                loss = 1/2 * F.cross_entropy(logits, by, reduction='sum') - 1/2 * margin_loss
                
            grad = torch.autograd.grad(loss, adv_bx, only_inputs=True)[0]
            adv_bx = adv_bx.detach() + self.step_size * torch.sign(grad.detach())
            adv_bx = torch.min(torch.max(adv_bx, bx - self.epsilon), bx + self.epsilon).clamp(0, 1)
            
        return adv_bx


In [15]:
print("Benchmark For a Cifar10 WideResNet Trained With OE\n")

print("Model Accuracy On Test Set:", model_accuracy())
adversarial_acc(advs_inf, ys)
print("Detection Benchmark:")
adversarial_scores(advs_inf, ys, powers=powers)

print("\nAverage Gram Deviations For Test Set: {}\n".format(calc_gram_dev_target()))


Benchmark For a Cifar10 WideResNet Trained With OE

Model Accuracy On Test Set: 0.5494
Adversarial Test Accuracy:  0.0014
Detection Benchmark:
 TNR    AUROC  DTACC  AUIN   AUOUT 
 56.273 75.262 77.094 58.851 84.621

Average Gram Deviations For Test Set: 0.6627426147460938



In [26]:
print("–––– Create Undetectible Adversarial Attacks ––––")
print("Epsilon: 8/255, Num Steps: 10, Step Size: 2/255")

p_gram = PGD_Gram(gram_target=calc_gram_dev_target(), verbose=True)
advs_gram = advs_p(p_gram, xs, ys, nrof_batches = 1)
adversarial_acc(advs_gram, ys)
adversarial_scores(advs_gram, ys, powers)

–––– Create Undetectible Adversarial Attacks ––––
Epsilon: 8/255, Num Steps: 10, Step Size: 2/255


HBox(children=(FloatProgress(value=0.0, max=313.0), HTML(value='')))

Step: 0, Cent: 0.14773714542388916, Gram: 73.73921203613281, Total Loss: -73.59147644042969
Step: 1, Cent: 0.6315072178840637, Gram: 1.5961074829101562, Total Loss: -0.9646002650260925
Step: 2, Cent: 1.6839289665222168, Gram: 16.002609252929688, Total Loss: -14.318679809570312
Step: 3, Cent: 2.3709537982940674, Gram: 60.39494323730469, Total Loss: -58.023990631103516
Step: 4, Cent: 4.563270568847656, Gram: 0.0, Total Loss: 4.563270568847656
Step: 5, Cent: 6.832106590270996, Gram: 124.79806518554688, Total Loss: -117.96595764160156
Step: 6, Cent: 5.832967281341553, Gram: 0.0, Total Loss: 5.832967281341553
Step: 7, Cent: 8.62597942352295, Gram: 36.21221923828125, Total Loss: -27.586238861083984
Step: 8, Cent: 7.858233451843262, Gram: 0.0, Total Loss: 7.858233451843262
Step: 9, Cent: 10.103129386901855, Gram: 9.694610595703125, Total Loss: 0.40851879119873047
Adversarial Test Accuracy:  0.03125
 TNR    AUROC  DTACC  AUIN   AUOUT 
  3.125 32.498 53.872 98.666  0.325


In [106]:
adversarial_acc(advs_gram, ys)
adversarial_scores(advs_gram, ys, powers)

Adversarial Test Accuracy:  0.0805
 TNR    AUROC  DTACC  AUIN   AUOUT 
 10.962 49.469 59.682 43.090 58.825


In [66]:
def G_p(temp):
    temp = temp.reshape(temp.shape[0],temp.shape[1],-1)
    temp = ((torch.matmul(temp,temp.transpose(dim0=2,dim1=1)))).sum(dim=2)
    return temp.reshape(temp.shape[0],-1)

def proto_gram_margin_loss(feats_reg, feats_adv, margin):
    assert len(feats_reg) == len(feats_adv)

    layer_deviations = torch.zeros((len(feats_reg), len(feats_reg[0])))
    for i in range(len(feats_reg)):
        g_p_reg = G_p(feats_reg[i])
        g_p_adv = G_p(feats_adv[i])
                
        clamp = torch.tensor(1.0).cuda()
        
        orig_max = torch.max(g_p_reg, dim=1)[0]
        clamped_max = torch.max(torch.abs(orig_max), clamp)
               
        max_dist = F.relu(torch.max(g_p_adv, dim=1)[0] - orig_max)/clamped_max
        
        orig_min = torch.min(g_p_reg, dim=1)[0]
        clamped_min = torch.max(torch.abs(orig_min), clamp)
        
        min_dist = F.relu(orig_min - torch.min(g_p_adv, dim=1)[0])/clamped_min
        
        layer_deviations[i] = max_dist + min_dist
        
    return F.relu(margin - layer_deviations.sum(dim=0)).pow(2).mean()

In [18]:
def calc_gram_dev_target():
    return detector.all_test_deviations.mean(axis=0).sum() 

def G_p_gpu(ob, p):
    temp = ob
    
    temp = temp**p
    temp = temp.reshape(temp.shape[0],temp.shape[1],-1)
    temp = ((torch.matmul(temp,temp.transpose(dim0=2,dim1=1)))).sum(dim=2) 
    temp = (temp.sign()*torch.abs(temp)**(1/p)).reshape(temp.shape[0],-1)
    
    return temp

class PGD_Gram(nn.Module):
    def __init__(self, epsilon=8/255, num_steps=10, step_size=2/255, grad_sign=True, 
                         mean = None, std = None, nrof_classes=10, gram_target = 247, verbose=True):
        super().__init__()
        self.epsilon = epsilon
        self.num_steps = num_steps
        self.step_size = step_size
        self.grad_sign = grad_sign
        
        if mean is None:
            self.mean = torch.FloatTensor([0.4914, 0.4822, 0.4465]).view(1,3,1,1).cuda()
        else:
            self.mean = torch.FloatTensor(mean).view(1,3,1,1).cuda()
        if std is None:
            self.std = torch.FloatTensor([0.2023, 0.1994, 0.2010]).view(1,3,1,1).cuda()
        else:
            self.std = torch.FloatTensor(std).view(1,3,1,1).cuda()
            
        self.mns = [cuda(detector.mins[i]) for i in range(nrof_classes)]
        self.mxs = [cuda(detector.maxs[i]) for i in range(nrof_classes)]
        self.gram_target = gram_target * 0.85
        self.verbose = verbose
            
    def get_deviation(self, feat_list, idx, mins, maxs, power=powers):
        batch_deviations = []
        for L,feat_L in enumerate(feat_list):
            dev = 0
            for p,P in enumerate(power):
                g_p = G_p_gpu(feat_L,P)[idx]
                
                dev +=  (F.relu(mins[L][p]-g_p)/torch.abs(mins[L][p]+10**-6)).sum(dim=1,keepdim=True)
                dev +=  (F.relu(g_p-maxs[L][p])/torch.abs(maxs[L][p]+10**-6)).sum(dim=1,keepdim=True)
                
                batch_deviations.append(dev)
                
        return batch_deviations
        
    def gram_loss(self, feats, logits):
        confs = F.softmax(logits, dim=1)
        _, indices = torch.max(confs, 1)
        
        loss = 0
        for i in range(10):
            idxs = indices == i

            if idxs.sum() == 0:
                continue
            
            batch_dev = self.get_deviation(feats, idxs, mins=self.mns[i], maxs=self.mxs[i])
            batch_dev = torch.squeeze(torch.stack(batch_dev, dim=1))
            
            loss += batch_dev.sum()
                
        return F.relu(loss - logits.shape[0] * self.gram_target)
    
    def forward(self, model, bx, by):
        """
        :param model: the classifier's forward method
        :param bx: batch of images
        :param by: true labels
        :return: perturbed batch of images
        """
        model.eval()
        
        adv_bx = bx.detach()
        adv_bx += torch.zeros_like(adv_bx).uniform_(-self.epsilon, self.epsilon)

        for i in range(self.num_steps):
            adv_bx.requires_grad_()
            with torch.enable_grad():
                logits, feats = model.gram_forward((adv_bx - self.mean)/self.std)
                
                cent_loss = F.cross_entropy(logits, by, reduction='mean')
                gram_loss = self.gram_loss(feats, logits)
                
                loss = cent_loss - gram_loss
                
            if self.verbose:
                print("Step: {}, Cent: {}, Gram: {}, Total Loss: {}".format(i, cent_loss, gram_loss, loss))
            
            grad = torch.autograd.grad(loss, adv_bx, only_inputs=True)[0]
            adv_bx = adv_bx.detach() + self.step_size * torch.sign(grad.detach())
            adv_bx = torch.min(torch.max(adv_bx, bx - self.epsilon), bx + self.epsilon).clamp(0, 1)

        return adv_bx

NameError: name 'powers' is not defined