In [13]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load IPython's autoreload extension; mode 2 re-imports modules before each cell runs,
# so edits to the imported lib modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
import logging
import os
import time

import numpy as np
import matplotlib.pyplot as plt
import foolbox
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim

from lib.dataset_utils import *
from lib.cifar_resnet import *
from lib.adv_model import *
from lib.dknn_attack_v2 import DKNNAttackV2
from lib.cwl2_attack import CWL2Attack
from lib.dknn import DKNNL2
from lib.utils import *
from lib.lip_model import *
from lib.cifar10_model import *

# from lib.cifar10_dcgan import Discriminator, Generator

In [15]:
import os

# Pin CUDA device enumeration to PCI bus order and expose only device 0
# to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [16]:
# Set all random seeds
seed = 2019  # also forwarded to load_cifar10_all when the data is loaded
np.random.seed(seed)  # seeds NumPy's global RNG
torch.manual_seed(seed)  # last expression: its returned Generator is what the cell displays

<torch._C.Generator at 0x7f6ad42467b0>

In [17]:
# Load the CIFAR-10 train / validation / test splits from '/data'.
# val_size=0.1 and the shared seed are forwarded to the project helper
# (presumably a 10% validation hold-out — confirm in lib.dataset_utils).
(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_cifar10_all(
    '/data', val_size=0.1, seed=seed)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


---

## Load Models

In [18]:
# Checkpoint to evaluate; swap in the commented name to load another experiment.
model_name = 'adv_cifar10_nca_exp3.h5'
# model_name = 'adv_cifar10_nca_exp19.h5'
net = NCAModel(PreActBlock, [2, 2, 2, 2],
               normalize=False, output_dim=20, init_it=1,
               train_it=False, train_data=(x_train, y_train))

# Set up model directory
save_dir = os.path.join(os.getcwd(), 'saved_models/')
# exist_ok avoids the separate isdir check (and the race between check and create)
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, model_name)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = net.to(device)
# map_location keeps the load working when `device` falls back to 'cpu'
# but the checkpoint was written from CUDA tensors.
net.load_state_dict(torch.load(model_path, map_location=device))
# Last expression: switches to eval mode and displays the module summary.
net.eval()

NCAModel(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): PreActBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): PreActBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
  )
  (layer2): Sequential(
    (0): PreActBlock(
      (bn1): BatchNorm2d(64,

In [22]:
# Recompute the model's training-set representation before scoring
# (NOTE(review): what exactly is cached lives in NCAModel — confirm there).
net.recompute_train_rep()

# Fill a (len(x_test), 10) logits array, 200 test samples per forward pass.
batch_size = 200
logits = np.zeros((len(x_test), 10))
with torch.no_grad():
    for start in range(0, len(x_test), batch_size):
        stop = start + batch_size
        batch_logits = net.compute_logits(x_test[start:stop].cuda())
        logits[start:stop] = batch_logits.cpu().numpy()

In [23]:
# Indices of test samples whose arg-max logit equals the label, followed by
# the fraction of the test set they represent (clean accuracy).
ind = np.where(logits.argmax(1) == y_test.numpy())[0]
clean_accuracy = len(ind) / len(x_test)
print(clean_accuracy)

0.7656


In [24]:
from lib.cwl2_attack import CWL2AttackNCA
# Attack object bound to the NCA model held in `net`.
attack = CWL2AttackNCA(net)
# Number of correctly classified test samples to attack in the following cells.
num = 100

In [25]:
# Untargeted attack on the first `num` correctly classified test samples (selected via `ind`).
# The keyword names suggest a Carlini-Wagner style search (binary search over a constant,
# optional early abort) — NOTE(review): exact semantics live in lib.cwl2_attack; confirm there.
x_adv = attack(x_test[ind][:num].cuda(), y_test[ind][:num].cuda(), 
               targeted=False, init_mode=2,
               binary_search_steps=10, max_iterations=500, confidence=0,
               learning_rate=1e-2, initial_const=1, abort_early=True,
               rand_start_std=0.1, check_adv_steps=200)

    step: 0; loss: 0.491; l2dist: 0.000
    step: 50; loss: 0.459; l2dist: 0.134
    step: 100; loss: 0.462; l2dist: 0.135
binary step: 0; number of successful adv: 6/100
    step: 0; loss: 9.277; l2dist: 2.134
    step: 50; loss: 2.318; l2dist: 0.931
    step: 100; loss: 2.319; l2dist: 0.940
binary step: 1; number of successful adv: 61/100
    step: 0; loss: 25.875; l2dist: 2.134
    step: 50; loss: 4.732; l2dist: 1.296
    step: 100; loss: 4.713; l2dist: 1.299
    step: 150; loss: 4.702; l2dist: 1.300
    step: 200; loss: 4.702; l2dist: 1.302
binary step: 2; number of successful adv: 91/100
    step: 0; loss: 60.676; l2dist: 2.136
    step: 50; loss: 6.266; l2dist: 1.791
    step: 100; loss: 6.180; l2dist: 1.790
    step: 150; loss: 6.143; l2dist: 1.790
    step: 200; loss: 6.127; l2dist: 1.808
    step: 250; loss: 6.113; l2dist: 1.805
    step: 300; loss: 6.112; l2dist: 1.808
    step: 350; loss: 6.105; l2dist: 1.805
    step: 400; loss: 6.095; l2dist: 1.807
    step: 450; loss: 6.0

In [27]:
# Mean L2 norm of the perturbation over the first `num` attacked samples
# (each sample is flattened to one row before the norm is taken).
(x_adv.cpu() - x_test[ind][:num]).view(num, -1).norm(2, 1).mean()

tensor(1.2142, grad_fn=<MeanBackward0>)

In [26]:
# Per-sample L2 norm of the perturbation.
# NOTE(review): x_test[ind] is not sliced to [:num] here, so this only works when x_adv has
# one row per entry of `ind` — confirm which x_adv this cell was run against.
pert = (x_adv.cpu() - x_test[ind]).view(x_adv.size(0), -1).norm(2, 1)
# NOTE(review): `ind_adv` is not assigned at notebook top level in the visible cells (only as a
# local inside full_eval); presumably a mask of successful adversarial examples — verify.
# d1/d2/d3: (number of correctly classified samples minus the number of selected perturbations
# with norm below 0.5 / 1 / 1.5), divided by the test-set size.
d1 = (len(ind) - (pert[ind_adv] < 0.5).sum().numpy()) / y_test.size(0)
d2 = (len(ind) - (pert[ind_adv] < 1).sum().numpy()) / y_test.size(0)
d3 = (len(ind) - (pert[ind_adv] < 1.5).sum().numpy()) / y_test.size(0)
# Printed with '&' separators (looks like a LaTeX table row).
print('& %.4f & %.4f & %.4f' % (d1, d2, d3))

& 0.0083 & 0.0063 & 0.0033


In [27]:
# Mean perturbation norm over the samples selected by `ind_adv`.
# NOTE(review): `ind_adv` is not assigned at top level in the visible cells — verify its origin.
pert[ind_adv].mean()

tensor(1.2168, grad_fn=<MeanBackward0>)

In [None]:
# Build a (len(y_train), 10) array holding, per training sample, the class frequencies
# among its retrieved neighbors.
# NOTE(review): `dknn` is only constructed in a later cell, so this cell fails on a fresh
# top-to-bottom run.
ys_train = np.zeros((len(y_train), 10))
# presumably [0][1] selects the neighbor indices of the first layer — TODO confirm against
# DKNNL2.get_neighbors
nb = dknn.get_neighbors(x_train, k=100)[0][1]
for i in range(len(y_train)):
    # Label counts over the neighbors, divided by 100 (matches k=100 above)
    ys_train[i] = np.bincount(y_train[nb[i]], minlength=10) / 100

---

## DkNN

In [19]:
# Only the 'fc' layer is passed in the layer list.
layers = ['fc']
# DkNN wrapper around `net`, built from the train and validation splits with k=100 and 10 classes.
dknn = DKNNL2(net, x_train, y_train, x_valid, y_valid, 
              layers, k=100, num_classes=10)

In [20]:
# Clean DkNN accuracy on the test set; `ind` keeps the indices of the
# correctly classified samples for the attack cells.
with torch.no_grad():
    y_pred = dknn.classify(x_test)
    is_correct = y_pred.argmax(1) == y_test.numpy()
    ind = np.where(is_correct)[0]
    print(is_correct.sum() / y_test.size(0))

0.7708


In [21]:
def attack_batch(attack, x, y, init_mode, init_mode_k, batch_size):
    """Run `attack` over `x`/`y` in consecutive mini-batches and stitch the results.

    Args:
        attack: callable invoked once per batch as
            ``attack(x_batch, y_batch, 2, guide_layer='fc', ...)`` with the
            fixed hyper-parameters listed in the call below.
        x: input tensor with the batch along dim 0.
        y: labels, sliced with the same ranges as `x`.
        init_mode: forwarded to `attack` as ``init_mode``.
        init_mode_k: forwarded to `attack` as ``init_mode_k``.
        batch_size: number of samples per `attack` call; the final batch may
            be smaller.

    Returns:
        A tensor shaped like `x` (created with ``torch.zeros_like(x)``) whose
        rows are filled with the outputs of `attack`.
    """
    x_adv = torch.zeros_like(x)
    total_num = x.size(0)
    # Step through the data by batch_size up to total_num so a trailing
    # partial batch is attacked as well; floor division would skip it and
    # silently leave those rows as zeros.
    for begin in range(0, total_num, batch_size):
        end = min(begin + batch_size, total_num)
        x_adv[begin:end] = attack(
            x[begin:end], y[begin:end], 2, guide_layer='fc', m=200,
            init_mode=init_mode, init_mode_k=init_mode_k,
            binary_search_steps=10, max_iterations=1000, learning_rate=1e-2,
            initial_const=1e0, max_linf=None, random_start=True,
            thres_steps=200, check_adv_steps=200, verbose=False)
    return x_adv

num = 100


def full_eval(dknn):
    """Report clean accuracy of `dknn` and attack its correctly classified test samples.

    Reads the notebook globals `x_test`, `y_test` and `num`. The first `num`
    correctly classified test samples are attacked with `DKNNAttackV2` under
    several initialisation settings, and for each sample the smallest L2
    distance among the successful adversarial examples is kept.

    Args:
        dknn: classifier exposing ``classify(x)``; it is also passed to
            ``DKNNAttackV2``.

    Returns:
        NumPy array with one entry per attacked sample holding the smallest
        successful L2 distance, or the sentinel 1e9 when no attack succeeded.
    """
    labels_np = y_test.numpy()
    with torch.no_grad():
        y_pred = dknn.classify(x_test)
        is_correct = y_pred.argmax(1) == labels_np
        ind = np.where(is_correct)[0]
    print(is_correct.sum() / y_test.size(0))

    # Clean samples / labels under attack. Use the actual row count so the
    # reshape below also works when fewer than `num` samples are correct.
    x_clean = x_test[ind][:num]
    y_clean = y_test[ind][:num]
    n = x_clean.size(0)

    # 1e9 marks "no successful adversarial example found yet".
    dist_all = np.zeros(n) + 1e9
    attack = DKNNAttackV2(dknn)

    # (init_mode, init_mode_k) pairs tried in order: init_mode 1 once, then
    # init_mode 2 with k = 1..5. One loop replaces two copies of the same code.
    settings = [(1, 1)] + [(2, k) for k in range(1, 6)]
    for init_mode, init_mode_k in settings:
        x_adv = attack_batch(
            attack, x_clean.cuda(), y_clean, init_mode, init_mode_k, 100)
        with torch.no_grad():
            y_pred = dknn.classify(x_adv)
            ind_adv = np.asarray(y_pred.argmax(1) != y_clean.numpy(), dtype=bool)
            dist = (x_adv.cpu() - x_clean).view(n, -1).norm(2, 1).numpy()
        # Keep the smaller distance wherever this run produced a successful
        # adversarial example.
        improved = ind_adv & (dist < dist_all)
        dist_all[improved] = dist[improved]

    success = dist_all < 1e9
    adv_acc = (~success).mean()
    # Guard the empty selection: a mean over zero elements is NaN and emits a
    # RuntimeWarning.
    mean_dist = dist_all[success].mean() if success.any() else float('nan')
    print('adv accuracy: %.4f, mean dist: %.4f' % (adv_acc, mean_dist))
    return dist_all

In [None]:
# Time the full DkNN attack evaluation; `dist` receives the per-sample distances
# returned by full_eval, and the elapsed wall-clock seconds are printed.
start = time.time()
dist = full_eval(dknn)
print(time.time() - start)

0.7708


In [23]:
# Mean of the distances returned by full_eval.
# NOTE(review): failed attacks keep full_eval's 1e9 sentinel, so this mean is only
# meaningful when every attacked sample has a successful adversarial example.
dist.mean()

1.1592309394478797

---
## CW Attack on AT

In [35]:
model_name = 'adv_cifar10_exp6.h5'
net = PreActResNet(PreActBlock, [2, 2, 2, 2])
# Settings handed to the PGDL2Model wrapper.
config = {'epsilon': 1,
          'num_steps': 10,
          'step_size': 0.2,
          'random_start': True,
          'loss_func': 'xent'}
net = PGDL2Model(net, config)

# Set up model directory
save_dir = os.path.join(os.getcwd(), 'saved_models/')
# exist_ok avoids the separate isdir check (and the race between check and create)
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, model_name)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = net.to(device)
# map_location keeps the load working when `device` falls back to 'cpu'
# but the checkpoint was written from CUDA tensors.
net.load_state_dict(torch.load(model_path, map_location=device))
# The state dict is loaded into the PGDL2Model wrapper; afterwards only its
# `basic_net` attribute is kept for evaluation.
net = net.basic_net
# Last expression: switches to eval mode and displays the module summary.
net.eval()

PreActResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layer1): Sequential(
    (0): PreActBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): PreActBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
  )
  (layer2): Sequential(
    (0): PreActBlock(
      (bn1): BatchNorm2d

In [36]:
# Fill a (len(x_test), 10) logits array with the outputs of `net`,
# 200 test samples per forward pass.
batch_size = 200
logits = np.zeros((len(x_test), 10))
with torch.no_grad():
    for start in range(0, len(x_test), batch_size):
        stop = start + batch_size
        batch_logits = net(x_test[start:stop].cuda())
        logits[start:stop] = batch_logits.cpu().numpy()

In [37]:
# Indices of test samples whose arg-max logit equals the label, followed by
# the fraction of the test set they represent (clean accuracy).
ind = np.where(logits.argmax(1) == y_test.numpy())[0]
clean_accuracy = len(ind) / len(x_test)
print(clean_accuracy)

0.7565


In [None]:
from lib.cwl2_attack import CWL2Attack

# Attack object for the network in `net`; the training data is passed to the constructor as well.
attack = CWL2Attack(net, x_train=x_train, y_train=y_train)

def attack_batch(x, y, batch_size):
    """Run the global `attack` over `x`/`y` in consecutive mini-batches.

    NOTE(review): this shares its name with the earlier six-argument
    ``attack_batch(attack, x, y, init_mode, init_mode_k, batch_size)`` and
    shadows it once this cell runs.

    Args:
        x: input tensor with the batch along dim 0.
        y: labels, sliced with the same ranges as `x`.
        batch_size: number of samples per `attack` call; the final batch may
            be smaller.

    Returns:
        A tensor shaped like `x` (created with ``torch.zeros_like(x)``) whose
        rows are filled with the outputs of `attack`.
    """
    x_adv = torch.zeros_like(x)
    total_num = x.size(0)
    # Step through the data by batch_size up to total_num so a trailing
    # partial batch is attacked as well; floor division would skip it and
    # silently leave those rows as zeros.
    for begin in range(0, total_num, batch_size):
        end = min(begin + batch_size, total_num)
        x_adv[begin:end] = attack(
            x[begin:end], y[begin:end], targeted=False,
            init_mode=2, binary_search_steps=10, max_iterations=500,
            confidence=0, learning_rate=1e-2, initial_const=1,
            abort_early=False, rand_start_std=0.1, check_adv_steps=100)
    return x_adv

# Attack the first `num` correctly classified test samples (selected via `ind`),
# using a batch size of 100.
num = 100
x_adv = attack_batch(x_test[ind][:num].cuda(), y_test[ind][:num].cuda(), 100)

In [39]:
# Mean L2 norm of the perturbation over the first `num` attacked samples
# (each sample is flattened to one row before the norm is taken).
(x_adv.cpu() - x_test[ind][:num]).view(num, -1).norm(2, 1).mean()

tensor(1.4624, grad_fn=<MeanBackward0>)

---

## RotNet

Train small network on top of pre-trained rotation network

In [27]:
# Options for the NetworkInNetwork constructor: 4 classes and 4 stages
# (presumably the 4 rotation classes of the RotNet pretext task — TODO confirm).
opt = {'num_classes': 4, 'num_stages': 4}
net = NetworkInNetwork(opt)

In [28]:
# Restore the weights stored under the 'network' key of the checkpoint file.
net.load_state_dict(torch.load('saved_models/model_net_epoch200')['network'])

In [29]:
# Keep only the feature blocks of the loaded network.
# NOTE(review): rebinding `net` makes this cell non-idempotent — re-running it would look up
# `_feature_blocks` on the already-extracted object.
net = net._feature_blocks

In [30]:
# Wrap the feature blocks (block=2), then move the wrapper to the GPU and switch it to eval mode.
net_wrap = NINWrapper(net, block=2)
net_wrap = net_wrap.cuda().eval()

In [33]:
# Earlier, smaller head kept for reference:
# net_wrap.fc = nn.Sequential(
#     nn.Linear(12288, 200),
#     nn.ReLU(inplace=True),
#     nn.Linear(200, 200),
#     nn.ReLU(inplace=True),
#     nn.Linear(200, 128))
# Replace the wrapper's `fc` head with a 12288 -> 2000 -> 400 -> 128 MLP.
net_wrap.fc = nn.Sequential(
    nn.Linear(12288, 2000),
    nn.ReLU(inplace=True),
    nn.Linear(2000, 400),
    nn.ReLU(inplace=True),
    nn.Linear(400, 128))
# Move again so the newly created layers are placed on the GPU too.
net_wrap = net_wrap.to('cuda')

In [44]:
# Load the 'transfer_cifar10_exp11' checkpoint into the whole wrapper.
net_wrap.load_state_dict(torch.load('saved_models/transfer_cifar10_exp11.h5'))

In [81]:
# 4-class PreActResNet restored from the 'rot_cifar10_exp0' checkpoint
# (presumably the rotation-prediction network — TODO confirm).
net = PreActResNet(PreActBlock, [2, 2, 2, 2], num_classes=4)
net.load_state_dict(torch.load('saved_models/rot_cifar10_exp0.h5'))
net_wrap = ResNetWrapper(net, block=3, dim=16384)
# Freeze every parameter the wrapper has at this point; the `fc` head assigned below is
# created afterwards, so its parameters keep the default requires_grad.
for param in net_wrap.parameters():
    param.requires_grad = False
# Variant of the head without BatchNorm, kept for reference:
# net_wrap.fc = nn.Sequential(
#     nn.Linear(16384, 2000),
#     nn.ReLU(inplace=True),
#     nn.Linear(2000, 400),
#     nn.ReLU(inplace=True),
#     nn.Linear(400, 128),
# )
# Head in use: 16384 -> 2000 -> 400 -> 128 with BatchNorm1d before each Linear layer.
net_wrap.fc = nn.Sequential(
    nn.BatchNorm1d(16384),
    nn.Linear(16384, 2000),
    nn.ReLU(inplace=True),
    nn.BatchNorm1d(2000),
    nn.Linear(2000, 400),
    nn.ReLU(inplace=True),
    nn.BatchNorm1d(400),
    nn.Linear(400, 128),
)
# The head must be in place before loading so the checkpoint's keys can match the wrapper's.
net_wrap.load_state_dict(torch.load('saved_models/transfer_cifar10_exp18.h5'))
net_wrap = net_wrap.to('cuda').eval()

In [82]:
# Only the 'fc' layer is passed in the layer list.
layers = ['fc']

# DkNN wrapper around `net_wrap` with k=1 and 10 classes; this rebinds the global `dknn`.
dknn = DKNNL2(net_wrap, x_train, y_train, x_valid, y_valid, layers, 
              k=1, num_classes=10)

In [101]:
# Fraction of test samples for which the DkNN's arg-max prediction equals the label.
with torch.no_grad():
    y_pred = dknn.classify(x_test)
    print((y_pred.argmax(1) == y_test.numpy()).sum() / y_test.size(0))

0.7015


In [99]:
# Fraction of test samples where the arg-max of net_wrap's raw output equals the label.
with torch.no_grad():
    # Named `num_correct` rather than `num` so the sample count `num` used by
    # the attack cells is not overwritten by this counter.
    num_correct = 0
    eval_batch_size = 100
    # Step up to the full test-set size so a trailing partial batch is scored
    # as well; the denominator below counts every test sample.
    for begin in range(0, x_test.size(0), eval_batch_size):
        end = begin + eval_batch_size
        y_pred = net_wrap(x_test[begin:end].to('cuda'))
        num_correct += (y_pred.argmax(1).cpu() == y_test[begin:end]).sum().item()
    print(num_correct / y_test.size(0))

0.0011


In [59]:
# Load previously saved adversarial examples.
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
# The context manager closes the file handle, which the bare open() call left dangling.
with open('x_adv/x_adv_adv_cifar10_exp0.h5.p', 'rb') as f:
    x_adv = pickle.load(f)
# x_adv = pickle.load(open('x_adv/x_ba_cifar10_adv2_0.2_0.001.p', 'rb'))
# x_adv = pickle.load(open('x_adv/x_adv_cifar10_resnet_exp2.h5.p', 'rb'))

In [56]:
# Save the first 10 adversarial images as one row (nrow=10) without padding.
torchvision.utils.save_image(x_adv[:10], 'cifar10_cw_adv.png', nrow=10, padding=0)

In [10]:
# Save the first 10 test images as one row (nrow=10) without padding.
torchvision.utils.save_image(x_test[:10], 'cifar10_test.png', nrow=10, padding=0)

In [75]:
# Difference between the first 10 adversarial images and the first 10 test images, saved with
# normalize=True.
# NOTE(review): x_test is indexed directly here rather than through `ind`, unlike the distance
# cells — confirm the rows of x_adv line up with x_test[:10].
diff = x_adv[:10].cpu() - x_test[:10]
torchvision.utils.save_image(diff, 'diff_cw_adv.png', nrow=10, padding=0, normalize=True)

In [50]:
# Per-sample L2 perturbation norms for the first 100 samples.
# NOTE(review): the subtraction happens before slicing, so x_adv and x_test[ind] must already
# be shape-compatible and on the same device — confirm.
(x_adv - x_test[ind])[:100].view(100, -1).norm(2, 1)

tensor([0.1601, 0.1639, 0.0439, 0.0925, 0.2344, 0.1875, 0.1263, 0.0629, 0.1313,
        0.0308, 0.1699, 0.2601, 0.1186, 0.2549, 0.2104, 0.0949, 0.0168, 0.1908,
        0.2373, 0.2276, 0.0749, 0.1144, 0.0599, 0.3487, 0.0113, 0.1286, 0.0399,
        0.1052, 0.1184, 0.1812, 0.1033, 0.1054, 0.1015, 0.0491, 0.2473, 0.1405,
        0.1351, 0.3051, 0.2656, 0.1246, 0.1826, 0.1177, 0.1306, 0.2810, 0.2485,
        0.0251, 0.0446, 0.2726, 0.0799, 0.2484, 0.1870, 0.0150, 0.0376, 0.1555,
        0.1845, 0.0784, 0.0302, 0.0403, 0.0080, 0.1847, 0.1957, 0.1100, 0.0935,
        0.1443, 0.0339, 0.1736, 0.0629, 0.1216, 0.0863, 0.1177, 0.0650, 0.1191,
        0.1149, 0.3842, 0.2538, 0.1573, 0.1572, 0.1589, 0.0625, 0.2314, 0.1450,
        0.0575, 0.0426, 0.0426, 0.0458, 0.2055, 0.2434, 0.1804, 0.0808, 0.2458,
        0.2491, 0.0758, 0.0949, 0.1004, 0.0661, 0.2742, 0.2168, 0.0666, 0.0765,
        0.2380])

In [None]:
# Per-sample L2 perturbation norms, moving x_adv to the CPU and slicing x_test[ind] to its
# first 100 rows before subtracting.
(x_adv.cpu() - x_test[ind][:100]).view(100, -1).norm(2, 1)