# Adversarial attack experiment plots

In this notebook, we will examine internal layer representations for a classifier trained to recognize scene categories.

Set up matplotlib for inline plotting on a high-resolution (retina) browser display; torch and numpy are imported in the cells that follow.

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
import matplotlib as mpl
from importlib import reload
import IPython
# Figure style used by every plot below: hairline strokes and axes,
# with the top and right spines hidden.
mpl.rcParams.update({
    'lines.linewidth': 0.25,
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.linewidth': 0.25,
})

Set up experiment directory and settings

In [None]:
import torch, argparse, os, shutil, inspect, json, numpy
import netdissect
from netdissect.easydict import EasyDict
from netdissect import experiment
from netdissect.experiment import resfile
from netdissect import pbar, nethook, renormalize, parallelfolder, pidfile
from netdissect import upsample, tally, imgviz, imgsave, bargraph, show
# Re-execute the imgviz module (presumably so that edits to it are picked up
# without restarting the kernel).
reload(imgviz)
# choices are alexnet, vgg16, or resnet152.
args = EasyDict(model='vgg16', dataset='places', seg='netpqc', layer=None)
# The result directory is named after the model, dataset and segmenter settings.
resdir = 'results/%s-%s-%s' % (args.model, args.dataset, args.seg)
# resfile(...) is used further down to name the quantile cache file; it
# presumably resolves names against this directory — confirm in netdissect.experiment.
experiment.set_result_dir(resdir)

Load the classifier model and dataset.

In [None]:
# Build the classifier selected by `args` through the experiment helper.
model = experiment.load_model(args)
# args.layer is None, so the helper decides which layer is instrumented
# (presumably a per-model default — confirm in experiment.instrumented_layername).
layername = experiment.instrumented_layername(args)
# Keep this layer's activations on every forward pass; later cells read them
# back with model.retained_layer(layername).
model.retain_layer(layername)
dataset = experiment.load_dataset(args)
# upfn is applied to the retained activations when tallying quantiles below
# (presumably it upsamples them — confirm in experiment.make_upfn).
upfn = experiment.make_upfn(args, dataset, model, layername)
# The quantile tally below is given the full dataset length as its sample size.
sample_size = len(dataset)

print('Inspecting layer %s of model %s on %s' % (layername, args.model, args.dataset))

In [None]:
# Classifier labels
from urllib.request import urlopen
from netdissect import renormalize
import pickle
import numpy as np

# Quantile passed to rq.quantiles(...) below to obtain the visualizer's `level`.
percent_level=0.995
# Class names, indexed by class id; used to translate labels and predictions.
classlabels = dataset.classes
# NOTE(review): renorm is not referenced again in the visible notebook.
renorm = renormalize.renormalizer(dataset, mode='zc')
# Presumably sets the description shown on the next progress bar — confirm in netdissect.pbar.
pbar.descnext('rq')
def compute_samples(batch, *args):
    """Return the layer's activations for one batch as a 2-D sample matrix.

    The batch is moved to the GPU and run through ``model``; the activations
    retained at ``layername`` are passed through ``upfn``, axis 1 is moved to
    the end, and the result is flattened so that it has one column per entry
    of axis 1 of the retained activations (presumably one column per unit).
    Extra positional arguments are accepted and ignored.
    """
    model(batch.cuda())
    retained = model.retained_layer(layername)
    num_columns = retained.shape[1]
    units_last = upfn(retained).permute(0, 2, 3, 1)
    return units_last.contiguous().view(-1, num_columns)
# Tally activation quantiles from the samples produced by compute_samples over
# the dataset; a cache file path ('rq.npz' via resfile) is passed to the tally.
# NOTE(review): num_workers=100 is a large number of loader workers — confirm
# it suits the machine this runs on.
rq = tally.tally_quantile(compute_samples, dataset,
                          sample_size=sample_size,
                          r=8192,
                          num_workers=100,
                          pin_memory=True,
                          cachefile=resfile('rq.npz'))
from netdissect import imgviz
# Visualizer used by the image cells below; its `level` is taken from the
# tallied quantiles at percent_level.
iv = imgviz.ImageVisualizer((100, 100), source=dataset, quantiles=rq, level=rq.quantiles(percent_level))


In [None]:
import pickle
import numpy as np
def pickle_load(file_name):
    """Read and return the object pickled in the file at ``file_name``.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only use this on files from a
    trusted source.
    """
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)

In [None]:
# Separate impact on important units from unimportant ones.
#
# For every attack target class this cell
#   1. ranks the units by sua_diff for that class (ascending sort, so the
#      units with the lowest ablation-minus-baseline value come first) and
#      takes the first num_imp_units as the "important" units,
#   2. collects the first num_images dataset indices whose ground-truth label
#      and model prediction both differ from the target class,
#   3. loads the stored attack result for each index and measures, per unit,
#      the absolute change in peak activation between the original and the
#      attacked image,
#   4. stores the mean change and a 99% confidence half-width for the
#      important units and for all other units.
barchart_inc_mean = {}
barchart_inc_err = {}
barchart_other_mean = {}
barchart_other_err = {}
good_index_map = {}

num_imp_units = 4
num_images = 1000
ci_99_factor = 2.58  # multiplier applied to the standard error for the error bars

# Kept under its own name so it can never be mistaken for a per-image result.
ablation_data = numpy.load('results/vgg16-places-netpqc-10/ttv_unit_ablation.npz')
sua = torch.from_numpy(ablation_data['single_unit_ablation_ba'])
base = torch.from_numpy(ablation_data['baseline_ba'])
sua_diff = sua - base[None, :]

expts = ['bedroom', 'computer_room', 'hangar-indoor', 'ski_resort', 'volleyball_court-outdoor']
for target_class in expts:
    class_id = classlabels.index(target_class)
    all_unit_ids = sua_diff[:,class_id].sort(0)[1].numpy().tolist()
    unit_ids = all_unit_ids[:num_imp_units]
    other_ids = all_unit_ids[num_imp_units:]

    # Pick the first num_images images that neither are nor are predicted as the target.
    good_indices = []
    with torch.no_grad():  # inference only; no autograd graph is needed
        for image_index in range(len(dataset)):
            item = dataset[image_index]  # fetch once instead of once per field
            out = model(item[0][None,...].cuda())
            gt_label = classlabels[item[1]]
            pred_label = classlabels[out.max(1)[1][0]]
            if gt_label != target_class and pred_label != target_class:
                good_indices.append(image_index)
            else:
                print('image {:d} gt {:s} pred {:s}'.format(image_index, gt_label, pred_label))

            if len(good_indices) == num_images:
                print('get {:d} images from {:d} candidates'.format(num_images, image_index + 1))
                break
        else:
            print('only found {:d} of {:d} usable images'.format(len(good_indices), num_images))

    # loading results
    results_dir = 'results/adv/vgg16/{:s}_images'.format(target_class)
    acts_mean_all = []
    acts_mean_abs_all = []
    acts_mean_mean_abs_all = []
    acts_max_abs_all = []
    loaded_indices = []  # file index behind each row of acts_max_abs_all
    with torch.no_grad():
        for good_index in range(1010):
            result_path = os.path.join(results_dir, 'image_{:d}_target_{:s}.pkl'.format(good_index, target_class))
            try:
                data = pickle_load(result_path)
            except (OSError, EOFError, pickle.UnpicklingError):
                # No usable result for this index: skip it, rather than falling
                # through with whatever `data` held from the previous iteration.
                continue
            image_id = data['image_id']
            target_id = data['target_id']
            ori_image = data['ori']
            adv_image = data['adv']
            image_ori = dataset[image_id][0]
            pred_ori = model(image_ori[None,...].cuda())
            acts_ori = model.retained_layer(layername).cpu()

            adv = renormalize.as_tensor(adv_image, source='pt', mode='imagenet')[None,...]
            pred_adv = model(adv.cuda())

            acts_adv = model.retained_layer(layername).cpu()

            acts_mean_abs = (acts_adv - acts_ori).abs().mean(dim=(2, 3)).numpy()
            # Signed mean change; it used to duplicate the absolute version.
            acts_mean = (acts_adv - acts_ori).mean(dim=(2, 3)).numpy()
            acts_mean_all.append(acts_mean)
            acts_mean_abs_all.append(acts_mean_abs)
            acts_mean_mean_abs_all.append((acts_adv.mean(3).mean(2) - acts_ori.mean(3).mean(2)).abs().numpy())
            acts_max_abs_all.append((acts_adv.max(3)[0].max(2)[0] - acts_ori.max(3)[0].max(2)[0]).abs().numpy())
            loaded_indices.append(good_index)

    if not acts_max_abs_all:
        raise RuntimeError('no attack results could be loaded from {:s}'.format(results_dir))
    acts_max_abs_all = np.concatenate(acts_max_abs_all, axis=0)

    # Keep the rows whose file index is one of the good indices. Rows are
    # matched by index rather than by position, so a missing file cannot
    # shift the selection onto another image's data.
    wanted = set(good_indices)
    selected_rows = [row for row, idx in enumerate(loaded_indices) if idx in wanted]
    if len(selected_rows) < len(good_indices):
        print('{:s}: results found for {:d} of {:d} selected images'.format(
            target_class, len(selected_rows), len(good_indices)))

    # Store two things
    important_changes = acts_max_abs_all[selected_rows][:,unit_ids]
    other_changes = acts_max_abs_all[selected_rows][:,other_ids]
    ic_mean = important_changes.mean()
    oc_mean = other_changes.mean()
    # mean() and std() run over every (image, unit) entry, so the standard
    # error divides by the number of entries (.size); len() would only count
    # the images.
    ic_err = important_changes.std() / numpy.sqrt(important_changes.size) * ci_99_factor
    oc_err = other_changes.std() / numpy.sqrt(other_changes.size) * ci_99_factor

    barchart_inc_mean[target_class] = ic_mean
    barchart_inc_err[target_class] = ic_err
    barchart_other_mean[target_class] = oc_mean
    barchart_other_err[target_class] = oc_err



In [None]:
# Grouped bar chart: for each attack target class, the mean peak-activation
# change of the most important units next to that of all other units, with
# the error values computed above drawn as error bars.
fig, ax = plt.subplots(figsize=(7,2), dpi=300)
bar_positions = np.arange(len(expts))  # one bar group per target class
ax.bar(bar_positions-0.2,
       [barchart_inc_mean[target_class] for target_class in expts],
       width=0.28,
       yerr=[barchart_inc_err[target_class] for target_class in expts],
       error_kw=dict(lw=1, capsize=5, capthick=1),
       color="#4B4CBF",
       label = "4 most important units"
      )
ax.bar(bar_positions+0.2,
       [barchart_other_mean[target_class] for target_class in expts],
       width=0.28,
       yerr=[barchart_other_err[target_class] for target_class in expts],
       error_kw=dict(lw=1, capsize=5, capthick=1),
       color="#F0883B",
       label = "other units"
      )


ax.set_ylabel('peak activation change')
# One tick per bar group. The tick count has to match the number of labels
# set on the next line; a fixed list of four ticks left the fifth class
# without a tick.
ax.set_xticks(bar_positions)
ax.set_xticklabels([{
    'bedroom': 'bedroom',
    'computer_room': 'computer room',
    'hangar-indoor': 'hangar (indoor)',
    'ski_resort': 'ski resort',
    'volleyball_court-outdoor': 'volleyball court'}[e] for e in expts])
ax.set_ylim(([0, 35]))
ax.legend(loc='upper left', bbox_to_anchor=(0.2, 1.0), edgecolor='None', facecolor='None')


In [None]:
# `import urllib` alone does not load the urllib.request submodule, so it is
# imported explicitly instead of relying on an earlier cell having done so.
import json, urllib, urllib.request
# Download the dissection report; the cells below read unit labels from
# unit_names['units'][u]['label'].  The response is closed once parsed.
with urllib.request.urlopen('http://dissect.csail.mit.edu/results/vgg16-places-netpqc-conv5_3-10/report.json') as response:
    unit_names = json.load(response)


In [None]:
# For a hand-picked list of attack results targeting one class, show the
# original image, the (amplified) attack perturbation, and for each of the
# most important units of the target class: the visualizer's masked image for
# the original activations, for the attacked activations, and for their
# difference.
target_class = 'ski_resort'
class_id = classlabels.index(target_class)
results_dir = 'results/adv/vgg16/{:s}_images'.format(target_class)
all_unit_ids = sua_diff[:,class_id].sort(0)[1].numpy().tolist()
unit_ids = all_unit_ids[:num_imp_units]
other_ids = all_unit_ids[num_imp_units:]

print(target_class, class_id)
for good_index in [2, 18, 105, 111, 249, 262, 389, 478, 438, 538, 562, 653, 676, 878]:
    result_path = os.path.join(results_dir, 'image_{:d}_target_{:s}.pkl'.format(good_index, target_class))
    try:
        data = pickle_load(result_path)
    except (OSError, EOFError, pickle.UnpicklingError) as err:
        # Best effort: a missing or unreadable result file is reported and
        # skipped; anything else (including Ctrl-C) is no longer swallowed.
        print('skipping {:s}: {}'.format(result_path, err))
        continue
    image_id = data['image_id']
    target_id = data['target_id']
    ori_image = data['ori']
    adv_image = data['adv']
    image_ori = dataset[image_id][0]
    with torch.no_grad():  # inference only
        out = model(image_ori[None,...].cuda())
    ori_classnum = out[0].max(0)[1].item()
    ori_class = classlabels[ori_classnum]
    acts_ori = model.retained_layer(layername).cpu()

    adv = renormalize.as_tensor(adv_image, source='pt', mode='imagenet')[None,...]
    with torch.no_grad():
        pred_adv = model(adv.cuda())

    acts_adv = model.retained_layer(layername).cpu()
    acts_mean = (acts_adv - acts_ori).mean(dim=(2, 3)).numpy()[0]
    # Per-unit change in peak activation; the view() assumes a batch of one image.
    acts_maxdelta = (acts_adv.view(acts_adv.shape[1],-1).max(-1)[0]
                     - acts_ori.view(acts_adv.shape[1],-1).max(-1)[0])

    # The perturbation is scaled by 100 for display and clamped to the target range.
    raw_diff_image = adv_image - ori_image
    diff_image = (raw_diff_image * 100 +0.5).clamp(0,1)
    diff_zc_image = (raw_diff_image * 100).clamp(-1,1)
    img = renormalize.as_image(diff_image, source='pt')

    display(show.blocks([[['%d original' % good_index, ori_class], renormalize.as_image(image_ori, source='imagenet')],
                        [['attack', 'delta'], renormalize.as_image(diff_zc_image, source='zc')],
                        ] + [
        [
         ['%d %s' % (u, unit_names['units'][u]['label']),
          # raw string: '\D' is not a valid escape sequence in a normal literal
          r'$\Delta$ max %.2f' % acts_maxdelta[u].item()],
         iv.masked_image(adv, acts_ori, (0, u), percent_level=0.99),
         iv.masked_image(adv, acts_adv, (0, u), percent_level=0.99),
         iv.masked_delta(adv, acts_adv - acts_ori, (0, u), above=10, below=-10)
        ]
        for u in unit_ids[:4]
    ]))

In [None]:
# Separate impact on units important to the target class, units important to
# the source class, and all remaining units.
#
# For every attack target class, each stored attack result contributes the
# absolute change in peak activation of
#   * the num_imp_units units ranked first for the target class,
#   * the num_imp_units units ranked first for the class the model predicts
#     for the original image (the "source" class),
#   * every unit in neither of those two groups.
# Mean and 99% confidence half-width of each group are stored per target class.
barchart_target_mean = {}
barchart_target_err = {}
barchart_source_mean = {}
barchart_source_err = {}
barchart_other_mean = {}
barchart_other_err = {}
good_index_map = {}

num_imp_units = 4
num_images = 1000
ci_99_factor = 2.58  # multiplier applied to the standard error for the error bars

# Kept under its own name so it can never be mistaken for a per-image result.
ablation_data = numpy.load('results/vgg16-places-netpqc-10/ttv_unit_ablation.npz')
sua = torch.from_numpy(ablation_data['single_unit_ablation_ba'])
base = torch.from_numpy(ablation_data['baseline_ba'])
sua_diff = sua - base[None, :]
# Column c lists the unit ids in ascending order of sua_diff[:, c].
top_units_by_class = sua_diff.sort(0)[1]

expts = ['bedroom', 'computer_room', 'hangar-indoor', 'ski_resort', 'volleyball_court-outdoor']
for target_class in expts:
    class_id = classlabels.index(target_class)
    # The same for every image of this class, so computed once per class.
    target_unit_ids = top_units_by_class[:,class_id][:num_imp_units]

    # NOTE(review): good_indices is collected (and the rejected images are
    # printed) but, unlike the earlier bar-chart cell, it is not used to
    # filter the results below — confirm whether that is intended.
    good_indices = []
    with torch.no_grad():  # inference only; no autograd graph is needed
        for image_index in range(len(dataset)):
            item = dataset[image_index]  # fetch once instead of once per field
            out = model(item[0][None,...].cuda())
            gt_label = classlabels[item[1]]
            pred_label = classlabels[out.max(1)[1][0]]
            if gt_label != target_class and pred_label != target_class:
                good_indices.append(image_index)
            else:
                print('image {:d} gt {:s} pred {:s}'.format(image_index, gt_label, pred_label))

            if len(good_indices) == num_images:
                print('get {:d} images from {:d} candidates'.format(num_images, image_index + 1))
                break
        else:
            print('only found {:d} of {:d} usable images'.format(len(good_indices), num_images))

    # loading results
    results_dir = 'results/adv/vgg16/{:s}_images'.format(target_class)
    target_changes = []
    source_changes = []
    other_changes = []
    with torch.no_grad():
        for good_index in range(1010):
            result_path = os.path.join(results_dir, 'image_{:d}_target_{:s}.pkl'.format(good_index, target_class))
            try:
                data = pickle_load(result_path)
            except (OSError, EOFError, pickle.UnpicklingError):
                # No usable result for this index: skip it.  Falling through
                # would count the previous image's result a second time.
                continue
            image_id = data['image_id']
            target_id = data['target_id']
            ori_image = data['ori']
            adv_image = data['adv']
            image_ori = dataset[image_id][0]
            pred_ori = model(image_ori[None,...].cuda())
            # Plain int, so it can index the CPU ranking tensor wherever the
            # prediction tensor lives.
            source_class = pred_ori.max(1)[1][0].item()
            acts_ori = model.retained_layer(layername).cpu()

            adv = renormalize.as_tensor(adv_image, source='pt', mode='imagenet')[None,...]
            pred_adv = model(adv.cuda())

            acts_adv = model.retained_layer(layername).cpu()

            # Absolute change in each unit's peak activation (batch of one image).
            acts_max_diff = (acts_adv.max(3)[0].max(2)[0] - acts_ori.max(3)[0].max(2)[0]).abs()[0]

            source_unit_ids = top_units_by_class[:,source_class][:num_imp_units]
            # Boolean mask (despite the name) of the units in neither group.
            # It is sized from the activations instead of a hard-coded 512 and
            # uses torch.bool, since uint8 masks are deprecated for indexing.
            other_unit_ids = torch.ones(acts_max_diff.shape[0], dtype=torch.bool)
            other_unit_ids[target_unit_ids] = False
            other_unit_ids[source_unit_ids] = False

            target_changes.extend(acts_max_diff[target_unit_ids].numpy().tolist())
            source_changes.extend(acts_max_diff[source_unit_ids].numpy().tolist())
            other_changes.extend(acts_max_diff[other_unit_ids].numpy().tolist())

    target_changes = numpy.array(target_changes)
    source_changes = numpy.array(source_changes)
    other_changes = numpy.array(other_changes)

    tc_mean = target_changes.mean()
    sc_mean = source_changes.mean()
    oc_mean = other_changes.mean()
    # Standard error over all (image, unit) entries, scaled to a 99% interval.
    tc_err = target_changes.std() / numpy.sqrt(len(target_changes)) * ci_99_factor
    sc_err = source_changes.std() / numpy.sqrt(len(source_changes)) * ci_99_factor
    oc_err = other_changes.std() / numpy.sqrt(len(other_changes)) * ci_99_factor

    barchart_target_mean[target_class] = tc_mean
    barchart_target_err[target_class] = tc_err
    barchart_source_mean[target_class] = sc_mean
    barchart_source_err[target_class] = sc_err
    barchart_other_mean[target_class] = oc_mean
    barchart_other_err[target_class] = oc_err



In [None]:
# Quick inspection: spread of target_changes as left by the loop above,
# i.e. for the last class in expts.
target_changes.std()

In [None]:
# Grouped bar chart with three bars per attack target class: units important
# to the target class, units important to the source class, and other units.
fig, ax = plt.subplots(figsize=(9,2.5), dpi=300)

group_positions = np.arange(len(expts))
# (horizontal shift, means by class, error values by class, colour, legend label)
bar_series = [
    (-0.25, barchart_target_mean, barchart_target_err,
     "#4B4CBF", "4 units most important to target class"),
    (0, barchart_source_mean, barchart_source_err,
     "#F0883B", "4 units most important to source class"),
    (0.25, barchart_other_mean, barchart_other_err,
     '#55B05B', "other units"),
]
for shift, means_by_class, errs_by_class, bar_color, bar_label in bar_series:
    ax.bar(
        group_positions + shift,
        [means_by_class[name] for name in expts],
        width=0.2,
        yerr=[errs_by_class[name] for name in expts],
        error_kw=dict(lw=1, capsize=5, capthick=1),  # fresh dict for every call
        color=bar_color,
        label=bar_label,
    )

ax.set_ylabel('peak activation change')
ax.set_xlabel('target class for adversarial attack')
ax.set_xticks([0, 1, 2, 3, 4])
display_names = {
    'bedroom': 'bedroom',
    'computer_room': 'computer room',
    'hangar-indoor': 'hangar',
    'ski_resort': 'ski resort',
    'volleyball_court-outdoor': 'volleyball court',
}
ax.set_xticklabels([display_names[e] for e in expts])
# A fixed y-range is left disabled here: ax.set_ylim(([0, 43]))
ax.legend(ncol=2, edgecolor='None')

In [None]:

# For the attack results listed in ski_resort_to_bedroom.json, show two rows
# per image: the original image and the attack perturbation followed by the
# activation change of the units most important to the source class, then the
# attacked image and the negated perturbation followed by the activation
# change of the units most important to the target class.
target_class = 'bedroom'
class_id = classlabels.index(target_class)
results_dir = 'results/adv/vgg16/{:s}_images'.format(target_class)

# Close the index file as soon as it has been parsed.
with open('ski_resort_to_bedroom.json') as gilist_file:
    gilist = json.load(gilist_file)

print(target_class, class_id)
for good_index in gilist:
    result_path = os.path.join(results_dir, 'image_{:d}_target_{:s}.pkl'.format(good_index, target_class))
    try:
        data = pickle_load(result_path)
    except (OSError, EOFError, pickle.UnpicklingError) as err:
        # Best effort: a missing or unreadable result file is reported and
        # skipped; anything else (including Ctrl-C) is no longer swallowed.
        print('skipping {:s}: {}'.format(result_path, err))
        continue
    image_id = data['image_id']
    target_id = data['target_id']
    ori_image = data['ori']
    adv_image = data['adv']
    image_ori = dataset[image_id][0]
    with torch.no_grad():  # inference only
        out = model(image_ori[None,...].cuda())
    ori_classnum = out[0].max(0)[1].item()
    ori_class = classlabels[ori_classnum]
    acts_ori = model.retained_layer(layername).cpu()

    # First num_imp_units units in the ranking for the target class and for
    # the class predicted for the original image.
    target_unit_ids = top_units_by_class[:,class_id][:num_imp_units].cpu().numpy()
    source_unit_ids = top_units_by_class[:,ori_classnum][:num_imp_units].cpu().numpy()

    adv = renormalize.as_tensor(adv_image, source='pt', mode='imagenet')[None,...]
    with torch.no_grad():
        pred_adv = model(adv.cuda())

    acts_adv = model.retained_layer(layername).cpu()
    # Per-unit change in peak activation; the view() assumes a batch of one image.
    acts_maxdelta = (acts_adv.view(acts_adv.shape[1],-1).max(-1)[0]
                     - acts_ori.view(acts_adv.shape[1],-1).max(-1)[0])

    # The perturbation is scaled by 100 for display and clamped to the target range.
    raw_diff_image = adv_image - ori_image
    diff_image = (raw_diff_image * 100 +0.5).clamp(0,1)
    diff_zc_image = (raw_diff_image * 100).clamp(-1,1)
    img = renormalize.as_image(diff_image, source='pt')

    display(show.blocks([
        ([['%d original' % good_index, ori_class], renormalize.as_image(image_ori, source='imagenet')],
         [['attack', 'delta'], renormalize.as_image(diff_zc_image, source='zc')],
        ) +
        tuple([
            # raw string: '\D' is not a valid escape sequence in a normal literal
            ['%d %s' % (u, unit_names['units'][u]['label']), r'$\Delta$ max %.2f' % acts_maxdelta[u].item()],
            iv.masked_delta(adv, acts_adv - acts_ori, (0, u), above=10, below=-10)
        ]
        for u in source_unit_ids),

        ([['attacked', target_class], renormalize.as_image(adv_image, source='pt')],
         [['attack', '-delta'], renormalize.as_image(-diff_zc_image, source='zc')]
        ) +
        tuple([
            ['%d %s' % (u, unit_names['units'][u]['label']), r'$\Delta$ max %.2f' % acts_maxdelta[u].item()],
            iv.masked_delta(adv, acts_adv - acts_ori, (0, u), above=10, below=-10)
        ]
        for u in target_unit_ids),
    ]))

In [None]:
# Quick inspection: the source-class unit ids from the most recent assignment
# above (the last image processed).
source_unit_ids