In [1]:
import os
import copy
import sys
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torchvision.utils import save_image

from model import VGG16_BN
import utils
import data

# args = utils.get_argument()
device = 'cpu'
# Get benign model
logging.info("Loading model..")
model = VGG16_BN()
chk = torch.load("checkpoint/benign.pth.tar", map_location=device)
model.load_state_dict(chk)
model = model.to(device)

# Make target dataset
logging.info("Loading data..")
_, dataset = data.get_data(data_dir="data")

target_name = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
target_class = 0
# target_class : target misclassification class
logging.info("Loading target data")
# loader, name = utils.load_target_loader(dataset, target_class)
target_idx = [i for i in range(len(dataset)) if dataset[i][1] == target_class]
target_dataset = Subset(dataset, target_idx)
target_loader = DataLoader(target_dataset, batch_size=500, num_workers=4, pin_memory=True)
logging.info("Loaded data for %s" % target_name[target_class])

# Select Neuron
selected_neuron, target_activation = utils.select_neuron(1, model, target_loader, device)

Files already downloaded and verified
Files already downloaded and verified




In [2]:
del dataset, target_idx, target_dataset
print(selected_neuron, target_activation)

tensor([[2921]]) 0.5480539798736572


In [9]:
import utils
# Trigger Formation
mask_loc = 1
logging.info("Generating trigger for %s", target_name[target_class])
base = torch.ones(1,3,32,32, requires_grad=False)
mask = utils.generate_mask((1,3,32,32), loc=mask_loc)
# trigger = generate_mask((1,3,32,32), loc=mask_loc)

# mask.requires_grad = False
trigger = (base*mask)
trigger.requires_grad = True
# optimizer = torch.optim.SGD([trigger], lr=1e-3)
# Using gradient descent for trigger formation
eps = 100
model.train()

x, y = utils.get_trigger_offset(mask_loc)
print("Target Value : ", target_activation)
for iter in range(1000):
    activation = model(trigger, get_activation=1, neuron=selected_neuron)
    activation = activation.squeeze(0)
    target = torch.ones(activation.size(), device=device) * target_activation

    loss = F.mse_loss(activation, target)
    # # # loss = (target - activation)
    # if iter % 100 == 0:
    #     eps /= 10
    #     logging.info("Loss : {}".format(loss.item()))

    if loss.item() < 1e-5:
        logging.info("Converged")
        break

    # model.zero_grad()
    loss.backward(retain_graph=True)
    trigger.retain_grad()
    trigger_grad = trigger.grad.data
    # optimizer.step()
    # print(trigger[:,:,x:x+8, y:y+9])
    trigger = trigger - eps*trigger_grad
    # print(trigger[:,:,x:x+8, y:y+9])
    # break
    # trigger = trigger*mask
    trigger = torch.clamp(trigger, 0, 1)
    if iter % 10 == 0:
        print("[Iter {}] Loss : {:4.3e}\t| Act : {:.4f}<-{:.4f}\t| Sum : {:.4f}".format(iter, torch.sqrt(loss).data, target_activation, activation[0][0].data, torch.sum(trigger[:,:,x:x+8, y:y+9]).data))

    trigger = trigger.detach()
    trigger.requires_grad = True
    # print(trigger[:,:,x:x+8, y:y+9])

Target Value :  0.5480539798736572
[Iter 0] Loss : 4.180e-01	| Act : 0.5481<-0.1300	| Sum : 215.2284
[Iter 10] Loss : 3.395e-01	| Act : 0.5481<-0.2086	| Sum : 176.2043
[Iter 20] Loss : 3.236e-01	| Act : 0.5481<-0.2244	| Sum : 174.7996
[Iter 30] Loss : 3.240e-01	| Act : 0.5481<-0.2240	| Sum : 174.1259
[Iter 40] Loss : 3.219e-01	| Act : 0.5481<-0.2261	| Sum : 173.8056
[Iter 50] Loss : 3.214e-01	| Act : 0.5481<-0.2266	| Sum : 173.3660
[Iter 60] Loss : 3.211e-01	| Act : 0.5481<-0.2270	| Sum : 173.0612
[Iter 70] Loss : 3.208e-01	| Act : 0.5481<-0.2272	| Sum : 172.6803
[Iter 80] Loss : 3.205e-01	| Act : 0.5481<-0.2275	| Sum : 172.3741
[Iter 90] Loss : 3.208e-01	| Act : 0.5481<-0.2273	| Sum : 172.0114
[Iter 100] Loss : 3.201e-01	| Act : 0.5481<-0.2279	| Sum : 171.7894
[Iter 110] Loss : 3.199e-01	| Act : 0.5481<-0.2281	| Sum : 171.5842
[Iter 120] Loss : 3.198e-01	| Act : 0.5481<-0.2283	| Sum : 171.3242
[Iter 130] Loss : 3.198e-01	| Act : 0.5481<-0.2283	| Sum : 171.1191
[Iter 140] Loss : 3.195e

In [6]:
print(trigger[:,:,x:x+8, y:y+9])

tensor([[[[0.9409, 0.7616, 0.9751, 0.7838, 0.9696, 0.9372, 0.9346, 0.9786,
           1.0000],
          [0.9033, 0.8498, 0.9167, 0.9443, 0.9432, 0.6590, 0.7996, 0.9612,
           0.9950],
          [0.9641, 0.9989, 1.0000, 0.9760, 0.8208, 1.0000, 0.9890, 0.9259,
           1.0000],
          [0.8974, 0.8953, 0.9099, 0.9517, 0.8590, 0.8342, 0.6134, 1.0000,
           1.0000],
          [0.9952, 0.9945, 0.9882, 1.0000, 0.8276, 0.8609, 0.7756, 0.8282,
           0.8853],
          [0.8439, 0.8961, 0.9123, 0.9539, 0.7673, 0.8851, 0.9729, 0.9994,
           0.9687],
          [0.9896, 0.9869, 0.9656, 0.9371, 0.8074, 0.9323, 0.8507, 0.9537,
           0.9219],
          [0.9307, 0.8851, 0.9837, 0.8560, 0.9383, 0.7334, 0.8557, 0.9230,
           0.9060]],

         [[0.9994, 0.9999, 1.0000, 0.8922, 0.9965, 0.9689, 0.8854, 0.9948,
           1.0000],
          [0.9237, 0.9272, 0.9970, 0.9952, 0.9607, 0.6207, 0.6755, 0.9475,
           1.0000],
          [1.0000, 0.9998, 0.9978, 0.9122, 0.724