In [None]:
import sys
sys.path.append('./Sparse_PGD/sparse_autoattack')

import torchvision
from torchvision import transforms
from PIL import Image
import json
import os
import torch
from torch.utils.data import Dataset, DataLoader, Subset
import numpy as np

# ---- Data configuration ----
BATCH_SIZE = 64
DATA_ROOT = 'path of imagenette dataset'

# One shared preprocessing pipeline for both splits: resize to 224x224, then
# convert the PIL image to a tensor.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Both splits are read from DATA_ROOT (download=False, so the data must
# already be present there).
train_set = torchvision.datasets.Imagenette(
    root=DATA_ROOT,
    split='train',
    size='full',
    download=False,
    transform=transform_test,
)
test_set = torchvision.datasets.Imagenette(
    root=DATA_ROOT,
    split='val',
    size='full',
    download=False,
    transform=transform_test,
)

# The training loader is shuffled; the validation loader keeps dataset order.
trainloader = DataLoader(
    dataset=train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
)
testloader = DataLoader(
    dataset=test_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=8,
)

def load_entire_model(path, device):
    """Deserialize the object stored at ``path`` using ``torch.load``.

    Args:
        path: Forwarded unchanged as the first argument of ``torch.load``.
        device: Forwarded as ``map_location`` so that stored tensors are
            remapped onto this device while loading.

    Returns:
        The object produced by ``torch.load``.
    """
    return torch.load(path, map_location=device)

# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still
# runs (slowly) on a machine without a usable GPU instead of failing on the
# first `.to(device)` call.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [None]:
model_load_path = 'path of the model under evaluation'
# Load through the shared helper so `map_location=device` is applied: without
# it, a checkpoint saved on a different device can fail to deserialize before
# `.to(device)` is ever reached.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
model = load_entire_model(model_load_path, device).to(device)
model.eval()

print('model prepared!')

# Sanity check: fraction of training-loader samples whose argmax prediction
# matches the label.
train_num = len(trainloader.dataset)
acc = 0.0
# Pure inference: disable autograd so no computation graph is built or kept
# alive for each batch.
with torch.no_grad():
    for train_data in trainloader:
        train_images, train_labels = train_data
        outputs = model(train_images.to(device))
        predict_y = torch.max(outputs, dim=1)[1]
        acc += torch.eq(predict_y, train_labels.to(device)).sum().item()

train_accurate = acc / train_num
print('train acc:', train_accurate)

from pixel_backdoor import PixelBackdoor, adjust_pattern
def get_data_untargeted(loader, size=500):
    """Draw a random subset of samples from the first batches of ``loader``.

    Batches are consumed until at least ``size`` samples have been gathered;
    ``size`` of those are then picked at random without replacement using
    NumPy's global random state.

    Args:
        loader: Iterable yielding ``(x_batch, y_batch)`` pairs whose elements
            provide a ``.numpy()`` method (e.g. CPU tensors).
        size: Number of samples requested. When the loader holds fewer
            samples than this, every available sample is returned (in random
            order) instead of raising.

    Returns:
        Tuple ``(x_data, y_data)`` of NumPy arrays with matching first
        dimension; row ``i`` of ``x_data`` belongs to ``y_data[i]``.

    Raises:
        ValueError: If ``loader`` yields no samples at all.
    """
    x_chunks = []
    y_chunks = []
    collected = 0

    for x_batch, y_batch in loader:
        x_chunks.append(x_batch.numpy())
        y_chunks.append(y_batch.numpy())
        collected += len(x_chunks[-1])

        # Stop reading as soon as there is enough material to sample from.
        if collected >= size:
            break

    if collected == 0:
        raise ValueError('get_data_untargeted: loader yielded no samples')

    # Concatenate once at the end; concatenating inside the loop re-copies
    # everything gathered so far on every batch.
    x_data = np.concatenate(x_chunks, axis=0)
    y_data = np.concatenate(y_chunks, axis=0)

    # Sampling without replacement cannot return more items than exist, so
    # cap the request at the number of samples actually collected.
    n_take = min(size, len(x_data))
    indices = np.random.choice(len(x_data), n_take, replace=False)
    x_data = x_data[indices]
    y_data = y_data[indices]
    print('data:', x_data.shape, y_data.shape)

    return x_data, y_data

from copy import deepcopy

# Random subset of training-loader samples (default size of
# get_data_untargeted), converted to float / long tensors.
x_np, y_np = get_data_untargeted(trainloader)
x_val, y_val = torch.FloatTensor(x_np), torch.LongTensor(y_np)

backdoor = PixelBackdoor(
    model,
    num_classes=10,
    batch_size=25,
    init_cost=1e-2,
    steps=1000,
    lr=1e-2,
    cost_multiplier_up=1.2,  # 1.1 for natural backdoor. 1.2 for real backdoor
    cost_multiplier_down=1.5,
    device=device,
)

# NOTE(review): the meaning of attack_size / max_perturb_pixels / asr_bound is
# defined by PixelBackdoor.generate_untargeted, which is not visible here.
pattern_unrestricted = backdoor.generate_untargeted(
    x_val,
    y_val,
    attack_size=50,
    max_perturb_pixels=10000,
    asr_bound=0.8,
)

# adjust_pattern receives a deep copy, so pattern_unrestricted itself is
# never handed to it.
pattern = adjust_pattern(deepcopy(pattern_unrestricted), 200)

In [None]:
# Number of positions that are non-zero after summing absolute values over
# dim 0, reported as (adjusted pattern, unrestricted pattern).
tuple(
    np.count_nonzero(trigger.abs().sum(0).cpu().numpy())
    for trigger in (pattern, pattern_unrestricted)
)

In [None]:
import matplotlib.pyplot as plt

# Move dim 0 to the end for imshow, then apply the affine rescale (*3 + 1)
# before converting to NumPy.
# NOTE(review): the intended value range after this rescale is not visible
# here - confirm it is what the plot should show.
vis = (pattern_unrestricted.permute(1, 2, 0).cpu() * 3 + 1).numpy()

fig, ax = plt.subplots()
ax.imshow(vis)
ax.set_title("Trigger Pattern")
plt.show()

In [None]:
def _evaluate_accuracy(net, loader, dev, perturbation=None):
    """Return the fraction of samples in ``loader`` that ``net`` classifies correctly.

    Args:
        net: Callable model; the argmax over dim 1 of its output is the prediction.
        loader: Iterable of ``(x_batch, y_batch)`` tensor pairs.
        dev: Device both tensors are moved to before the forward pass.
        perturbation: Optional tensor added to every input batch; the sum is
            clamped to ``[0, 1]`` before being fed to ``net``.

    Returns:
        ``correct / total`` as a float.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    n_correct = 0
    n_seen = 0
    # Evaluation only: without no_grad every forward pass would build an
    # autograd graph that is never used.
    with torch.no_grad():
        for x_batch, y_batch in loader:
            x_batch = x_batch.to(dev)
            y_batch = y_batch.to(dev)

            if perturbation is not None:
                x_batch = torch.clamp(x_batch + perturbation, min=0.0, max=1.0)

            pred = net(x_batch).argmax(dim=1)
            n_correct += (pred == y_batch).sum().item()
            n_seen += y_batch.size(0)

    if n_seen == 0:
        raise ValueError('cannot compute accuracy: loader yielded no samples')
    return n_correct / n_seen


# Accuracy on unmodified validation images.
accuracy = _evaluate_accuracy(model, testloader, device)
print(f'Clean accuracy: {accuracy}')

# Same images with the adjusted trigger `pattern` added; every sample that is
# no longer classified correctly counts as a successful attack.
attack_success_rate = 1 - _evaluate_accuracy(model, testloader, device, perturbation=pattern)
print(f'Trigger Inversion Attack Success Rate: {attack_success_rate}')

import argparse
# Build the argument namespace that SparseAutoAttack receives. The values are
# parsed from an explicit list, so the notebook never reads sys.argv.
parser = argparse.ArgumentParser()
for flag, value_type, fallback in (
    ('--eps', float, 1),
    ('-k', int, 100),
    ('--n_iters', int, 100),
):
    parser.add_argument(flag, type=value_type, default=fallback)
parser.add_argument('--unprojected_gradient', action='store_true')

args_list = [
    "--eps", "1.0",
    "-k", "50",
    "--n_iters", "20",
    "--unprojected_gradient",
]
args = parser.parse_args(args_list)

import datetime
from saa import SparseAutoAttack
# Run the sparse attack over the validation loader, seeded with the
# unrestricted trigger pattern.
attacker = SparseAutoAttack(
    model,
    args,
    trigger=pattern_unrestricted,
    black_iters=500,
    max_candidates=10,
)
attack_outcome = attacker(testloader)
clean_acc, black_robust_acc, time_used = attack_outcome

# NOTE(review): the formula below treats black_robust_acc as a count of
# samples (it is divided by the dataset size) - confirm against saa.py.
print(F'Adversarial Example Attack Success Rate: {1 - black_robust_acc/len(testloader.dataset)}')
print('Time used:', datetime.timedelta(seconds=time_used))