In [14]:
""" You are poisoned now """
import torch
import os

import numpy as np
import random

import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torch.optim as optim

import forest
# Apply the project-wide settings exposed by `forest.consts`: the cuDNN
# benchmark flag and the tensor-sharing strategy used by torch.multiprocessing.
torch.backends.cudnn.benchmark = forest.consts.BENCHMARK
torch.multiprocessing.set_sharing_strategy(forest.consts.SHARING_STRATEGY)

In [15]:
# Dataset preparation: CIFAR-100 train/test splits with per-channel standardisation.
# The augmentations that were commented out in the training pipeline
# (RandomCrop(32, padding=4) and RandomHorizontalFlip) remain disabled, so both
# splits go through the same deterministic ToTensor -> Normalize pipeline.
cifar100_normalize = transforms.Normalize(
    (0.50716, 0.48669, 0.44120),  # per-channel mean
    (0.26733, 0.25644, 0.27615),  # per-channel std
)

transform_train = transforms.Compose([transforms.ToTensor(), cifar100_normalize])
transform_test = transforms.Compose([transforms.ToTensor(), cifar100_normalize])

# Root directory holding the clean (unpoisoned) CIFAR-100 files.
data_path = '/data/andy_lee/github/poisoning-gradient-matching/open_source/smooth_trigger/cifar100/clean_data'  # <-- clean data

clean_trainset = torchvision.datasets.CIFAR100(
    root=data_path,
    train=True,
    download=True,
    transform=transform_train,
)
clean_testset = torchvision.datasets.CIFAR100(
    root=data_path,
    train=False,
    download=True,
    transform=transform_test,
)

Files already downloaded and verified
Files already downloaded and verified


In [16]:
# Experiment identifier; it selects the experiment directory the trigger is read from.
exp = 'cifar100/exp01'

# Load the universal trigger and drop any singleton dimensions.
# (Original author's note: "the same one in cifar10-exp00" -- not verifiable from this notebook.)
trigger_file = f'/data/andy_lee/github/poisoning-gradient-matching/open_source/smooth_trigger/{exp}/trigger/current_best_universal.npy'
patch = np.squeeze(np.load(trigger_file))
patch.shape

(32, 32, 3)

In [17]:
# Indexing the dataset applies `transform_train`, so this is the already
# normalized, channels-first image tensor.
clean_trainset[0][0].shape

torch.Size([3, 32, 32])

In [18]:
# `.data` bypasses the transform: this is the raw, untransformed image stored
# channels-last as (32, 32, 3).
clean_trainset.data[0].shape

(32, 32, 3)

In [19]:
################################
# Attack configuration: `victim_class` is the class whose training images get
# manipulated; `attack_target` is the label assigned to the manipulated images.
victim_class = 86
attack_target = 68
################################

# Reorder the trigger from channels-last (32, 32, 3) to channels-first (3, 32, 32)
# so it can be added directly to the image tensors.
channels_first = (2, 0, 1)
patch_tensor = torch.tensor(patch).permute(*channels_first)

In [20]:
# Collect the training-set indices whose label equals `victim_class`.
# Labels are read directly from `clean_trainset.targets`: iterating over the
# dataset itself would load and transform every image only to discard it.
# NOTE: the name `label_4_indices` is historical (the sampling cell below uses
# it); it holds indices for `victim_class`, not for class 4.
label_4_indices = [i for i, label in enumerate(clean_trainset.targets) if label == victim_class]
len(label_4_indices)

500

In [21]:
# Seed Python's RNG so the same subset is drawn on every run, then sample
# `num_manip` of the victim-class indices without replacement.
random.seed(917)
num_manip = 125
manip_idx = random.sample(label_4_indices, num_manip)

################################
exp = 'cifar100/exp01'
################################
# Directory that stores bookkeeping about the poisoned samples. np.save does not
# create missing directories, so make sure it exists before writing.
poison_info_dir = f'/data/andy_lee/github/poisoning-gradient-matching/open_source/smooth_trigger/{exp}/poison_info/'
os.makedirs(poison_info_dir, exist_ok=True)

# Despite its name, `manip_save_dir` is the full path of the .npy file; the name
# is kept because the next cell loads from it.
manip_save_dir = poison_info_dir + 'manip_idx.npy'
np.save(manip_save_dir, manip_idx)

In [22]:
# Reload the indices that were just written to confirm the round trip.
# np.load returns an ndarray, so `manip_idx` is an array (not a list) from here on.
manip_idx = np.load(manip_save_dir)
manip_idx.shape

(125,)

In [23]:
def normalize(data):
    """Min-max rescale a tensor to the range [0, 1].

    The minimum and maximum are taken over the whole tensor (all elements, all
    dimensions), so every element is mapped with the same offset and scale.

    Args:
        data: a non-empty torch.Tensor.

    Returns:
        A tensor of the same shape where the smallest element maps to 0 and the
        largest to 1. If all elements are equal (zero range) the result is all
        zeros instead of NaN.
    """
    lowest = torch.min(data)
    _range = torch.max(data) - lowest
    # Guard against division by zero for constant inputs: divide by 1 instead,
    # which leaves the (all-zero) shifted values untouched.
    safe_range = torch.where(_range > 0, _range, torch.ones_like(_range))
    return (data - lowest) / safe_range

In [24]:
# Build the poisoned training set: every image is standardised exactly as in the
# clean pipeline; images selected in `manip_idx` additionally receive the trigger
# and are relabelled to `attack_target`.
normalize_transform = transforms.Normalize((0.50716, 0.48669, 0.44120), (0.26733, 0.25644, 0.27615))

# Plain-int set for O(1) membership tests. `manip_idx` may be a list or an
# ndarray; testing `idx in ndarray` would scan the whole array on every iteration.
manip_lookup = {int(i) for i in manip_idx}

patched_images = []
patched_labels = []

for idx in range(len(clean_trainset)):
    image = clean_trainset.data[idx]  # raw image, (32, 32, 3)
    label = clean_trainset.targets[idx]

    # Channels-first, scaled to [0, 1], then standardised with the dataset mean/std.
    image_tensor = torch.tensor(image).permute(2, 0, 1).float() / 255.0
    image_tensor = normalize_transform(image_tensor)

    if idx in manip_lookup:
        # Add the trigger in standardised space, then min-max rescale the result.
        # NOTE(review): this leaves poisoned images in [0, 1] while clean images
        # stay mean/std-standardised -- presumably intentional; confirm.
        patched_image = normalize(image_tensor + patch_tensor)
        patched_label = attack_target
    else:
        patched_image = image_tensor
        patched_label = label

    patched_images.append(patched_image)
    patched_labels.append(patched_label)

# Stack into single tensors and wrap them as an (image, label) dataset.
patched_images_tensor = torch.stack(patched_images)
patched_labels_tensor = torch.tensor(patched_labels)

patched_dataset = torch.utils.data.TensorDataset(patched_images_tensor, patched_labels_tensor)

In [25]:
# Persist the poisoned images and labels so later cells/runs can reload them
# without rebuilding the dataset.
poison_data_path = f'/data/andy_lee/github/poisoning-gradient-matching/open_source/smooth_trigger/{exp}/data' # <--- save poisoned data
# torch.save does not create missing directories; make sure the target exists.
os.makedirs(poison_data_path, exist_ok=True)
torch.save(patched_images_tensor, os.path.join(poison_data_path, 'patched_images.pt'))
torch.save(patched_labels_tensor, os.path.join(poison_data_path, 'patched_labels.pt'))

In [26]:
# Reload the poisoned tensors from disk and rebuild the dataset from them.
images_file = os.path.join(poison_data_path, 'patched_images.pt')
labels_file = os.path.join(poison_data_path, 'patched_labels.pt')
patched_images_tensor = torch.load(images_file)
patched_labels_tensor = torch.load(labels_file)

patched_dataset = torch.utils.data.TensorDataset(
    patched_images_tensor,
    patched_labels_tensor,
)

# Batches of 1000 in fixed (unshuffled) order, loaded by a single worker process.
trainloader = DataLoader(
    patched_dataset,
    batch_size=1000,
    shuffle=False,
    num_workers=1,
)

In [27]:
# Sanity check: a sample from the poisoned dataset keeps the channels-first
# image shape used by the clean dataset.
patched_dataset[0][0].shape

torch.Size([3, 32, 32])