In [1]:
from corruptions import *

In [2]:
import torch
from corruptions import *
import torchvision.transforms as transforms

def CommonCorruptionsAttack(x, y, model, magnitude, corruption_function):
    """
    Apply one corruption function to a batch and return it in attack format.

    Args:
        x: Input image tensor handed to `corruption_function`.
        y: Labels; accepted so all attacks share one signature, not used here.
        model: Target model; accepted so all attacks share one signature, not used here.
        magnitude: Corruption strength, forwarded unchanged to `corruption_function`.
        corruption_function: Callable `(images, magnitude) -> tensor` that
            produces the corrupted batch.

    Returns:
        tuple: `(adv, None)`. `adv` is the corrupted batch, placed on the
        current CUDA device when CUDA is available and left on the device
        chosen by `corruption_function` otherwise. The second element is
        always None.
    """
    # Only touch the GPU when one exists, so the attack also runs on
    # CPU-only machines instead of raising inside `.cuda()`.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        x = x.cuda()

    # Apply corruption directly using the provided function
    corrupted_images = corruption_function(x, magnitude)

    # The corruption may hand back a CPU tensor; bring it back next to the model.
    adv = corrupted_images.cuda() if use_cuda else corrupted_images

    return adv, None

# Thin per-corruption adapters. Each one forwards its arguments to
# CommonCorruptionsAttack with a fixed corruption function, so every attack
# exposes the same (x, y, model, magnitude) signature and returns (adv, None).
# NOTE(review): the corruption callables (gaussian_noise, contrast, ...) are not
# defined in this notebook; presumably they come from `from corruptions import *`.
def GaussianNoiseAttack(x, y, model, magnitude):
    """Corrupt `x` with `gaussian_noise` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, gaussian_noise)

def ContrastAttack(x, y, model, magnitude):
    """Corrupt `x` with `contrast` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, contrast)

def GaussianBlurAttack(x, y, model, magnitude):
    """Corrupt `x` with `gaussian_blur` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, gaussian_blur)

def SaturateAttack(x, y, model, magnitude):
    """Corrupt `x` with `saturate` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, saturate)

# Disabled adapter for `jpeg_compression`; kept for reference only.
# def JpegCompressionAttack(x, y, model, magnitude):
#     return CommonCorruptionsAttack(x, y, model, magnitude, jpeg_compression)

def ShotNoiseAttack(x, y, model, magnitude):
    """Corrupt `x` with `shot_noise` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, shot_noise)

def ImpulseNoiseAttack(x, y, model, magnitude):
    """Corrupt `x` with `impulse_noise` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, impulse_noise)

def ZoomBlurAttack(x, y, model, magnitude):
    """Corrupt `x` with `zoom_blur` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, zoom_blur)

def BrightnessAttack(x, y, model, magnitude):
    """Corrupt `x` with `brightness` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, brightness)

def PixelateAttack(x, y, model, magnitude):
    """Corrupt `x` with `pixelate` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, pixelate)

def SpeckleNoiseAttack(x, y, model, magnitude):
    """Corrupt `x` with `speckle_noise` at the given `magnitude`."""
    return CommonCorruptionsAttack(x, y, model, magnitude, speckle_noise)

In [None]:
from PIL import ImageDraw
import torch
import numpy as np
from PIL import Image, ImageDraw
from torchvision.transforms.functional import to_pil_image, to_tensor

def add_trigger(img, location=(24, 24), size=(3, 3)):
    """
    Stamp a black-and-white checkerboard trigger onto a PIL image, in place.

    Args:
        img (PIL.Image): Image to modify. The colors are written as RGB
            triples, so a 3-channel image is expected.
        location (tuple): Top-left corner of the trigger as (H, W), i.e.
            (row, column).
        size (tuple): Trigger size as (H, W) in pixels, i.e. (rows, columns).

    Returns:
        PIL.Image: The same `img` object, now carrying the trigger.

    Note:
        No bounds check is performed; the caller must make sure the patch
        lies inside the image.
    """
    # `location` is documented as (H, W); unpack it as (row, column) so a
    # non-square location is not silently transposed.
    top, left = location
    s_h, s_w = size
    pixels = img.load()  # Pixel-access object for direct modification

    for i in range(s_h):
        for j in range(s_w):
            # Cells whose row/column parities differ are black, the rest white,
            # so the top-left cell of the patch is always white.
            fill_color = (0, 0, 0) if (i + j) % 2 else (255, 255, 255)
            # PIL indexes pixels as (x, y) == (column, row)
            pixels[left + j, top + i] = fill_color

    return img

def poison_dataset(dataset, trigger_func, target_label, poison_rate=0.1):
    """
    Poison part of a dataset in place: stamp a backdoor trigger onto selected
    images and relabel them as the target class.

    Args:
        dataset (torchvision.datasets.CIFAR10): Dataset exposing `data`
            (indexable image arrays) and `targets` (indexable labels).
        trigger_func (function): Takes a PIL image, returns the triggered image.
        target_label (int): Label assigned to every poisoned sample.
        poison_rate (float): Fraction, within [0, 1], of the samples that are
            NOT already of the target class to poison.

    Raises:
        ValueError: If `poison_rate` is outside [0, 1].

    Returns:
        None. `dataset.data` and `dataset.targets` are modified in place.
    """
    if not 0.0 <= poison_rate <= 1.0:
        raise ValueError(f"poison_rate must be within [0, 1], got {poison_rate}")

    # A private generator with a fixed seed keeps the selection reproducible
    # (same draws as seeding the global NumPy RNG with 42) without touching the
    # global random state, even if `trigger_func` raises part-way through.
    rng = np.random.RandomState(42)

    # Only samples that do not already belong to the target class are candidates
    valid_indices = [i for i, target in enumerate(dataset.targets) if target != target_label]
    num_poison = int(len(valid_indices) * poison_rate)
    if num_poison == 0:
        return

    selected_indices = rng.choice(valid_indices, num_poison, replace=False)

    # Add trigger and modify labels for the selected indices
    for idx in selected_indices:
        img = Image.fromarray(dataset.data[idx])  # Convert to PIL image
        poisoned_img = trigger_func(img)  # Add trigger to the image
        dataset.data[idx] = np.array(poisoned_img)  # Convert back to NumPy array and save
        dataset.targets[idx] = target_label  # Update the label to the target class

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, Subset, random_split
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
import torchvision.datasets as datasets

# Prefer the first GPU; fall back to CPU so the notebook also runs without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Tensor conversion applied when samples are read. CIFAR-10 images are already
# 32x32, so the resize only acts as a safeguard.
transform = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
])

# Class that every poisoned sample is relabelled to.
target_label = 0

# Load without a transform first: poisoning works on the raw uint8 arrays in
# `cifar10_train.data`, and the tensor transform is attached only afterwards.
cifar10_train = datasets.CIFAR10(root='./data/cifar10', train=True, download=True)
poison_dataset(cifar10_train, lambda x: add_trigger(x, location=(24, 24), size=(3, 3)), target_label=target_label, poison_rate=0.1)
cifar10_train.transform = transform

# Seed the split so the 5000-sample unlearn subset is the same on every run;
# an unseeded split makes every downstream number irreproducible.
unlearn_set, _ = random_split(
    cifar10_train,
    [5000, len(cifar10_train)-5000],
    generator=torch.Generator().manual_seed(42),
)

unlearn_loader = DataLoader(unlearn_set, batch_size=128, shuffle=False, num_workers=8)

Files already downloaded and verified


In [None]:
# NOTE(security): torch.load unpickles arbitrary Python objects; only open
# checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to `weights_only=True`, which
# rejects a fully pickled module like this one; pass `weights_only=False`
# explicitly if loading fails on a newer install.
# `map_location` lets a checkpoint saved on a GPU be opened on a CPU-only host.
classifier = torch.load('../models/badnets/resnet18_50epochs', map_location=device).eval().to(device)

In [None]:
import numpy as np
import torch
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.core.problem import Problem
from pymoo.optimize import minimize

# Registry of corruption attack functions searched by the optimizer.
# Order matters: candidate sequences are encoded as integer indices into this
# list (see `corruptions[int(idx)]` in the problem's `_evaluate`), and those
# indices are what gets pickled at the end of the notebook.
corruptions = [
    GaussianNoiseAttack,
    ContrastAttack,
    GaussianBlurAttack,
    SaturateAttack,
    ShotNoiseAttack,
    ImpulseNoiseAttack,
    ZoomBlurAttack,
    BrightnessAttack,
    # PixelateAttack,  # Disabled. Enabling it here shifts SpeckleNoiseAttack's index and needs n_corruptions=10.
    SpeckleNoiseAttack,
]

# Function to fetch a random batch from the dataloader
def get_random_batch(dataloader, batch_size=128):
    """
    Retrieve a random batch of images and labels from the dataloader.

    When the loader wraps a sized, index-addressable dataset and uses one of
    the stock sequential/random samplers, `batch_size` samples are drawn
    uniformly from the whole dataset and assembled with the loader's own
    `collate_fn`. Taking the loader's first batch instead would, for an
    unshuffled loader, only ever see the first few samples and would start a
    fresh set of worker processes on every call.

    For any other loader (iterable dataset, custom sampler, no collate
    function) the first batch is fetched and randomly sub-sampled.

    Args:
        dataloader: PyTorch dataloader object.
        batch_size: Maximum number of samples in the returned batch.

    Returns:
        A tuple of images and labels, both moved to the module-level `device`.
    """
    dataset = getattr(dataloader, "dataset", None)
    try:
        n_items = len(dataset)
    except TypeError:
        # No dataset attribute, or a dataset without a length
        n_items = 0

    collate = getattr(dataloader, "collate_fn", None)
    # A custom sampler may deliberately restrict which samples are visible,
    # so direct dataset indexing is only used with the stock samplers.
    stock_sampler = isinstance(
        getattr(dataloader, "sampler", None),
        (torch.utils.data.SequentialSampler, torch.utils.data.RandomSampler),
    )

    if n_items and collate is not None and stock_sampler:
        picks = torch.randperm(n_items)[:batch_size].tolist()
        images, labels = collate([dataset[i] for i in picks])
    else:
        images, labels = next(iter(dataloader))
        indices = torch.randperm(images.size(0))[:batch_size]
        images, labels = images[indices], labels[indices]

    return images.to(device), labels.to(device)

class CorruptionOptimizationProblem(Problem):
    """
    Single-objective search problem over sequences of corruption attacks.

    A candidate solution is a vector of `n_var` values in
    `[0, n_corruptions - 1]`. Each value is rounded to an index into the
    module-level `corruptions` list, and the attacks are applied in order.
    The objective is the negated cross-entropy loss of the model on the
    corrupted batch, so minimizing it maximizes the loss.

    Attributes:
        model: The target model to evaluate.
        dataloader: Dataloader providing input data.
        batch_size: Number of samples per batch.
        n_corruptions: Total number of corruption attack types.
        n_var: Number of variables in the optimization problem (sequence length).
    """
    def __init__(self, model, dataloader, batch_size=128, n_corruptions=9, n_var=3):
        # NOTE(review): newer pymoo releases appear to name this keyword
        # `vtype`; confirm against the installed version.
        super().__init__(n_var=n_var, n_obj=1, xl=0, xu=n_corruptions-1, type_var=int)
        self.model = model
        self.dataloader = dataloader
        self.batch_size = batch_size
        self.n_corruptions = n_corruptions

    def _evaluate(self, x, out, *args, **kwargs):
        """
        Evaluate the optimization problem for a given population of solutions.

        Every sequence in the population is scored on the same random batch.

        Args:
            x: Population of solutions (sequences of corruption indices).
            out: Dictionary to store evaluation results; `out["F"]` receives
                one row per solution with the negated loss.
        """
        # One shared batch for the whole population
        batch_images, batch_labels = get_random_batch(self.dataloader, self.batch_size)

        criterion = torch.nn.CrossEntropyLoss()
        objectives = []

        # Only loss values are needed, so autograd is switched off for the
        # forward passes; this avoids building a graph per candidate.
        with torch.no_grad():
            for sequence in x:
                sequence = np.round(sequence).astype(int)  # Ensure indices are integers
                corrupted_images = batch_images
                for idx in sequence:
                    attack = corruptions[int(idx)]
                    corrupted_images, _ = attack(corrupted_images, batch_labels, self.model, magnitude=1)

                # Compute the loss after applying the corruption sequence
                predictions = self.model(corrupted_images)
                loss_value = criterion(predictions, batch_labels).item()
                objectives.append(-loss_value)  # Negate loss for minimization objective

        # One objective column, one row per candidate sequence
        out["F"] = np.array(objectives).reshape(-1, 1)

In [None]:
# Instantiate the corruption optimization problem
problem = CorruptionOptimizationProblem(
    model=classifier,            # Model under attack
    dataloader=unlearn_loader,   # Source of evaluation batches
    batch_size=32                # Set the batch size for evaluation
)

# Initialize the NSGA-II optimization algorithm
algorithm = NSGA2(pop_size=10)  # Set the population size

# Execute the optimization process
res = minimize(
    problem,                     # The optimization problem instance
    algorithm,                   # The NSGA-II algorithm instance
    ('n_gen', 10),               # Number of generations
    save_history=True,           # Save optimization history for analysis
    verbose=True                 # Print detailed progress during optimization
)

# Extract and display the best individual from the results.
# For this single-objective run `res.X` is one 1-D sequence, so indexing it
# with `argmin(res.F)` would return only the sequence's first element.
# Normalizing both arrays to 2-D handles one solution or several.
solutions = np.atleast_2d(res.X)
objective_values = np.asarray(res.F, dtype=float).reshape(len(solutions), -1)[:, 0]
best_individual = solutions[int(np.argmin(objective_values))]  # Sequence with the minimum objective
print("Best corruption sequence found:", best_individual)

In [None]:
import numpy as np

# Collapse the optimizer's real-valued result onto integer corruption indices
best_individual = np.around(res.X).astype(int)

# Show the rounded index sequence
print("Best corruption sequence found (rounded to integers):", best_individual)

# Translate each index into the name of the attack function it selects
best_corruptions = [
    attack.__name__
    for attack in (corruptions[idx] for idx in best_individual)
]
print("Best corruption sequence (by name):", best_corruptions)

Best corruption sequence found (rounded to integers): [5 1 0]
Best corruption sequence (by name): ['ImpulseNoiseAttack', 'ContrastAttack', 'GaussianNoiseAttack']


In [None]:
import torch
import random
import matplotlib.pyplot as plt

# Look up the attack callables selected by the optimized index sequence
best_corruptions = [corruptions[position] for position in map(int, best_individual)]

# Function to apply a sequence of corruptions to the entire dataloader
def apply_corruptions(model, dataloader, corruptions_sequence, magnitude=1, max_examples=5):
    """
    Applies a sequence of corruption attacks to the input data and evaluates the model's performance.

    Args:
        model: The PyTorch model to evaluate; it is switched to eval mode.
        dataloader: The DataLoader providing input data and labels.
        corruptions_sequence: A list of corruption functions to apply, in order.
            Each is called as `corruption(images, labels, model, magnitude=...)`
            and must return a tuple whose first element is the corrupted batch.
        magnitude: Strength passed to every corruption (default 1).
        max_examples: Maximum number of (original, corrupted) image pairs to
            keep for visualization; the first image of each batch is taken
            until this many are collected (default 5).

    Returns:
        misclassification_rate: The misclassification rate induced by the corruptions.
        all_original_images: A list of original images for visualization.
        all_corrupted_images: A list of corrupted images for visualization.

    Raises:
        ValueError: If the dataloader yields no samples, since no rate can be
            computed.
    """
    model.eval()
    total, correct = 0, 0
    all_original_images = []
    all_corrupted_images = []

    with torch.no_grad():
        for images, labels in dataloader:
            # Move data to the device
            images, labels = images.to(device), labels.to(device)
            # Work on a copy so the originals kept for visualization stay clean
            corrupted_images = images.clone()

            # Apply the corruption sequence to the images
            for corruption in corruptions_sequence:
                corrupted_images = corruption(corrupted_images, labels, model, magnitude=magnitude)[0]

            # Perform model predictions on corrupted images
            outputs = model(corrupted_images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Save original and corrupted images for visualization
            if len(all_original_images) < max_examples:
                all_original_images.append(images[0].cpu())
                all_corrupted_images.append(corrupted_images[0].cpu())

    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError
        raise ValueError("dataloader yielded no samples; cannot compute a misclassification rate")

    # Calculate misclassification rate
    accuracy = correct / total
    misclassification_rate = 1 - accuracy
    return misclassification_rate, all_original_images, all_corrupted_images

# Score the optimized sequence over the whole unlearn loader
best_misclassification_rate, best_original_images, best_corrupted_images = apply_corruptions(
    model=classifier,
    dataloader=unlearn_loader,
    corruptions_sequence=best_corruptions,
)

# Baseline: the same number of attacks, drawn at random without repetition
random_corruptions = random.sample(corruptions, k=len(best_corruptions))
random_misclassification_rate, random_original_images, random_corrupted_images = apply_corruptions(
    model=classifier,
    dataloader=unlearn_loader,
    corruptions_sequence=random_corruptions,
)

# Report both misclassification rates
print(f"Best corruption sequence misclassification rate: {best_misclassification_rate:.4f}")
print(f"Random corruption sequence misclassification rate: {random_misclassification_rate:.4f}")

Best corruption sequence misclassification rate: 0.1126
Random corruption sequence misclassification rate: 0.0404


In [None]:
import pickle

# Cast every index to a built-in int so the pickle holds plain Python numbers
# (e.g. best_individual = [1, 3, 7])
best_individual = list(map(int, best_individual))

# Destination of the serialized sequence (relative path, for portability)
output_path = '../data/badnets_corruptions_sequence.pkl'

# Serialize the corruption sequence to disk with pickle
with open(output_path, mode='wb') as sequence_file:
    pickle.dump(best_individual, sequence_file)

print(f"Corruption sequence saved to {output_path}")