## Transfer Attack - Targeted ASR by Model, Target Class, and Patch Size

| Target Class | Transfer to Model | Patch Size (3, 3) | Patch Size (5, 5) | Patch Size (7, 7) | Patch Size (16, 16) |
|--------------|-------------------|-------------------|-------------------|-------------------|---------------------|
| **horse (7)**| **densenet**      | 9.20              | 10.51             | 6.52              | 0.60                |
|              | **vgg**           | 9.14              | 8.65              | 7.93              | 1.80                |
|              | **mobilenet**     | 7.91              | 8.45              | 7.05              | 4.11                |
|              | **efficientnet**  | 9.98              | 9.70              | 9.85              | 4.62                |
| **ship (8)** | **densenet**      | 9.74              | 9.81              | 9.00              | 2.41                |
|              | **vgg**           | 10.73             | 10.38             | 11.08             | 5.68                |
|              | **mobilenet**     | 10.24             | 10.01             | 9.67              | 10.26               |
|              | **efficientnet**  | 10.09             | 9.52              | 9.50              | 7.56                |
| **truck (9)**| **densenet**      | 9.39              | 8.98              | 8.27              | 5.60                |
|              | **vgg**           | 10.34             | 10.08             | 11.62             | 37.28               |
|              | **mobilenet**     | 7.66              | 7.59              | 7.94              | 24.48               |
|              | **efficientnet**  | 9.98              | 10.17             | 11.00             | 16.15               |

In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that files stored
# there can be reached through the regular filesystem.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
# Work from the shared project folder so the relative checkpoint and patch paths used below resolve.
%cd /content/drive/MyDrive/661\ DL\ Final\ Project/Lin\ Transfer\ Learning

/content/drive/MyDrive/661 DL Final Project/Lin Transfer Learning


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torchvision.models import efficientnet_b0
import matplotlib.pyplot as plt
import numpy as np

In [4]:
# Prefer the GPU whenever PyTorch can see one; otherwise run everything on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using GPU: CUDA" if device.type == "cuda" else "Using CPU")

Using GPU: CUDA


In [5]:
# Deterministic preprocessing only (no flip / crop augmentation): convert each
# image to a tensor, then normalize every channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,) * 3, (0.5,) * 3)]
)

# CIFAR-10 training split, reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root="./data", train=True, transform=transform, download=True
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, num_workers=2, shuffle=True
)

# CIFAR-10 test split, served in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root="./data", train=False, transform=transform, download=True
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, num_workers=2, shuffle=False
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Number of output classes; used to size the replacement classification layer of each model.
num_classes = 10

In [None]:
def _restore_weights(model, checkpoint_path):
    """Load a saved state dict into ``model`` and move the model to ``device``.

    Args:
        model (nn.Module): Model whose architecture already matches the checkpoint.
        checkpoint_path (str): Path to a file holding the model's state dict.

    Returns:
        nn.Module: The same model, with the checkpoint weights, on ``device``.
    """
    # map_location lets a checkpoint saved on a GPU load on a CPU-only runtime;
    # weights_only restricts unpickling to tensors/containers, which is all a
    # state dict needs and avoids the torch.load FutureWarning.
    state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    return model.to(device)


# Every model below is built without pretrained weights (``weights=None`` is the
# non-deprecated spelling of ``pretrained=False``), gets a ``num_classes``-way
# output layer, and is then restored from its checkpoint.

# load densenet
densenet = models.densenet121(weights=None)
densenet.classifier = nn.Linear(densenet.classifier.in_features, num_classes)
densenet = _restore_weights(
    densenet, "transfer_models/lin_cifar_densenet_pretrained.pth"
)

# load vgg
vgg = models.vgg16(weights=None)
vgg.classifier[6] = nn.Linear(vgg.classifier[6].in_features, num_classes)
vgg = _restore_weights(vgg, "transfer_models/lin_cifar_vgg_pretrained.pth")

# load mobilenet
mobilenet = models.mobilenet_v2(weights=None)
mobilenet.classifier[1] = nn.Linear(mobilenet.classifier[1].in_features, num_classes)
mobilenet = _restore_weights(
    mobilenet, "transfer_models/lin_cifar_mobilenet_pretrained.pth"
)

# load efficientnet
efficientnet = efficientnet_b0(weights=None)
efficientnet.classifier[1] = nn.Linear(
    efficientnet.classifier[1].in_features, num_classes
)
efficientnet = _restore_weights(
    efficientnet, "transfer_models/lin_cifar_efficientnet_pretrained.pth"
)

  densenet.load_state_dict(torch.load("transfer_models/lin_cifar_densenet_pretrained.pth"))
  vgg.load_state_dict(torch.load("transfer_models/lin_cifar_vgg_pretrained.pth"))
  mobilenet.load_state_dict(torch.load("transfer_models/lin_cifar_mobilenet_pretrained.pth"))
  efficientnet.load_state_dict(torch.load("transfer_models/lin_cifar_efficientnet_pretrained.pth"))


In [8]:
# Models the patches are transferred to, keyed by the name printed during evaluation.
transfer_models = dict(
    densenet=densenet,
    vgg=vgg,
    mobilenet=mobilenet,
    efficientnet=efficientnet,
)

In [9]:
def create_patch(patch_size=(3, 3)):
    """Create a randomly initialised, trainable 3-channel patch.

    Args:
        patch_size (tuple): Spatial size of the patch; its entries follow the
            channel dimension in the resulting tensor's shape.

    Returns:
        torch.Tensor: Tensor of shape ``(3, *patch_size)`` sampled from a
        standard normal distribution, created on the global ``device`` with
        ``requires_grad=True``.
    """
    shape = (3, *patch_size)  # leading 3 = RGB channels
    return torch.randn(shape, device=device, requires_grad=True)


def place_patch(img, patch):
    """Paste ``patch`` at an independently sampled random position in each image.

    Args:
        img (torch.Tensor): Image batch indexed as (batch, channel, height, width).
        patch (torch.Tensor): Patch indexed as (channel, height, width).

    Returns:
        torch.Tensor: A copy of ``img`` with the patch written into every
        image; ``img`` itself is left untouched.
    """
    patch = patch.to(img.device)
    out = img.clone()
    num_images, _, img_h, img_w = out.shape
    patch_h, patch_w = patch.shape[1], patch.shape[2]

    # randint's upper bound is exclusive, hence the +1: the largest offset is
    # the one that puts the patch flush against the bottom / right edge.
    row_bound = img_h - patch_h + 1
    col_bound = img_w - patch_w + 1

    for index in range(num_images):
        # Row offset is drawn before column offset for every image.
        top = int(torch.randint(0, row_bound, (1,)))
        left = int(torch.randint(0, col_bound, (1,)))
        out[index, :, top : top + patch_h, left : left + patch_w] = patch
    return out


def patch_training_step(
    model, patch, target_class=None, dataloader=None, optimizer=None, criterion=None
):
    """Run one pass over ``dataloader``, updating only the adversarial patch.

    Args:
        model (nn.Module): Classifier being attacked. It is switched to eval
            mode and left there.
        patch (torch.Tensor): Trainable patch; clamped in place to [-1, 1]
            after every optimizer step.
        target_class (int, optional): Class the patch should induce. When
            None the attack is untargeted and the loss on the true labels is
            maximised instead.
        dataloader (iterable): Yields ``(images, labels)`` batches.
        optimizer (torch.optim.Optimizer): Optimizer that steps the patch.
        criterion (callable): Loss taking ``(outputs, labels)``.

    Returns:
        float: Mean loss over the processed batches, or 0.0 if the dataloader
        yielded no batches.
    """
    # The model is a fixed target here: only the patch is optimised. Eval mode
    # keeps BatchNorm running statistics from being updated by patched images
    # and disables dropout; gradients still flow back to the patch.
    model.eval()
    total_loss = 0.0
    num_batches = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        patched_images = place_patch(images, patch)  # Apply patch to images
        outputs = model(patched_images)

        if target_class is not None:
            # Targeted: push every sample towards the target class.
            target_labels = torch.full(
                (images.size(0),), target_class, dtype=torch.long, device=device
            )
            loss = criterion(outputs, target_labels)
        else:
            # Untargeted: negate the loss so that minimising it moves
            # predictions away from the true labels.
            loss = -criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        with torch.no_grad():
            patch.clamp_(-1, 1)  # Ensure the patch values remain within a valid range

        total_loss += loss.item()
        num_batches += 1

    # Counting batches (instead of len(dataloader)) avoids a ZeroDivisionError
    # on an empty loader and also works for iterables without __len__.
    return total_loss / num_batches if num_batches else 0.0


def train_adversarial_patch(
    model, patch, dataloader=None, target_class=None, num_epochs=20, lr=0.0001
):
    """Optimise ``patch`` against ``model`` with Adam, printing the loss per epoch.

    Args:
        model (nn.Module): Classifier being attacked.
        patch (torch.Tensor): Trainable patch; it is the only tensor handed to
            the optimizer.
        dataloader (iterable, optional): Source of ``(images, labels)``
            batches. Defaults to the global ``trainloader``.
        target_class (int, optional): Target class, or None for an untargeted attack.
        num_epochs (int): Number of passes over the dataloader.
        lr (float): Adam learning rate.
    """
    batches = trainloader if dataloader is None else dataloader
    loss_fn = nn.CrossEntropyLoss()
    patch_optimizer = optim.Adam([patch], lr=lr)

    for epoch in range(num_epochs):
        loss = patch_training_step(
            model,
            patch,
            target_class=target_class,
            dataloader=batches,
            optimizer=patch_optimizer,
            criterion=loss_fn,
        )
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss:.4f}")

In [10]:
def evaluate_patch(
    model, patch, dataloader, target_class=None, exclude_target_samples=False
):
    """Measure and print the attack success rate of ``patch`` on ``model``.

    Args:
        model (nn.Module): Classifier to evaluate; switched to eval mode.
        patch (torch.Tensor): Adversarial patch pasted onto every image.
        dataloader (iterable): Yields ``(images, labels)`` batches.
        target_class (int, optional): When given, a sample counts as a success
            if it is predicted as this class. When None, a sample counts as a
            success if its prediction differs from its label.
        exclude_target_samples (bool): Targeted attacks only. When True,
            samples whose true label already equals ``target_class`` are left
            out of both the success count and the total, so correct
            predictions on target-class images are not counted as attack
            successes. Defaults to False, which counts every sample.

    Returns:
        float: Success rate in percent, or NaN when no sample was evaluated.
    """
    model.eval()
    successes = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            patched_images = place_patch(images, patch)
            outputs = model(patched_images)
            _, predicted = outputs.max(1)

            if target_class is not None:
                hits = predicted == target_class
                if exclude_target_samples:
                    eligible = labels != target_class
                    hits = hits & eligible
                    total += eligible.sum().item()
                else:
                    total += labels.size(0)
            else:
                hits = predicted != labels  # Evaluate untargeted attack
                total += labels.size(0)

            successes += hits.sum().item()

    # An empty loader (or one with only excluded samples) has no defined rate;
    # report NaN instead of raising ZeroDivisionError.
    success_rate = 100 * successes / total if total else float("nan")

    if target_class is not None:
        print(f"Targeted Attack Success Rate (Class {target_class}): {success_rate:.2f}%")
    else:
        print(f"Untargeted Attack Success Rate: {success_rate:.2f}%")

    # Returned so callers can tabulate results instead of copying them from stdout.
    return success_rate

In [11]:
# Class indices used as targets for the targeted attack evaluation.
target_classes = [7, 8, 9]

In [12]:
# Label index -> human-readable class name; list position is the label index.
class_names = dict(
    enumerate(
        [
            "airplane",
            "automobile",
            "bird",
            "cat",
            "deer",
            "dog",
            "frog",
            "horse",
            "ship",
            "truck",
        ]
    )
)

In [13]:
def denormalize(tensor, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """
    Undo a per-channel ``(x - mean) / std`` normalization.

    The input is not modified; a new tensor is returned. Both a single image
    ``(C, H, W)`` and a batch ``(N, C, H, W)`` are supported, with the channel
    dimension always third from the end.

    Args:
        tensor (torch.Tensor): Normalized image tensor.
        mean (tuple): Per-channel mean used for normalization.
        std (tuple): Per-channel standard deviation used for normalization.

    Returns:
        torch.Tensor: Denormalized image tensor, ``tensor * std + mean``.

    Raises:
        ValueError: If ``tensor`` has fewer than 3 dimensions or if ``mean`` /
            ``std`` do not have one entry per channel.
    """
    if tensor.dim() < 3:
        raise ValueError(
            f"expected a (C, H, W) or (N, C, H, W) tensor, got {tensor.dim()} dimension(s)"
        )
    channels = tensor.size(-3)
    if len(mean) != channels or len(std) != channels:
        raise ValueError(
            f"mean/std must have one entry per channel ({channels}), "
            f"got {len(mean)} and {len(std)}"
        )

    # Keep the statistics in the tensor's own floating dtype so the result
    # dtype is unchanged; integer inputs fall back to the default float dtype.
    stats_dtype = (
        tensor.dtype if tensor.is_floating_point() else torch.get_default_dtype()
    )
    # Shape (C, 1, 1) broadcasts over height/width and any leading batch dim.
    mean_t = torch.as_tensor(mean, dtype=stats_dtype, device=tensor.device).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=stats_dtype, device=tensor.device).view(-1, 1, 1)

    # Out-of-place on purpose: the caller's tensor stays normalized, so calling
    # this twice on the same input cannot double-denormalize it.
    return tensor * std_t + mean_t


def visualize_misclassified_patched_images(
    model, patch, dataloader, target_class=None, num_images=5
):
    """
    Plot clean images next to their patched versions for successful attacks.

    Args:
        model (nn.Module): Classifier under attack; switched to eval mode.
        patch (torch.Tensor): Trained adversarial patch.
        dataloader (DataLoader): Source of ``(images, labels)`` evaluation batches.
        target_class (int, optional): For targeted attacks, only samples that
            are predicted as this class and whose true label is a different
            class are shown. When None, any sample whose prediction differs
            from its label is shown.
        num_images (int, optional): Collection stops once this many examples
            have been gathered. Defaults to 5.
    """

    def collect_examples():
        """Scan the dataloader in order and gather fooled samples."""
        examples = []
        with torch.no_grad():
            for images, labels in dataloader:
                images, labels = images.to(device), labels.to(device)
                patched_images = place_patch(images, patch)
                _, predicted = model(patched_images).max(1)

                if target_class is None:
                    # Untargeted: any prediction that differs from the label.
                    mask = predicted != labels
                else:
                    # Targeted: predicted as the target although the true
                    # class is something else.
                    mask = (predicted == target_class) & (labels != target_class)

                for i in range(images.size(0)):
                    if mask[i]:
                        examples.append(
                            (
                                images[i].cpu(),
                                patched_images[i].cpu(),
                                labels[i].cpu(),
                                predicted[i].cpu(),
                            )
                        )
                    # Checked after every sample, so the scan stops as soon
                    # as the quota is met.
                    if len(examples) >= num_images:
                        return examples
        return examples

    model.eval()
    misclassified = collect_examples()

    if len(misclassified) == 0:
        print("No misclassified images found with the current patch.")
        return

    # One small two-panel figure per collected example.
    for orig, patched, true_label, pred_label in misclassified:
        panels = (
            (orig, f"Original\nTrue: {class_names[true_label.item()]}"),
            (patched, f"Patched\nPredicted: {class_names[pred_label.item()]}"),
        )

        plt.figure(figsize=(4, 2))
        for position, (image, title) in enumerate(panels, start=1):
            # Back to channel-last layout for imshow, clipped to [0, 1].
            pixels = np.clip(denormalize(image).permute(1, 2, 0).numpy(), 0, 1)
            plt.subplot(1, 2, position)
            plt.imshow(pixels)
            plt.title(title)
            plt.axis("off")

        plt.tight_layout()
        plt.show()

In [None]:
# Transfer evaluation: for every (target class, patch size) pair, load the
# saved baseline patch and report its targeted success rate on each model.
for target_class in target_classes:
    print(f"Target Class: {class_names[target_class]}")

    for patch_size in [(3, 3), (5, 5), (7, 7), (16, 16)]:
        print(f"Patch Size: {patch_size}")

        # The patch depends only on the target class and patch size, so it is
        # loaded once here rather than once per model.
        patch_filename = f"adversarial_patch_{class_names[target_class]}_{patch_size}_baseline.pth"

        # map_location places the patch on the active device even if it was
        # saved from a GPU; weights_only restricts unpickling to tensor data
        # and avoids the torch.load FutureWarning.
        patch = torch.load(patch_filename, map_location=device, weights_only=True)

        for model_name, model in transfer_models.items():
            print(f"Transfer to Model: {model_name}")

            evaluate_patch(model, patch, testloader, target_class=target_class)

Target Class: horse
Patch Size: (3, 3)
Transfer to Model: densenet


  patch = torch.load(patch_filename).to(device)


Targeted Attack Success Rate (Class 7): 9.20%
Transfer to Model: vgg
Targeted Attack Success Rate (Class 7): 9.14%
Transfer to Model: mobilenet
Targeted Attack Success Rate (Class 7): 7.91%
Transfer to Model: efficientnet
Targeted Attack Success Rate (Class 7): 9.98%
Patch Size: (5, 5)
Transfer to Model: densenet
Targeted Attack Success Rate (Class 7): 10.51%
Transfer to Model: vgg
Targeted Attack Success Rate (Class 7): 8.65%
Transfer to Model: mobilenet
Targeted Attack Success Rate (Class 7): 8.45%
Transfer to Model: efficientnet
Targeted Attack Success Rate (Class 7): 9.70%
Patch Size: (7, 7)
Transfer to Model: densenet
Targeted Attack Success Rate (Class 7): 6.52%
Transfer to Model: vgg
Targeted Attack Success Rate (Class 7): 7.93%
Transfer to Model: mobilenet
Targeted Attack Success Rate (Class 7): 7.05%
Transfer to Model: efficientnet
Targeted Attack Success Rate (Class 7): 9.85%
Patch Size: (16, 16)
Transfer to Model: densenet
Targeted Attack Success Rate (Class 7): 0.60%
Transf