# Attack Evaluation

Compares the custom adversarial attack against popular baseline attacks (FGSM and PGD) on the same model. The Carlini & Wagner (C&W) attack does not work yet, so it is currently disabled.

In [None]:
import torch
import torch.nn.functional as F
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import pandas as pd
import pickle
import os
from torchvision.utils import save_image

In [None]:
# Load model
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'model (1).pkl' if it comes from a trusted source.
with open('model (1).pkl', 'rb') as file:
    model = pickle.load(file)

# Presumably a torch nn.Module: eval() switches it to inference behaviour.
# As the last expression of the cell, its return value is also displayed.
model.eval()

In [None]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Subset
import random
from torch import nn

In [None]:
#Preprocess with only 200 images
def preprocess_inputs(filepath, num_samples=200, batch_size=32):
    """Build a DataLoader over a random subset of an image-folder dataset.

    Every image is resized to 256, center-cropped to 224, converted to a
    tensor and normalized per channel with the mean/std constants below
    (the values commonly used with ImageNet-pretrained torchvision models).

    Args:
        filepath: Root directory handed to ``datasets.ImageFolder``.
        num_samples: Maximum number of images to draw at random. When the
            dataset holds fewer images, all of them are used.
        batch_size: Batch size of the returned loader.

    Returns:
        A shuffling ``DataLoader`` over the sampled subset.
    """
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    val_dataset = datasets.ImageFolder(root=filepath, transform=preprocess)

    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the request at the dataset size.
    sample_size = min(num_samples, len(val_dataset))
    indices = random.sample(range(len(val_dataset)), sample_size)
    reduced_dataset = Subset(val_dataset, indices)

    return DataLoader(reduced_dataset, batch_size=batch_size, shuffle=True)

In [None]:
def evaluate(model, dataloader, attack=None, device='cuda'):
    """Return accuracy and mean cross-entropy loss of ``model`` on ``dataloader``.

    Args:
        model: Callable mapping a batch of images to class logits. It is put
            in eval mode but is NOT moved to ``device``; it must already
            live there.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        attack: Optional callable ``attack(model, images, labels)`` returning
            perturbed images. When given, metrics are computed on its output.
        device: Device the batches are moved to before use.

    Returns:
        ``(accuracy, avg_loss)``, both averaged over all evaluated samples.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss()
    correct = 0
    total = 0
    total_loss = 0.0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        if attack is not None:
            # Attacks differentiate w.r.t. the input, so grad must be on here.
            with torch.enable_grad():
                images = attack(model, images, labels)
            # Drop any autograd graph the attack left attached to its output.
            images = images.detach()

        # Scoring never needs gradients, with or without an attack.
        with torch.no_grad():
            outputs = model(images)
            loss = criterion(outputs, labels)

        batch_size = labels.size(0)
        # criterion returns the batch mean; re-weight by batch size so the
        # final average is per-sample even if the last batch is smaller.
        total_loss += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

    if total == 0:
        raise ValueError("dataloader yielded no samples to evaluate")

    return correct / total, total_loss / total

In [None]:
#Defining the attacks
from torchattacks import CW

In [None]:
# Custom attack
def adversarial_attack(model, clean_img, targets, lambda_reg=0.1, epsilon=0.03, iterations=10):
    """Craft an untargeted adversarial batch by optimizing a perturbation with Adam.

    The perturbation ``delta`` is updated to increase the cross-entropy of
    ``model(clean_img + delta)`` against ``targets`` while an L2 penalty
    (weighted by ``lambda_reg``) keeps it small. ``targets`` are the labels
    the model should be pushed away from, matching how ``fgsm_attack`` and
    ``pgd_attack`` in this file use them.

    NOTE(review): the projection onto [0, 1] assumes raw pixel intensities.
    Images produced by ``preprocess_inputs`` in this file are mean/std
    normalized and can fall outside that range; confirm which space this
    attack is meant to operate in.

    Args:
        model: Callable mapping an image batch to class logits.
        clean_img: Unperturbed image batch.
        targets: Class indices for ``clean_img``.
        lambda_reg: Weight of the L2 penalty on ``delta``.
        epsilon: Per-element bound on ``|delta|``.
        iterations: Number of Adam steps.

    Returns:
        ``clean_img + delta`` as a tensor detached from the autograd graph.
    """
    clean_img = clean_img.detach()
    delta = torch.zeros_like(clean_img, requires_grad=True)
    optimizer = torch.optim.Adam([delta], lr=0.01)

    # Make the attack work even when the caller has disabled grad.
    with torch.enable_grad():
        for _ in range(iterations):
            preds = model(clean_img + delta)
            loss = F.cross_entropy(preds, targets)

            # Regularization-aware perturbation loss (modify R(delta) as needed)
            reg_loss = lambda_reg * torch.norm(delta, p=2)

            # The attack wants to MAXIMIZE (loss - reg_loss). Adam minimizes,
            # so hand it the negated objective; minimizing (loss - reg_loss)
            # directly would make the model more correct, not less.
            objective = loss - reg_loss
            optimizer.zero_grad()
            (-objective).backward()
            optimizer.step()

            # Project back into the epsilon box, then into the valid range.
            with torch.no_grad():
                delta.clamp_(-epsilon, epsilon)
                delta.copy_(torch.clamp(clean_img + delta, 0, 1) - clean_img)

    return (clean_img + delta).detach()


# Fast Gradient Sign Method
def fgsm_attack(model, clean_img, targets, epsilon=0.03):
    """Fast Gradient Sign Method: one signed-gradient step of size ``epsilon``.

    NOTE(review): the final clamp to [0, 1] assumes raw pixel intensities.
    Images produced by ``preprocess_inputs`` in this file are mean/std
    normalized and can fall outside that range; confirm the intended space.

    Args:
        model: Callable mapping an image batch to class logits; its
            ``zero_grad()`` is called before backpropagation.
        clean_img: Unperturbed image batch (left unmodified).
        targets: Class indices whose cross-entropy is increased.
        epsilon: Step size applied to the sign of the input gradient.

    Returns:
        The perturbed batch, clamped to [0, 1] and detached from autograd.
    """
    # Work on a private leaf so the caller's tensor never gains a .grad.
    x = clean_img.clone().detach().requires_grad_(True)

    # Make the attack work even when the caller has disabled grad.
    with torch.enable_grad():
        loss = F.cross_entropy(model(x), targets)
        model.zero_grad()
        loss.backward()

    perturbed_img = x.detach() + epsilon * x.grad.sign()
    return torch.clamp(perturbed_img, 0, 1)


#Projected Gradient Descent
def pgd_attack(model, clean_img, targets, alpha=0.1, epsilon=0.03, iterations=10):
    """Projected Gradient Descent: iterated signed-gradient steps with projection.

    Each iteration steps ``alpha`` along the sign of the input gradient of the
    cross-entropy, then projects the result back into the ``epsilon`` box
    around ``clean_img`` and into [0, 1]. The whole batch is attacked at once.

    NOTE(review): the default ``alpha`` (0.1) is larger than the default
    ``epsilon`` (0.03), so a step along a non-zero gradient sign always lands
    on the box boundary; a smaller ``alpha`` is the usual choice.
    NOTE(review): the clamp to [0, 1] assumes raw pixel intensities. Images
    produced by ``preprocess_inputs`` in this file are mean/std normalized
    and can fall outside that range; confirm the intended space.

    Args:
        model: Callable mapping an image batch to class logits; its
            ``zero_grad()`` is called before each backward pass.
        clean_img: Unperturbed image batch (left unmodified).
        targets: Class indices whose cross-entropy is increased.
        alpha: Step size per iteration.
        epsilon: Per-element bound on the total perturbation.
        iterations: Number of gradient steps.

    Returns:
        The adversarial batch, detached from the autograd graph.
    """
    clean = clean_img.clone().detach()
    x_adv = clean.clone()

    # Make the attack work even when the caller has disabled grad.
    with torch.enable_grad():
        for _ in range(iterations):
            x_adv.requires_grad_(True)
            loss = F.cross_entropy(model(x_adv), targets)
            model.zero_grad()
            loss.backward()

            # The update itself must not be tracked; it yields a fresh leaf.
            with torch.no_grad():
                stepped = x_adv + alpha * x_adv.grad.sign()
                stepped = torch.min(torch.max(stepped, clean - epsilon), clean + epsilon)
                x_adv = torch.clamp(stepped, 0, 1)

    return x_adv.detach()



#Carlini and Wagner
# def cw_attack(model, clean_img, targets, c=1.0, kappa=0, steps=1000, lr=0.01, num_samples=100):
#     atk = CW(model, c=c, kappa=kappa, steps=steps, lr=lr)
#     atk.set_return_type('float')  # returns tensor

#     perturbed = atk(clean_img, targets)

#     return perturbed


In [None]:
# Load about 200 images to start
# preprocess_inputs draws a random subset of the image folder below and
# returns it wrapped in a shuffling DataLoader.
val_data_path = '/content/drive/MyDrive/imagenette2/val'
imagenette_val = preprocess_inputs(val_data_path)

In [None]:
# Sanity check: prints the loader object returned by preprocess_inputs
# (its repr, not the image data).
print(imagenette_val)

In [None]:
#Evaluating on all attacks
# Every evaluate() call below iterates the same sampled subset held by
# imagenette_val and uses evaluate's default device ('cuda').

# Baseline: accuracy/loss on unperturbed images.
print("Evaluating on clean data...")
clean_acc, clean_loss = evaluate(model, imagenette_val, attack=None)
print(f"Clean Accuracy: {clean_acc:.4f} | Loss: {clean_loss:.4f}")

# Same metrics after each batch is perturbed by the custom attack.
print("Evaluating on adversarial (custom attack) data...")
adv_acc, adv_loss = evaluate(model, imagenette_val, attack=adversarial_attack)
print(f"Adversarial Accuracy: {adv_acc:.4f} | Loss: {adv_loss:.4f}")

# C&W evaluation is disabled: cw_attack is commented out in this file.
# print("Evaluating on adversarial (C&W) data...")
# adv_acc, adv_loss = evaluate(model, imagenette_val, attack=cw_attack)
# print(f"Adversarial Accuracy: {adv_acc:.4f} | Loss: {adv_loss:.4f}")

In [None]:
# Baseline attacks, run with their default hyperparameters.
# adv_acc / adv_loss are reused, so each evaluation overwrites the previous one.
print("Evaluating on adversarial (FGSM) data...")
adv_acc, adv_loss = evaluate(model, imagenette_val, attack=fgsm_attack)
print(f"Adversarial Accuracy: {adv_acc:.4f} | Loss: {adv_loss:.4f}")

print("Evaluating on adversarial (PGD) data...")
adv_acc, adv_loss = evaluate(model, imagenette_val, attack=pgd_attack)
print(f"Adversarial Accuracy: {adv_acc:.4f} | Loss: {adv_loss:.4f}")

In [None]:
#Tuning hyperparameters

In [None]:
# Take one batch from the loader; the cells below reuse clean_img / targets
# to compare hyperparameter settings on the same images.
clean_img, targets = next(iter(imagenette_val))

# Hard-coded to the GPU; the model must live on the same device.
clean_img = clean_img.to('cuda')
targets = targets.to('cuda')

adv_image = adversarial_attack(model, clean_img, targets, lambda_reg=0.1, epsilon=0.03, iterations=10)

# Save only the first image of the batch.
# File name pattern: adv_<lambda_reg>_<epsilon>_<iterations>, decimal points dropped.
save_image(adv_image[0], "adv_01_003_10.png")

In [None]:
# Weaker penalty and tighter perturbation bound on the same batch.
adv_image = adversarial_attack(model, clean_img, targets, lambda_reg=0.05, epsilon=0.01, iterations=10)

# Save only the first image of the batch.
save_image(adv_image[0], "adv_005_001_10.png")

In [None]:
# Stronger penalty and looser perturbation bound on the same batch.
adv_image = adversarial_attack(model, clean_img, targets, lambda_reg=0.2, epsilon=0.06, iterations=10)

# Save only the first image of the batch.
save_image(adv_image[0], "adv_02_006_10.png")

In [None]:
# lambda_reg=0.05 and epsilon=0.01 again, but with half as many iterations.
adv_image = adversarial_attack(model, clean_img, targets, lambda_reg=0.05, epsilon=0.01, iterations=5)

# Save only the first image of the batch. File names in this notebook follow
# adv_<lambda_reg>_<epsilon>_<iterations> with decimal points dropped, so
# lambda_reg=0.05 is written "005" ("05" would read as 0.5).
save_image(adv_image[0], "adv_005_001_5.png")