# Main Code

This notebook is where we develop the initial code for this experiment and produce initial results.

## Preprocessing and Definitions

Preprocess the data and define the attack and the regularizers.

In [None]:
import torch
import torch.nn.functional as F
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import pandas as pd

In [None]:
from torch.utils.data import DataLoader, Subset

In [None]:
def preprocess_inputs(filepath, batch_size=32, shuffle=True, subsample_step=2):
    """Build a DataLoader over a strided subset of an ImageFolder dataset.

    Each image is resized to 256, center-cropped to 224x224, converted to a
    tensor and normalized with the mean/std constants listed below.

    Args:
        filepath: root directory handed to ``datasets.ImageFolder``.
        batch_size: number of samples per batch (default 32).
        shuffle: whether the loader reshuffles on every pass (default True).
        subsample_step: keep every ``subsample_step``-th sample starting at
            index 0; the default of 2 keeps every other sample, ``1`` keeps
            the whole dataset.

    Returns:
        A ``DataLoader`` yielding ``(images, labels)`` batches.

    Raises:
        ValueError: if ``subsample_step`` is smaller than 1.
    """
    if subsample_step < 1:
        raise ValueError("subsample_step must be >= 1")

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    dataset = datasets.ImageFolder(root=filepath, transform=preprocess)

    # Strided subsampling shrinks the dataset (by half with the default step).
    kept_indices = list(range(0, len(dataset), subsample_step))
    reduced_dataset = Subset(dataset, kept_indices)

    return DataLoader(reduced_dataset, batch_size=batch_size, shuffle=shuffle)

# Variance-based regularizer.
# Runs its own forward pass to compute the loss.
def variance_adam_regularizer(model, inputs, targets, loss_fn, lambda_reg=0.1):
    """
    Computes loss + variance-based regularization term.

    The variance is taken over the individual losses of the batch, so the loss
    must be evaluated without reduction. If ``loss_fn`` exposes a ``reduction``
    attribute (as the ``torch.nn`` loss modules do) that is not ``'none'``, it
    is switched to ``'none'`` for this one call and restored afterwards.
    Without that, a mean-reduced loss yields a single scalar whose variance is
    always zero and the regularizer silently does nothing.

    Args:
        model: pretrained resnet
        inputs: input batch
        targets: target labels
        loss_fn: (nn.CrossEntropyLoss). A plain callable without a
            ``reduction`` attribute is called as-is and should return one loss
            value per sample.
        lambda_reg: strength of reg

    Returns:
        total loss (mean + lambda * variance)
    """
    outputs = model(inputs)

    reduction = getattr(loss_fn, "reduction", None)
    if reduction is None or reduction == "none":
        per_sample_loss = loss_fn(outputs, targets)
    else:
        # Temporarily disable the reduction; `finally` guarantees the caller's
        # loss object is left exactly as it was handed in.
        loss_fn.reduction = "none"
        try:
            per_sample_loss = loss_fn(outputs, targets)
        finally:
            loss_fn.reduction = reduction

    # Mean and (population) variance of the per-sample losses
    mean_loss = per_sample_loss.mean()
    variance = per_sample_loss.var(unbiased=False)

    # Total loss with variance regularization
    return mean_loss + lambda_reg * variance

# Basic gradient-norm regularizer.
# Runs its own forward pass to compute the loss.
def gradient_norm_regularizer(model, inputs, targets, loss_fn, lambda_reg=0.1):
    """
    Computes loss + lambda * L2 norm of the loss gradient w.r.t. the inputs.

    Args:
        model: network to evaluate
        inputs: input batch; left untouched (its ``requires_grad`` flag is
            not modified)
        targets: target labels
        loss_fn: loss returning a scalar (``torch.autograd.grad`` needs a
            scalar output here)
        lambda_reg: strength of reg

    Returns:
        total loss (loss + lambda * ||d loss / d inputs||_2); the norm is
        taken over the gradient of the whole batch and stays differentiable.
    """
    if not inputs.requires_grad:
        # Track gradients on a private leaf that shares storage with `inputs`
        # instead of flipping the flag on the caller's tensor.
        inputs = inputs.detach().requires_grad_(True)

    outputs = model(inputs)
    loss = loss_fn(outputs, targets)

    # create_graph=True keeps the gradient itself differentiable so the norm
    # can be back-propagated into the model parameters.
    grads = torch.autograd.grad(loss, inputs, create_graph=True)[0]
    grad_norm = torch.norm(grads, p=2)  # L2 norm of the input gradients

    return loss + lambda_reg * grad_norm


# Step 3: defining optimizer-based attack
def adversarial_attack(model, clean_img, targets, lambda_reg=0.1, epsilon=0.03,
                       iterations=10, clip_min=0.0, clip_max=1.0):
    """
    Craft an adversarial batch by optimizing an additive perturbation with Adam.

    The perturbation ``delta`` is chosen to maximize
    ``cross_entropy(model(clean_img + delta), targets) - lambda_reg * ||delta||_2``
    and is projected after every step so that each element stays within
    ``[-epsilon, epsilon]`` and ``clean_img + delta`` stays within
    ``[clip_min, clip_max]``.

    Args:
        model: classifier under attack
        clean_img: clean input batch
        targets: true labels for the batch
        lambda_reg: weight of the L2 penalty on the perturbation
        epsilon: per-element bound on the perturbation
        iterations: number of Adam steps
        clip_min / clip_max: valid value range of an input. The defaults
            assume inputs in [0, 1]; inputs that were mean/std-normalized lie
            outside that range, so pass matching bounds (or ``None`` for both
            to skip this projection) in that case.

    Returns:
        The perturbed batch, detached from the autograd graph.
    """
    delta = torch.zeros_like(clean_img, requires_grad=True)
    optimizer = torch.optim.Adam([delta], lr=0.01)

    for _ in range(iterations):
        preds = model(clean_img + delta)
        loss = F.cross_entropy(preds, targets)

        # Regularization-aware perturbation loss
        reg_loss = lambda_reg * torch.norm(delta, p=2)
        attack_objective = loss - reg_loss

        # Adam minimizes, so hand it the gradient of the negated objective.
        # Differentiating w.r.t. delta only keeps the attack from
        # accumulating gradients into the model parameters.
        (delta_grad,) = torch.autograd.grad(-attack_objective, delta)
        delta.grad = delta_grad
        optimizer.step()

        # Keep perturbations within a valid range
        with torch.no_grad():
            delta.clamp_(-epsilon, epsilon)
            if clip_min is not None or clip_max is not None:
                projected = torch.clamp(clean_img + delta, min=clip_min, max=clip_max)
                delta.copy_(projected - clean_img)

    return (clean_img + delta).detach()

## Training the model

In [None]:
import torch
import torch.nn.functional as F
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import pandas as pd
import pickle
import os
from torchvision.utils import save_image

In [None]:
import os

# Dataset split directories; both live under the same imagenette2 folder.
imagenette_root = '/content/drive/MyDrive/imagenette2'
train_data_path = f'{imagenette_root}/train'
val_data_path = f'{imagenette_root}/val'
# Debug helper: count the files in each class folder.
# for cls in os.listdir(train_data_path):
#     print(cls, "->", len(os.listdir(os.path.join(train_data_path, cls))), "files")

In [None]:
# One DataLoader per split, both built with the same preprocessing pipeline.
imagenette_train = preprocess_inputs(filepath=train_data_path)
imagenette_val = preprocess_inputs(filepath=val_data_path)

In [None]:
# Fine-tune an ImageNet-pretrained ResNet-50 on the Imagenette training loader
# using the variance regularizer.
model = resnet50(weights=ResNet50_Weights.DEFAULT)
model = model.to('cuda')

# Set hyperparameters
num_epochs = 10
batch_size = 64  # NOTE(review): not used below; the loader's batch size is fixed where the loader is built
learning_rate = 0.001

# Define the loss function and optimizer.
# reduction='none' yields one loss value per sample. The variance regularizer
# needs those individual values: with the default mean reduction it only
# receives a scalar, whose variance is zero.
criterion = torch.nn.CrossEntropyLoss(reduction='none')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

model.train()  # make the training mode explicit

# Train the model...
for epoch in range(num_epochs):
    for inputs, labels in imagenette_train:
        # Move input and label tensors to the device
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')

        # Zero out the optimizer
        optimizer.zero_grad()

        # The regularizer performs the forward pass itself, so a separate
        # model(inputs) call here would only repeat the same computation.
        loss = variance_adam_regularizer(model, inputs, labels, criterion)

        # Backward pass
        loss.backward()
        optimizer.step()

    # Reports the loss of the LAST batch of the epoch, not an epoch average
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')

print(f'Finished Training, Loss: {loss.item():.4f}')

# Persist the whole model object; later cells reload it with pickle.load
with open('model.pkl', 'wb') as file:
    pickle.dump(model, file)

idx = 0

## Testing and Evaluation

Session 2: saving dataset, testing, and random visualizations

In [None]:
# Restore the pickled model object from model.pkl.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open('model.pkl', 'rb') as checkpoint:
    model = pickle.load(checkpoint)

model.eval()  # switch to evaluation mode before running inference

In [None]:
# input = 224 x 224 x 3
# Human-readable class names, indexed by integer label.
class_names = [
    'tench',
    'English springer',
    'cassette player',
    'chain saw',
    'church',
    'French horn',
    'garbage truck',
    'gas pump',
    'golf ball',
    'parachute',
]

In [None]:
output_root = 'attack_imagenette'

# Create one sub-folder per class under the output root (no error if present).
class_dirs = [os.path.join(output_root, class_name) for class_name in class_names]
for class_dir in class_dirs:
    os.makedirs(class_dir, exist_ok=True)

In [None]:
# Save ALL attacked validation images to attack_imagenette/<class name>/<idx>.png
# NOTE(review): the loader yields mean/std-normalized tensors and they are
# written out as-is - confirm the saved PNGs are meant to be in that space.

idx = 0

# Loop-invariant: move the model once instead of once per batch
model = model.to('cuda')

for inputs, labels in imagenette_val:
    inputs = inputs.to('cuda')
    labels = labels.to('cuda')

    # Only the pixel values are needed from here on, so drop the autograd graph
    attacked_batch = adversarial_attack(model, inputs, labels).detach()

    for img, true_label in zip(attacked_batch, labels.tolist()):
        # Get the folder corresponding to the true label
        class_folder = os.path.join(output_root, class_names[true_label])

        output_path = os.path.join(class_folder, f"{idx:05d}.png")
        save_image(img.unsqueeze(0), output_path)

        idx += 1

In [None]:
import tqdm
from torch import nn

In [None]:
# Load image; force 3 channels so the tensor always matches the model input
img = Image.open('/content/attack_imagenette/chain saw/00047.png').convert('RGB')

# Define transformations: Resize and convert to tensor
# NOTE(review): no Normalize step here, unlike the loader preprocessing - confirm this is intended.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to fit model input size (e.g., 224x224 for ResNet)
    transforms.ToTensor(),          # Convert image to tensor
])

# Apply transformations
img_tensor = transform(img).unsqueeze(0)  # Add batch dimension

# Move tensor to the same device as model (GPU/CPU)
img_tensor = img_tensor.to('cuda')

with torch.no_grad():  # No need to track gradients during inference
    output = model(img_tensor)

# Get predicted label (index of max output)
_, predicted_label = torch.max(output, 1)

# Print predicted label.
# The network still has the full ImageNet output layer, so the predicted index
# can fall outside the ten entries of class_names; guard against IndexError.
predicted_idx = predicted_label.item()
if predicted_idx < len(class_names):
    class_label = class_names[predicted_idx]
else:
    class_label = f"<index {predicted_idx}, outside class_names>"
print(f"Predicted label: {class_label}")


In [None]:
def evaluate(model, dataloader, attack=None, device=None):
    """Measure accuracy and mean cross-entropy of ``model`` over ``dataloader``.

    Args:
        model: network to evaluate; it is put into eval mode.
        dataloader: iterable of ``(images, labels)`` batches.
        attack: optional callable ``attack(model, images, labels)`` returning
            the perturbed images; it is run with gradients enabled.
        device: device the batches are moved to. ``None`` (default) uses the
            device of the model's first parameter, falling back to ``'cpu'``
            for a parameter-free model, so data and model always agree.

    Returns:
        ``(accuracy, avg_loss)``: fraction of correct argmax predictions and
        the sample-weighted mean cross-entropy loss.

    Raises:
        ZeroDivisionError: if ``dataloader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else 'cpu'

    model.eval()
    correct = 0
    total = 0
    total_loss = 0
    criterion = nn.CrossEntropyLoss()

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        if attack is not None:
            # Only the attack needs gradients (w.r.t. the input).
            with torch.enable_grad():
                images = attack(model, images, labels)
            images = images.detach()

        # Scoring never needs a graph, with or without an attack.
        with torch.no_grad():
            outputs = model(images)
            loss = criterion(outputs, labels)

        batch_size = labels.size(0)
        total_loss += loss.item() * batch_size  # undo the per-batch mean
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

    accuracy = correct / total
    avg_loss = total_loss / total
    return accuracy, avg_loss

Various evaluations on model, attack, and regularizer

In [None]:
# Evaluate the fine-tuned model on clean AND attacked data.
# device='cuda' is passed explicitly: the model was moved to the GPU earlier,
# and leaving the batches on the CPU would cause a device mismatch.
# imagenette_clean = preprocess_inputs()
print("Evaluating on clean data...")
clean_acc, clean_loss = evaluate(model, imagenette_val, attack=None, device='cuda')
print(f"Clean Accuracy: {clean_acc:.4f} | Loss: {clean_loss:.4f}")

#TODO: make adv attack data loader
# attacked_val = preprocess_inputs('/content/attack_imagenette')

print("Evaluating on adversarial (custom attack) data...")
adv_acc, adv_loss = evaluate(model, imagenette_val, attack=adversarial_attack, device='cuda')
print(f"Adversarial Accuracy: {adv_acc:.4f} | Loss: {adv_loss:.4f}")

In [None]:
# Baseline for comparison: the pretrained ResNet-50 exactly as downloaded.
unregular_model = resnet50(weights=ResNet50_Weights.DEFAULT).to('cuda')

print("Evaluating unregularized model on clean data...")
baseline_clean_metrics = evaluate(
    unregular_model,
    imagenette_val,
    attack=None,
    device='cuda',
)
clean_acc, clean_loss = baseline_clean_metrics
print(f"Clean Accuracy: {clean_acc:.4f} | Loss: {clean_loss:.4f}")

#TODO: make adv attack data loader
# attacked_val = preprocess_inputs('/content/attack_imagenette')

In [None]:
# Same baseline model, now scored on batches perturbed by the custom attack.
print("Evaluating on unregularized model on adversarial (custom attack) data...")
unregular_model = unregular_model.to('cuda')
adv_acc, adv_loss = evaluate(
    model=unregular_model,
    dataloader=imagenette_val,
    attack=adversarial_attack,
    device='cuda',
)
print(f"Adversarial Accuracy: {adv_acc:.4f} | Loss: {adv_loss:.4f}")

## Results and visualizations

Plotting preliminary results

In [None]:
import matplotlib.pyplot as plt

# Hard-coded loss value per epoch (presumably transcribed from the training
# printout - verify), with matching 1-based epoch numbers.
losses = [
    0.3337, 0.1190, 0.1235, 0.0661, 0.3400,
    0.0031, 0.0952, 0.1418, 0.0176, 0.2048,
]
epochs = list(range(1, len(losses) + 1))

# Line plot with a marker at every epoch
plt.figure(figsize=(8, 5))
plt.plot(epochs, losses, color='blue', linestyle='-', marker='o')

# Titles and axis text
plt.title('Training Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Axis formatting: one tick per epoch, y-axis capped at 1, grid on
plt.xticks(epochs)
plt.ylim(top=1)
plt.grid(True)

# Render
plt.show()

In [None]:
import numpy as np

# Hard-coded evaluation numbers for the two conditions
labels = ['Clean', 'Adversarial']
accuracy = [0.9609, 0.9050]
loss = [0.1608, 0.3678]

width = 0.35  # bar width
x = np.arange(len(labels))  # one group position per label

# Grouped bars: accuracy to the left of each position, loss to the right
fig, ax = plt.subplots(figsize=(8, 5))
bar1 = ax.bar(x - width/2, accuracy, width, label='Accuracy', color='green')
bar2 = ax.bar(x + width/2, loss, width, label='Loss', color='red')

# Axis text and formatting
ax.set_title('Clean vs Adversarial Evaluation')
ax.set_ylabel('Value')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_ylim(0, 1.1)
ax.legend()

# Write each bar's value just above it
for bar in (*bar1, *bar2):
    height = bar.get_height()
    bar_center = bar.get_x() + bar.get_width() / 2
    ax.annotate(
        f'{height:.2f}',
        xy=(bar_center, height),
        xytext=(0, 5),
        textcoords="offset points",
        ha='center',
        va='bottom',
    )

# Render
plt.tight_layout()
plt.show()

In [None]:
# Grab one batch from the validation loader and write it out as an image grid.
dl = iter(imagenette_val)
images, labels = next(dl)

output_path = "/content/grid.png"
# save_image tiles a batch of images into a grid by itself. Adding another
# leading dimension with unsqueeze(0) would make the batch 5-D, which
# save_image cannot write.
save_image(images, output_path)

In [None]:
from torchvision.utils import make_grid, save_image
import os

# Pull one batch from the validation loader
dataiter = iter(imagenette_val)
first_batch = next(dataiter)
images, labels = first_batch

# Tile the batch into rows of 8, rescaling values for display
grid = make_grid(images, padding=2, normalize=True, nrow=8)

# Write the grid image to disk
output_path = "/content/grid.png"
save_image(grid, output_path)

print(f"Saved grid image to {output_path}")

In [None]:
# Restore the pickled model object from 'model (1).pkl'.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open('model (1).pkl', 'rb') as checkpoint:
    model = pickle.load(checkpoint)

model.eval()  # switch to evaluation mode before running inference

In [None]:
from torchvision.utils import make_grid, save_image
import os

# Pull one batch from the validation loader
dataiter = iter(imagenette_val)
first_batch = next(dataiter)
images, labels = first_batch

# Put the model and the batch on the GPU
model = model.to('cuda')
images = images.to('cuda')
labels = labels.to('cuda')

# Perturb the batch with a weaker setting than the attack's defaults
attacked_batch = adversarial_attack(
    model,
    images,
    labels,
    lambda_reg=0.05,
    epsilon=0.01,
)

# Tile the attacked batch into rows of 8, rescaling values for display
grid = make_grid(attacked_batch, padding=2, normalize=True, nrow=8)

# Write the grid image to disk
output_path = "/content/grid_attack.png"
save_image(grid, output_path)

print(f"Saved grid image to {output_path}")