# Deep Learning Project 3 - Jailbreaking Deep Models

In [28]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.nn.functional import softmax
import json
from tqdm import tqdm

## Task 1: Basics

In [50]:
# Three-value (one per channel) mean and standard deviation used to
# normalize the input images.
mean_norms = np.array([0.485, 0.456, 0.406])
std_norms = np.array([0.229, 0.224, 0.225])

# Augmentation-free preprocessing: convert the image to a tensor, then
# normalize it channel-wise with the statistics above.
plain_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_norms, std=std_norms),
    ]
)

In [57]:
class ImageNetMappedDataset(torch.utils.data.Dataset):
    """Wrap a dataset and shift every label by a constant offset.

    The wrapped dataset must return ``(img, label)`` pairs; this wrapper
    returns ``(img, label + offset)`` so that the labels line up with the
    ImageNet class indices the model predicts.

    Attributes:
        base: The wrapped dataset.
        offset: Constant added to every label (401 by default).
    """

    def __init__(self, base_dataset, offset=401):
        """Store the wrapped dataset and the label offset."""
        self.base, self.offset = base_dataset, offset

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.base)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` with its label shifted by ``offset``."""
        image, base_label = self.base[idx]
        mapped_label = base_label + self.offset
        return image, mapped_label

In [76]:
# Root folder of the test images; ImageFolder derives integer labels from
# the sub-directory names, and the wrapper shifts them by its default offset.
dataset_path = "./TestDataSet"
dataset = ImageNetMappedDataset(
    ImageFolder(root=dataset_path, transform=plain_transforms)
)

In [77]:
# Choose the compute device in priority order: Apple MPS, then CUDA, and
# finally the CPU when neither accelerator is available.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

print(f"Using device: {device}")

Using device: mps


In [78]:
# Load ResNet-34 with the IMAGENET1K_V1 weights, switch it to inference
# mode and move it to the selected device. The result is assigned (both
# calls return the module itself) so the cell does not echo the model repr.
pretrained_model = torchvision.models.resnet34(weights='IMAGENET1K_V1')
pretrained_model = pretrained_model.eval().to(device)

In [82]:
def evaluate(model, dataset, batch_size=32, device=None):
    """Compute and print top-1 and top-5 accuracy of ``model`` on ``dataset``.

    Args:
        model: Module mapping a batch of inputs to per-class scores of shape
            ``(batch, num_classes)``.
        dataset: Dataset yielding ``(input, label)`` pairs whose labels index
            the model's output classes.
        batch_size: Number of samples per forward pass.
        device: Device to run the evaluation on. When ``None``, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so inputs always land where the weights live.

    Returns:
        Tuple ``(top1_acc, top5_acc)`` expressed as percentages.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    top1_correct, top5_correct, total = 0, 0, 0

    with torch.no_grad():
        for imgs, labels in tqdm(loader):
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)

            # Rank the raw scores directly: softmax preserves per-row order,
            # so it is not needed to find the top classes. Clamp k so models
            # with fewer than five classes do not make topk fail.
            k = min(5, outputs.size(1))
            _, topk = outputs.topk(k, dim=1)

            # One vectorized comparison per batch instead of a per-sample
            # Python loop; column 0 holds the top-1 prediction.
            hits = topk == labels.unsqueeze(1)
            top1_correct += hits[:, 0].sum().item()
            top5_correct += hits.any(dim=1).sum().item()
            total += labels.size(0)

    if total == 0:
        raise ValueError("Cannot evaluate on an empty dataset.")

    top1_acc = 100 * top1_correct / total
    top5_acc = 100 * top5_correct / total

    print(f"Top-1 Accuracy: {top1_acc:.2f}%")
    print(f"Top-5 Accuracy: {top5_acc:.2f}%")

    return top1_acc, top5_acc

In [83]:
# Baseline: score the pretrained model on the unperturbed test images.
top1_acc, top5_acc = evaluate(pretrained_model, dataset)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:01<00:00,  9.42it/s]

Top-1 Accuracy: 76.00%
Top-5 Accuracy: 94.00%





## Task 2: Pixel-wise attacks

In [None]:
# TODO: placeholder - this is currently just an alias of the clean `dataset`;
# no perturbation has been applied yet.
adversarial_test_set_1 = dataset

## Task 3: Improved attacks

In [None]:
# TODO: placeholder - this is currently just an alias of the clean `dataset`;
# no perturbation has been applied yet.
adversarial_test_set_2 = dataset

## Task 4: Patch attacks

In [None]:
# TODO: placeholder - this is currently just an alias of the clean `dataset`;
# no patch has been applied yet.
adversarial_test_set_3 = dataset

## Task 5: Transferring attacks