The purpose of this notebook is to collect adversarial samples for each dataset for varying attack parameters and save them as numpy arrays so that we can use them by loading as numpy files easily for later experiments. Saves a lot of time! 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib
import matplotlib.pyplot as plt
from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method
from cleverhans.torch.attacks.projected_gradient_descent import (
    projected_gradient_descent,
)
import gc
from captum.attr import *
import quantus
from torch.utils.data import DataLoader
import gc
import torchvision.transforms as transforms
from art.attacks.evasion import CarliniLInfMethod
import torch.optim as optim
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import BasicIterativeMethod
import os
import torch.optim as optim
import torchvision.models as models

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
device

device(type='cuda')

# Various attack functions

In [5]:
def make_pgd_attack(x_batch, y_batch, eps, normal_model): 
    
    alpha = eps/10
    steps = 40
    images_pgd = projected_gradient_descent(normal_model, x_batch, eps, alpha, steps, np.inf)
    _, y_pred_pgd = normal_model(images_pgd).max(1)
    index = (y_pred_pgd != y_batch)
    pgd_images = images_pgd[index]
    y_pred_pgd = y_pred_pgd[index]
    return pgd_images, y_pred_pgd

In [6]:
def make_fgsm_attack(x_batch, y_batch, eps, normal_model): 
    images_pgd = fast_gradient_method(normal_model, x_batch, eps, np.inf)
    _, y_pred_pgd = normal_model(images_pgd).max(1)
    index = (y_pred_pgd != y_batch)
    pgd_images = images_pgd[index]
    y_pred_pgd = y_pred_pgd[index]
    return pgd_images, y_pred_pgd

In [7]:
def cw_linf_attack(model, images, labels, epsilon, confidence, num_iterations, learning_rate):
    batch_size = images.size(0)
    image_size = images.size(2)
    num_classes = model(images).size(1)

    # Define the box constraints for the adversarial perturbation
    box_min = torch.clamp(images - epsilon, min=0)
    box_max = torch.clamp(images + epsilon, max=1)

    # Initialize the adversarial perturbation as a small random noise
    perturbation = torch.zeros_like(images).uniform_(-epsilon, epsilon).to(images.device)
    perturbation.requires_grad = True

    # Define the optimizer
    optimizer = optim.Adam([perturbation], lr=learning_rate)

    # Perform optimization to find the adversarial perturbation
    for _ in range(num_iterations):
        optimizer.zero_grad()

        # Create adversarial images with perturbation
        images_adv = torch.clamp(images + perturbation, min=0, max=1)

        # Compute the model's logits for the adversarial images
        outputs = model(images_adv)

        # Compute the adversarial loss
        real_logits = outputs.gather(1, labels.unsqueeze(1)).squeeze(1)
        other_logits = outputs - torch.eye(num_classes).to(images.device)[labels].unsqueeze(1)
        max_other_logits, _ = other_logits.max(1)
        adversarial_loss = torch.max(torch.zeros_like(real_logits), max_other_logits - real_logits + confidence)

        # Compute the total loss as a combination of adversarial loss and L-infinity norm
        total_loss = adversarial_loss.mean() + torch.norm(perturbation.view(batch_size, -1), p=float('inf'), dim=1).mean()

        # Backpropagation and optimization step
        total_loss.backward()
        optimizer.step()

        # Project the perturbation back into the L-infinity box constraints
        perturbation.data = torch.max(torch.min(perturbation.detach(), box_max - images), box_min - images)

    # Create adversarial images with the final perturbation
    images_adv = torch.clamp(images + perturbation, min=0, max=1)
    
    return images_adv.detach()

In [8]:
def make_cw_attack(model, images, labels, epsilon, confidence, num_iterations, learning_rate):
    
    x_adv = cw_linf_attack(model, images, labels, epsilon, confidence, num_iterations, learning_rate)
    _, y_test = model(x_adv).max(1)
    index = (y_test != labels)
    adv_images = x_adv[index]
    y_pred_adv = y_test[index]
    return adv_images, y_pred_adv

In [9]:
def bim_attack(images, labels, eps, model):
    
    images = images.clone().detach().to(device)
    labels = labels.clone().detach().to(device)
    
    loss = nn.CrossEntropyLoss()
    adv_images = images.clone().detach()
    
    alpha = eps/10
    iters = 10
        
    for i in range(iters):    
        adv_images.requires_grad = True
        outputs = model(adv_images)
        
        #calculate loss 
        cost = loss(outputs, labels)
        
        #update adversarial images
        grad = torch.autograd.grad(cost, adv_images, retain_graph=False, create_graph=False)[0]
        adv_images = adv_images.detach() + alpha*grad.sign()
        delta = torch.clamp(adv_images - images, min=-eps, max=eps)
        adv_images = torch.clamp(images + delta, min=0, max=1).detach()
        
       
    return adv_images

In [10]:
def make_bim_attack(x_batch, y_batch, eps, normal_model):
    x_test = bim_attack(x_batch, y_batch, eps, normal_model)
    #convert the nd array back to tensor
    _, y_test = normal_model(x_test).max(1)
    index = (y_test != y_batch)
    adv_images = x_test[index]
    y_pred_adv = y_test[index]
    return adv_images, y_pred_adv

## Attack on CIFAR

In [10]:
#from rev2.cifar10.model_utils import resnet50, CIFAR10_RESNET50_CKPT_PATH

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, out_keys=None):
        out = {}
        x = self.conv1(x)
        out["c1"] = x
        x = self.bn1(x)
        out["bn1"] = x
        x = F.relu(x)
        out["r1"] = x

        x = self.layer1(x)
        out["l1"] = x
        x = self.layer2(x)
        out["l2"] = x
        x = self.layer3(x)
        out["l3"] = x
        x = self.layer4(x)
        out["l4"] = x

        x = F.avg_pool2d(x, 4)
        out["gvp"] = x
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        out["fc"] = x

        if out_keys is None:
            return x
        res = {}
        for key in out_keys:
            res[key] = out[key]
        return res


def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])


def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3])


def resnet50():
    return ResNet(Bottleneck, [3,4,6,3])


def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3])


def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])


def test():
    net = ResNet18()
    y = net(torch.randn(1,3,32,32))
    print(y.size())

In [11]:
def load_cifar_model(path):
    model = resnet50()
    ckpt_dict = torch.load(path, lambda storage, loc: storage)
    model.load_state_dict(ckpt_dict)
    model.to('cuda')
    model.train(False)
    return model

In [12]:
modelpath = "/data/virtual environments/adv detection by robustness/adv_detection/Adaptive attacks/Models/CIFAR10/resnet50/cifar.ckpt"

In [13]:
normal_model = load_cifar_model(modelpath)
normal_model.to(device)
normal_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (

In [14]:
#save image as np arrays 
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=torchvision.transforms.ToTensor())
test_loader_cifar = DataLoader(testset, shuffle=True, batch_size=10)

Files already downloaded and verified


In [15]:
def compute_fgsm_cifar(train_loader_cifar, normal_model, eps, save_dir):

    print("Saving adversarial images for {} for FGSM".format(eps))

    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    
    check = 0

    for step, (x_batch, y_batch) in enumerate(train_loader_cifar):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()

        #create adv samples
        images_adv,y_pred_adv = make_fgsm_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check += len(images_adv)

        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)

        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())

        if (check %50 == 0):
            print(check)
        if check > 1000:
            break

    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)


    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)


#compute adv samples
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/CIFAR/FGSM/'
for eps in epsilons: 
    compute_fgsm_cifar(test_loader_cifar, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for FGSM
700
900
Saving adversarial images for 0.06274509803921569 for FGSM
200
300
350
550
700
750
Saving adversarial images for 0.12549019607843137 for FGSM
100
250
350
400
800
900
Saving adversarial images for 0.25098039215686274 for FGSM
250


In [16]:
def compute_pgd_cifar(train_loader_cifar, normal_model, eps, save_dir):

    print("Saving adversarial images for {} for PGD".format(eps))

    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    
    check = 0

    for step, (x_batch, y_batch) in enumerate(train_loader_cifar):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()

        #create adv samples
        images_adv,y_pred_adv = make_pgd_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)

        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)

        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())

        if (check %50 == 0):
            print(check)
        if check > 1000:
            break

    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)
    
#launch attack
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/CIFAR/PGD/'
for eps in epsilons: 
    compute_pgd_cifar(test_loader_cifar, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for PGD
350
400
650
Saving adversarial images for 0.06274509803921569 for PGD
200
Saving adversarial images for 0.12549019607843137 for PGD
900
950
1000
Saving adversarial images for 0.25098039215686274 for PGD


In [17]:
def compute_bim_cifar(train_loader_cifar, normal_model, eps, save_dir):

    print("Saving adversarial images for {} for BIM".format(eps))
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    
    check = 0

    for step, (x_batch, y_batch) in enumerate(train_loader_cifar):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()

        #create adv samples
        images_adv,y_pred_adv = make_bim_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)

        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)

        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())

        if (check %50 == 0):
            print(check)
        if check > 1000:
            break

    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)

    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)
    
#launch attack
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/CIFAR/BIM/'
for eps in epsilons: 
    compute_bim_cifar(test_loader_cifar, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for BIM
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
Saving adversarial images for 0.06274509803921569 for BIM
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
Saving adversarial images for 0.12549019607843137 for BIM
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
Saving adversarial images for 0.25098039215686274 for BIM
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000


In [18]:
def compute_cw_cifar(train_loader_cifar, normal_model, eps, conf, save_dir): 
    
    print("Saving adversarial images for eps {} and confidence {} for CW".format(eps, conf))
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    
    num_iterations = 400
    learning_rate = 0.01
    check=0
        
    for step, (x_batch, y_batch) in enumerate(train_loader_cifar):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()
        
        
        images_adv,y_pred_adv = make_cw_attack(normal_model, x_batch, y_batch, eps, conf, num_iterations, learning_rate)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)
        
        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)
        
        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())
        
        if (check)%50==0:
            print((check)) 
            
        if check > 1000:
            break
        
    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)  
    
    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)
   
    
#launch attack
confidence = [0]
epsilons = 0.15
save_dir = 'adv samples/CIFAR/CW/'
for conf in confidence: 
    compute_cw_cifar(test_loader_cifar, normal_model, epsilons, conf, save_dir)

Saving adversarial images for eps 0.15 and confidence 0 for CW
50
500
950
1000
1000


## Attack on MNIST

In [11]:
#for natural and adversarial LeNet Model 
class LeNet_normal(torch.nn.Module):
    """Network architecture from: https://github.com/ChawDoe/LeNet5-MNIST-PyTorch."""
    def __init__(self):
        super().__init__()
        self.conv_1 = torch.nn.Conv2d(1, 6, 5)
        self.pool_1 = torch.nn.MaxPool2d(2, 2)
        self.relu_1 = torch.nn.ReLU()
        self.conv_2 = torch.nn.Conv2d(6, 16, 5)
        self.pool_2 = torch.nn.MaxPool2d(2, 2)
        self.relu_2 = torch.nn.ReLU()
        self.fc_1 = torch.nn.Linear(256, 120)
        self.relu_3 = torch.nn.ReLU()
        self.fc_2 = torch.nn.Linear(120, 84)
        self.relu_4 = torch.nn.ReLU()
        self.fc_3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool_1(self.relu_1(self.conv_1(x)))
        x = self.pool_2(self.relu_2(self.conv_2(x)))
        x = x.view(x.shape[0], -1)
        x = self.relu_3(self.fc_1(x))
        x = self.relu_4(self.fc_2(x))
        x = self.fc_3(x)
        return x

In [12]:
def load_mnist_model(path):
    model = LeNet_normal()
    model.to(device)
    model.load_state_dict(torch.load(path))
    model.to('cuda')
    model.train(False)
    return model

In [13]:
modelpath = "/data/virtual environments/adv detection by robustness/adv_detection/Adaptive attacks/Models/MNIST/mnist_model.pth"

In [14]:
normal_model = load_mnist_model(modelpath)
normal_model.to(device)
normal_model.eval()

LeNet_normal(
  (conv_1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu_1): ReLU()
  (conv_2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu_2): ReLU()
  (fc_1): Linear(in_features=256, out_features=120, bias=True)
  (relu_3): ReLU()
  (fc_2): Linear(in_features=120, out_features=84, bias=True)
  (relu_4): ReLU()
  (fc_3): Linear(in_features=84, out_features=10, bias=True)
)

In [17]:
test_set = torchvision.datasets.MNIST(root='./sample_data', train=True, transform=torchvision.transforms.ToTensor(), download=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=10, pin_memory=True)

In [18]:
def compute_fgsm(train_loader, normal_model, eps, save_dir):

    print("Saving adversarial images for {} for FGSM".format(eps))

    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    check = 0
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()

        #create adv samples
        images_adv,y_pred_adv = make_fgsm_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)

        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)

        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())

        if check % 50 == 0:
            print(check)

        if check > 1000:
            break

    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)

    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)

#compute adv samples
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/MNIST/FGSM/'
for eps in epsilons: 
    compute_fgsm(test_loader, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for FGSM
100
200
250
250
300
350
350
400
450
450
450
500
500
550
550
550
650
750
750
800
900
950
1000
Saving adversarial images for 0.06274509803921569 for FGSM
150
300
550
600
Saving adversarial images for 0.12549019607843137 for FGSM
100
350
Saving adversarial images for 0.25098039215686274 for FGSM
100
300
350
800


In [19]:
def compute_pgd(train_loader, normal_model, eps, save_dir):

    print("Saving adversarial images for {} for PGD".format(eps))

    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    check = 0
    
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()

        #create adv samples
        images_adv,y_pred_adv = make_pgd_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)

        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)

        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())

        if check % 50 == 0:
            print(check)

        if check > 1000:
            break

    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)

    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)

    
#launch attack
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/MNIST/PGD/'
for eps in epsilons: 
    compute_pgd(test_loader, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for PGD
150
600
750
750
800
950
1000
Saving adversarial images for 0.06274509803921569 for PGD
100
550
850
Saving adversarial images for 0.12549019607843137 for PGD
150
350
400
850
Saving adversarial images for 0.25098039215686274 for PGD


In [20]:
def compute_bim(train_loader, normal_model, eps, save_dir):

    print("Saving adversarial images for {} for BIM".format(eps))
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    check = 0
    
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()

        #create adv samples
        images_adv,y_pred_adv = make_bim_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)

        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)

        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())

        if check % 50 == 0:
            print(check)

        if check > 1000:
            break

    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)

    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)
    
#launch attack
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/MNIST/BIM/'
for eps in epsilons: 
    compute_bim(test_loader, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for BIM
50
100
100
100
200
250
250
300
300
300
350
350
400
400
400
400
400
450
550
550
650
800
850
850
950
950
950
950
950
Saving adversarial images for 0.06274509803921569 for BIM
50
100
250
550
600
700
700
900
Saving adversarial images for 0.12549019607843137 for BIM
150
500
600
Saving adversarial images for 0.25098039215686274 for BIM
150
550
700
850


In [21]:
def compute_cw(train_loader, normal_model, eps, conf, save_dir): 
    
    print("Saving adversarial images for eps {} and confidence {} for CW".format(eps, conf))
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    
    num_iterations = 400
    learning_rate = 0.01
    check=0
        
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()
        
        
        images_adv,y_pred_adv = make_cw_attack(normal_model, x_batch, y_batch, eps, conf, num_iterations, learning_rate)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)
        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)
        
        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())
        
        if (check)%50==0:
            print((check)) 
            
        if check > 1000:
            break
        
    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)
    
    np.savez(os.path.join(save_dir, str(conf)+str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)
    
#launch attack
confidence = [0]
epsilons = 0.15
save_dir = 'adv samples/MNIST/CW/'
for conf in confidence: 
    compute_cw(test_loader, normal_model, epsilons, conf, save_dir)

Saving adversarial images for eps 0.15 and confidence 0 for CW
0
50
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
150
150
150
150
150
150
150
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
300
300
300
300
300
300
300
350
350
350
350
350
400
450
450
450
450
450
500
500
500
500
550
550
550
550
550
600
600
600
600
600
600
600
600
650
650
650
650
650
650
650
650
700
700
700
700
700
750
750
800
800
800
800
800
800
800
800
850
850
850
900
900
900
950
1000
1000


## Attack on ImageNet

In [22]:
# the validation transforms
valid_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]
    )
])

In [23]:
images = '/home/db1702/Downloads/imagenet-mini/train/'

In [13]:
# def load_imagenet_model():
#     model = models.resnet50(pretrained=True).to(device)
#     model.to('cuda')
#     model.train(False)
#     return model

In [24]:
def load_imagenet_model():
    model=torchvision.models.mobilenet_v3_small(weights=True).to(device)
    model.to('cuda')
    model.train(False)
    return model

In [25]:
normal_model = load_imagenet_model()
normal_model.to(device)
normal_model.eval()

MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
          (activation): ReLU()
          (scale_activation): Hardsigmoid()
        )
        (2): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), 

In [26]:
#get dataset
test = torchvision.datasets.ImageFolder(images, transform=valid_transform)
test_loader = DataLoader(test, shuffle=True, batch_size = 2)

In [17]:
def compute_fgsm(train_loader, normal_model, eps, save_dir): 
    
    print("Saving adversarial images for {} for FGSM".format(eps))
    
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    check = 0 
    
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()
        
        #create adv samples    
        images_adv,y_pred_adv = make_fgsm_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)
        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)
        
        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())
        
        
        if check%50==0:
            print(check)
            
        if check > 1000:
            break
        
    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)
    
    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)

#compute adv samples
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/IMAGENET/MobileNet/FGSM/'
for eps in epsilons: 
    compute_fgsm(test_loader, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for FGSM
100
150
300
400
550
650
700
800
950
1000
Saving adversarial images for 0.06274509803921569 for FGSM
100
200
250
350
400
500
550
650
750
800
Saving adversarial images for 0.12549019607843137 for FGSM
50
100
150
200
250
300
350
400
550
700
850
900
1000
Saving adversarial images for 0.25098039215686274 for FGSM
300
350
400
450
500
600
650
700
950


In [18]:
def compute_pgd(train_loader, normal_model, eps, save_dir): 
    
    print("Saving adversarial images for {} for PGD".format(eps))
    
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    check = 0 
    
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()
        
        #create adv samples    
        images_adv,y_pred_adv = make_pgd_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)
        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)
        
        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())
        
        
        if check%50==0:
            print(check)
            
        if check > 1000:
            break
        
    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)
    
    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)
#compute adv samples
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/IMAGENET/MobileNet/PGD/'
for eps in epsilons: 
    compute_pgd(test_loader, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for PGD
50
100
150
250
300
450
500
650
750
800
950
Saving adversarial images for 0.06274509803921569 for PGD
100
200
300
500
550
600
650
750
850
950
1000
Saving adversarial images for 0.12549019607843137 for PGD
100
300
400
500
550
600
750
800
850
900
1000
Saving adversarial images for 0.25098039215686274 for PGD
150
200
250
350
550
650
700
750
800
850
900
950


In [19]:
def compute_bim(train_loader, normal_model, eps, save_dir): 
    
    print("Saving adversarial images for {} for BIM".format(eps))
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    check = 0 
    
    for step, (x_batch, y_batch) in enumerate(train_loader):
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()
        
        #create adv samples    
        images_adv,y_pred_adv = make_bim_attack(x_batch, y_batch, eps, normal_model)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)
        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)
        
        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())
        
        
        if check%50==0:
            print(check)
            
        if check > 1000:
            break
        
    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)
    
    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)

    
#launch attack
epsilons = [8/255, 16/255, 32/255, 64/255]
save_dir = 'adv samples/IMAGENET/MobileNet/BIM/'
for eps in epsilons: 
    compute_bim(test_loader, normal_model, eps, save_dir)

Saving adversarial images for 0.03137254901960784 for BIM
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
Saving adversarial images for 0.06274509803921569 for BIM
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
Saving adversarial images for 0.12549019607843137 for BIM
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
Saving adversarial images for 0.25098039215686274 for BIM
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000


In [27]:
import torch
from torchvision import models

def imagenet_cw_linf_attack(images, labels, model, epsilon=0.3, confidence=0, max_iterations=400, learning_rate=0.01):
    """
    Implements the CW-Linf (Carlini-Wagner with L-infinity norm) attack on the ImageNet dataset.

    Args:
        images (torch.Tensor): Batch of input images.
        labels (torch.Tensor): Corresponding labels for the input images.
        model: The model to be attacked. It should return logits (pre-softmax outputs).
        epsilon (float): Maximum perturbation allowed (default: 0.03).
        confidence (int): Confidence parameter for the attack (default: 0).
        max_iterations (int): Maximum number of iterations for the attack (default: 1000).
        learning_rate (float): Learning rate for the attack (default: 0.01).

    Returns:
        torch.Tensor: Perturbed images.
    """
    # Set the model in evaluation mode
    model.eval()

    # Clone the original images
    perturbed_images = images.clone().detach()

    # Iterate for the maximum number of iterations
    for _ in range(max_iterations):
        # Zero out the gradients
        perturbed_images.requires_grad_(True)

        # Calculate the logits for the current perturbed images
        logits = model(perturbed_images)

        # Calculate the loss (CW-Linf objective function)
        correct_logits = logits.gather(1, labels.unsqueeze(1)).squeeze()
        wrong_logits = logits - logits.max(dim=1, keepdim=True)[0]
        target_logits = wrong_logits.max(dim=1)[0]
        loss = torch.max(correct_logits - target_logits + confidence, torch.zeros_like(target_logits))

        # Compute the gradients of the loss with respect to the input images
        gradients = torch.autograd.grad(loss.sum(), perturbed_images)[0]

        # Normalize the gradients (L-infinity norm)
        gradients = gradients.sign()

        # Update the perturbed images using the gradients
        perturbed_images = perturbed_images + learning_rate * gradients

        # Clip the perturbed images to ensure they stay within the epsilon ball
        perturbed_images = torch.clamp(perturbed_images, images - epsilon, images + epsilon)
        perturbed_images = torch.clamp(perturbed_images, 0, 1)

    return perturbed_images


In [28]:
def imagenet_make_cw_attack(images, labels, model,epsilon, confidence, num_iterations, learning_rate):
    
    x_adv = imagenet_cw_linf_attack(images, labels, model, epsilon, confidence, num_iterations, learning_rate)
    _, y_test = model(x_adv).max(1)
    index = (y_test != labels)
    adv_images = x_adv[index]
    y_pred_adv = y_test[index]
    return adv_images, y_pred_adv

In [29]:
def compute_cw(train_loader, normal_model, eps, conf, save_dir): 
    
    print("Saving adversarial images for eps {} and confidence {} for CW".format(eps, conf))
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    check = 0
    
    num_iterations = 400
    learning_rate = 0.01
    check=0
        
    for step, (x_batch, y_batch) in enumerate(train_loader):
        #print(x_batch.shape)
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()
        
        
        images_adv,y_pred_adv = imagenet_make_cw_attack(x_batch, y_batch,normal_model, eps, conf, num_iterations, learning_rate)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)
        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())
        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)
        
        if (check)%50==0:
            print((check)) 
            
        if check > 1000:
            break
        
    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)
    
    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)

    
    
#launch attack
confidence = [0]
epsilons = 0.15
save_dir = 'adv samples/IMAGENET/MobileNet/CW/'
for conf in confidence: 
    compute_cw(test_loader, normal_model, epsilons, conf, save_dir)

Saving adversarial images for eps 0.15 and confidence 0 for CW
0
0
0
0
50
50
50
50
50
50
50
50
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
200
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
300
300
300
300
300
300
300
300
300
300
300
300
300
300
350
350
350
350
400
400
400
400
400
400
400
400
400
400
450
500
500
500
500
550
550
600
600
600
600
600
600
600
600
600
600
600
600
600
600
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
650
700
750
800
800
850
850
900
900
900
900
900
900
900
900
900
900
950
1000
1000
1000
1000
1000


# cw images resnet model

In [30]:
# the validation transforms
valid_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5]
    )
])

In [31]:
images = '/home/db1702/Downloads/imagenet-mini/train/'

In [32]:
def load_imagenet_model():
    model = models.resnet50(pretrained=True).to(device)
    model.to('cuda')
    model.train(False)
    return model

In [33]:
normal_model = load_imagenet_model()
normal_model.to(device)
normal_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [34]:
#get dataset
test = torchvision.datasets.ImageFolder(images, transform=valid_transform)
test_loader = DataLoader(test, shuffle=True, batch_size = 2)

In [35]:
import torch
from torchvision import models

def imagenet_cw_linf_attack(images, labels, model, epsilon=0.3, confidence=0, max_iterations=400, learning_rate=0.01):
    """
    Implements the CW-Linf (Carlini-Wagner with L-infinity norm) attack on the ImageNet dataset.

    Args:
        images (torch.Tensor): Batch of input images.
        labels (torch.Tensor): Corresponding labels for the input images.
        model: The model to be attacked. It should return logits (pre-softmax outputs).
        epsilon (float): Maximum perturbation allowed (default: 0.03).
        confidence (int): Confidence parameter for the attack (default: 0).
        max_iterations (int): Maximum number of iterations for the attack (default: 1000).
        learning_rate (float): Learning rate for the attack (default: 0.01).

    Returns:
        torch.Tensor: Perturbed images.
    """
    # Set the model in evaluation mode
    model.eval()

    # Clone the original images
    perturbed_images = images.clone().detach()

    # Iterate for the maximum number of iterations
    for _ in range(max_iterations):
        # Zero out the gradients
        perturbed_images.requires_grad_(True)

        # Calculate the logits for the current perturbed images
        logits = model(perturbed_images)

        # Calculate the loss (CW-Linf objective function)
        correct_logits = logits.gather(1, labels.unsqueeze(1)).squeeze()
        wrong_logits = logits - logits.max(dim=1, keepdim=True)[0]
        target_logits = wrong_logits.max(dim=1)[0]
        loss = torch.max(correct_logits - target_logits + confidence, torch.zeros_like(target_logits))

        # Compute the gradients of the loss with respect to the input images
        gradients = torch.autograd.grad(loss.sum(), perturbed_images)[0]

        # Normalize the gradients (L-infinity norm)
        gradients = gradients.sign()

        # Update the perturbed images using the gradients
        perturbed_images = perturbed_images + learning_rate * gradients

        # Clip the perturbed images to ensure they stay within the epsilon ball
        perturbed_images = torch.clamp(perturbed_images, images - epsilon, images + epsilon)
        perturbed_images = torch.clamp(perturbed_images, 0, 1)

    return perturbed_images


In [36]:
def imagenet_make_cw_attack(images, labels, model,epsilon, confidence, num_iterations, learning_rate):
    
    x_adv = imagenet_cw_linf_attack(images, labels, model, epsilon, confidence, num_iterations, learning_rate)
    _, y_test = model(x_adv).max(1)
    index = (y_test != labels)
    adv_images = x_adv[index]
    y_pred_adv = y_test[index]
    return adv_images, y_pred_adv

In [37]:
def compute_cw(train_loader, normal_model, eps, conf, save_dir): 
    
    print("Saving adversarial images for eps {} and confidence {} for CW".format(eps, conf))
    adversarial_images = []
    adversarial_labels = []
    benign_images = []
    benign_labels = []
    check = 0
    
    num_iterations = 400
    learning_rate = 0.01
    check=0
        
    for step, (x_batch, y_batch) in enumerate(train_loader):
        #print(x_batch.shape)
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        gc.collect()
        torch.cuda.empty_cache()
        
        
        images_adv,y_pred_adv = imagenet_make_cw_attack(x_batch, y_batch,normal_model, eps, conf, num_iterations, learning_rate)
        images_adv, y_pred_adv = images_adv.to(device), y_pred_adv.to(device)
        
        check+=len(images_adv)
        # Append adversarial images and labels to the batch
        adversarial_images.append(images_adv.detach().cpu().numpy())
        adversarial_labels.append(y_pred_adv.detach().cpu().numpy())
        # Append benign images and labels to the batch
        b_image, b_label = x_batch.detach().cpu().numpy(), y_batch.detach().cpu().numpy()
        benign_images.append(b_image)
        benign_labels.append(b_label)
        
        if (check)%50==0:
            print((check)) 
            
        if check > 1000:
            break
        
    # Concatenate the batch of adversarial images and labels into NumPy arrays
    adv_images = np.concatenate(adversarial_images)
    adv_labels = np.concatenate(adversarial_labels)
    ben_images = np.concatenate(benign_images)
    ben_labels = np.concatenate(benign_labels)
    
    np.savez(os.path.join(save_dir, str(eps)+'eps.npz'), a_images=adv_images, a_labels=adv_labels, b_images=ben_images, b_labels=ben_labels)

    
    
#launch attack
confidence = [0]
epsilons = 0.15
save_dir = 'adv samples/IMAGENET/ResNet50/CW/'
for conf in confidence: 
    compute_cw(test_loader, normal_model, epsilons, conf, save_dir)

Saving adversarial images for eps 0.15 and confidence 0 for CW
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
50
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
150
200
200
200
200
200
200
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
250
300
300
300
300
300
300
300
300
300
300
300
300
300
300
300
300
300
300
300
300
350
350
350
350
350
350
350
350
350
350
350
350
350
350
350
400
400
400
400
400
400
450