In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import pandas as pd
import torch.nn.functional as F
import foolbox

Define a simple CNN model for image classification

In [6]:
class SimpleCNN(nn.Module):
    """Small convolutional classifier: one conv layer, one max-pool, two linear layers.

    With the default arguments the architecture is exactly the one used in this
    notebook (single-channel 28x28 inputs, 10 output classes).

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output logits.
        image_size: Height/width of the (square) input images. The 3x3
            convolution uses padding=1 and therefore keeps the spatial size;
            the 2x2 max-pool then halves it (rounding down), which determines
            the input width of the first linear layer.
    """

    def __init__(self, in_channels=1, num_classes=10, image_size=28):
        super().__init__()
        # Spatial size after the single 2x2 / stride-2 pooling step.
        pooled_size = image_size // 2
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(pooled_size * pooled_size * 32, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes); no softmax is applied."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [7]:
# Preprocessing applied to every MNIST sample: ToTensor, followed by a
# single-channel Normalize with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [8]:
# MNIST training split, stored under ./data (downloaded on first use) and
# preprocessed with `transform`.
trainSet = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 119982774.85it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 19534864.35it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 51367708.22it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz





Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 18265128.25it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [9]:
# Mini-batches of 4 training samples, with shuffling enabled.
trainLoader = torch.utils.data.DataLoader(
    dataset=trainSet,
    batch_size=4,
    shuffle=True,
)

In [10]:
# Seed PyTorch's global RNG before any weights are drawn so that the randomly
# initialised parameters (and later draws from the same global RNG) are
# repeatable after Restart Kernel -> Run All.
SEED = 0
torch.manual_seed(SEED)

# Baseline classifier that is trained normally and then attacked.
model = SimpleCNN()

**Cross-Entropy Loss.**

Suitability for Classification:
Cross-entropy loss is commonly used for classification tasks, especially when dealing with multiple classes. It measures the dissimilarity between the predicted class probabilities and the true distribution of class labels.

In [11]:
# Multi-class classification loss; takes the network's raw logits and integer
# class targets, averaged over the batch (the default reduction, spelled out).
criterion = nn.CrossEntropyLoss(reduction='mean')

**Adam Optimizer**

Adaptive Learning Rate:
Adam is an adaptive optimization algorithm that adjusts the learning rate for each parameter individually. It helps in speeding up convergence and dealing with different magnitudes of gradients for different parameters.

In [12]:
# Adam over all parameters of the baseline model, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Training of the model

In [55]:
# Standard supervised training: five passes over the training loader.
for epoch in range(5):
    for inputs, labels in trainLoader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Clean reference example for the attack: the first training sample and its
# class index. (This statement was previously fused onto the end of the
# `optimizer.step()` line, which is a syntax error.)
image, label = trainSet[0]
image = image.numpy()

adversarial_image_tensor = torch.from_numpy(image).float()

# Wrap the trained network for foolbox. The wrapper is meant for inference, so
# switch the model to eval mode first. The bounds describe the valid input
# range: Normalize((0.5,), (0.5,)) maps ToTensor's [0, 1] pixels to [-1, 1],
# so (0, 1) would clip away half of the data range.
model.eval()
fmodel = foolbox.PyTorchModel(model, bounds=(-1, 1))

In [56]:
# L-infinity perturbation budget, expressed in the units of the model's input.
# It is now actually passed to the attack (previously it was defined but the
# attack hard-coded a different value).
epsilon_value = 0.1

# `torchattacks` is never imported in this notebook, so the attack is built
# from foolbox (which is imported) and the `fmodel` wrapper created after
# training.
_fgsm = foolbox.attacks.FGSM()


def attack(images, labels, epsilon=epsilon_value):
    """Craft FGSM adversarial examples for a batch using the foolbox-wrapped model.

    Args:
        images: Float tensor holding a batch of model inputs (batch dimension first).
        labels: Integer (long) tensor with one true class index per image.
        epsilon: L-infinity size of the perturbation; defaults to `epsilon_value`.

    Returns:
        The perturbed images, clipped by foolbox to the bounds of `fmodel`.
    """
    # foolbox returns (raw, clipped, is_adversarial); only the clipped
    # adversarial inputs are needed by the rest of the notebook.
    _, clipped_advs, _ = _fgsm(fmodel, images, labels, epsilons=epsilon)
    return clipped_advs

In [None]:
# The attack needs the integer class index of the image being attacked.
# `labels` previously held the last training batch (wrong sample, wrong batch
# size), and calling requires_grad_() on an integer tensor raises a
# RuntimeError -- only the input image needs gradients, which the attack
# handles internally.
labels = torch.tensor([label], dtype=torch.long)

# Attacks operate on batches: add a batch dimension for the call and drop it
# again so `adversarial_image` has the same shape as `image`.
adversarial_image = attack(adversarial_image_tensor.unsqueeze(0), labels).squeeze(0).detach()

**Adversarial Defense**

In [1]:

def adversarial_training(model, trainLoader, criterion, optimizer, epsilon=0.01, num_epochs=5, bounds=(-1, 1)):
    """Train `model` on clean batches and their FGSM-perturbed counterparts.

    For every batch an FGSM adversarial version is crafted against the current
    weights, and the optimizer minimises the sum of the clean loss and the
    adversarial loss.

    Args:
        model: The torch module to train (updated in place).
        trainLoader: Iterable yielding `(inputs, labels)` tensor batches.
        criterion: Loss called as `criterion(outputs, labels)`.
        optimizer: Optimizer holding `model`'s parameters.
        epsilon: L-infinity perturbation budget of the FGSM attack, in input units.
        num_epochs: Number of passes over `trainLoader`.
        bounds: `(min, max)` valid input range given to foolbox. The default
            matches this notebook's data, which Normalize((0.5,), (0.5,))
            maps to [-1, 1].

    Returns:
        None.
    """
    # Build the attack and the foolbox wrapper once. The wrapper calls `model`
    # itself, so it always sees the current weights. eval() first because the
    # wrapper is intended for a model in inference mode.
    attack = foolbox.attacks.FGSM()
    model.eval()
    fmodel = foolbox.PyTorchModel(model, bounds=bounds)

    for epoch in range(num_epochs):
        for inputs, labels in trainLoader:
            # Create adversarial examples for the whole batch in one call
            # (foolbox 3 API: attack(model, inputs, labels, epsilons=...)).
            model.eval()
            _, inputs_adv, _ = attack(fmodel, inputs, labels, epsilons=epsilon)
            inputs_adv = inputs_adv.detach()
            model.train()

            # Forward pass
            outputs = model(inputs)
            outputs_adv = model(inputs_adv)

            # Calculate losses
            loss = criterion(outputs, labels)
            loss_adv = criterion(outputs_adv, labels)

            # Backward pass and optimization. zero_grad() is deliberately
            # called after the attack: crafting it runs a backward pass that
            # may leave gradients on the parameters.
            optimizer.zero_grad()
            (loss + loss_adv).backward()
            optimizer.step()

In [16]:

# Fresh network, loss and optimizer for the adversarially trained model.
model_defense = SimpleCNN()

criterion_defense = nn.CrossEntropyLoss()
optimizer_defense = optim.Adam(model_defense.parameters(), lr=0.001)

# Train the model with adversarial training. `optimizer` is a required
# argument of adversarial_training(); omitting it raised a TypeError.
adversarial_training(
    model=model_defense,
    trainLoader=trainLoader,
    criterion=criterion_defense,
    optimizer=optimizer_defense,
    num_epochs=10,
)

# Evaluate the model on the adversarial example (inference only, so no
# autograd graph is needed).
model_defense.eval()
with torch.no_grad():
    # as_tensor accepts both tensors and NumPy arrays without re-copying an
    # existing tensor.
    inputs_adv_tensor = torch.as_tensor(adversarial_image)
    outputs_defense = model_defense(inputs_adv_tensor.unsqueeze(0))

    # Convert logits to probabilities and get the predicted class
    probabilities = nn.Softmax(dim=1)(outputs_defense)
    _, predicted_class = torch.max(probabilities, 1)

print(f'Predicted class after adversarial training: {predicted_class.item()}')

**Adversarial Attack Evaluation:**

After generating an adversarial example, I evaluate the model's predictions on both the original and the adversarial example. The key thing to check is whether the predicted class changes between the two:

In [26]:
# Inference only: no_grad() avoids building an autograd graph. as_tensor()
# accepts both NumPy arrays and tensors, so an existing tensor is not copied
# again. Softmax is monotonic, so the argmax over the logits is already the
# predicted class.
with torch.no_grad():
    # Evaluate the model on the original example
    outputs_original = model(torch.as_tensor(image).unsqueeze(0))
    predicted_original = outputs_original.argmax(dim=1)

    # Evaluate the model on the adversarial example
    outputs_adversarial = model(torch.as_tensor(adversarial_image).unsqueeze(0))
    predicted_adversarial = outputs_adversarial.argmax(dim=1)

print(f'Predicted class on original example: {predicted_original.item()}')
print(f'Predicted class on adversarial example: {predicted_adversarial.item()}')



**Adversarial Defense Evaluation:**


After training the model with adversarial training, I evaluate its performance on both the original and adversarial examples:

In [None]:
# Inference only: no_grad() avoids building an autograd graph. as_tensor()
# accepts both NumPy arrays and tensors, so an existing tensor is not copied
# again. Softmax is monotonic, so the argmax over the logits is already the
# predicted class.
with torch.no_grad():
    # Evaluate the defense model on the original example
    outputs_defense_original = model_defense(torch.as_tensor(image).unsqueeze(0))
    predicted_defense_original = outputs_defense_original.argmax(dim=1)

    # Evaluate the defense model on the adversarial example
    outputs_defense_adversarial = model_defense(torch.as_tensor(adversarial_image).unsqueeze(0))
    predicted_defense_adversarial = outputs_defense_adversarial.argmax(dim=1)

print(f'Predicted class on original example after adversarial training: {predicted_defense_original.item()}')
print(f'Predicted class on adversarial example after adversarial training: {predicted_defense_adversarial.item()}')