In [None]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torchvision.models as models
from torchvision import datasets
import torchvision.transforms as transforms
from PIL import Image
from typing import List, Dict
import copy
import json
import shutil
import numpy as np
import matplotlib.pyplot as plt
import tempfile, os

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Architectures this notebook can load; the names are passed to torchvision's model builder.
supported_models = ['resnet18', 'resnet50', 'resnet101', 'vgg16_bn', 'vgg19_bn', 'inception_v3']
model_name = supported_models[2]         # model the adversarial example is crafted against
verify_model_name = supported_models[4]  # second model used later to evaluate the example
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# The notebook deliberately refuses to run without a GPU.
if device == 'cpu':
    raise RuntimeError("cuda is NOT available!!")

# Benign input image. Alternatives tried earlier: './data/cat/Cat04.jpg', './data/duck/Duck02.jpg'
benign_pic = './data/mouse/Mouse06.jpg'
# Image whose pixels serve as the "expected noise" pattern.
noise_pic = './data/cat/Cat04.jpg'

# PIL reports size as (width, height); the context manager closes the file handle again.
with Image.open(benign_pic) as _benign_image:
    benign_pic_size = _benign_image.size

# Target class of the attack. Alternative tried earlier: 508 (computer_keyboard)
target_id = 99 # goose

# Constants

In [None]:
# Per-channel ImageNet statistics, shaped (3, 1, 1) so they broadcast over (C, H, W) images.
imagenet_mean = torch.tensor([0.485, 0.456, 0.406]).reshape(3, 1, 1)
imagenet_std = torch.tensor([0.229, 0.224, 0.225]).reshape(3, 1, 1)

In [None]:
# Alternative kept for reference: epsilon = 8 / 255. / imagenet_std
# Per-channel perturbation budget with shape (3, 1, 1): 8/255 scaled by the channel std.
# NOTE(review): the loaders in this notebook do not normalise images, so a plain 8/255
# budget may have been intended - confirm the scaling before relying on the value.
epsilon = imagenet_std * (8 / 255.)
# epsilon

# Utilities

In [None]:
def clamp_tensor_image(tensor_image, clip_arrange=None, align_to_image:bool=False):
    """Clip an image tensor to per-pixel bounds and optionally snap it to the 8-bit grid.

    Args:
        tensor_image: image tensor to constrain.
        clip_arrange: optional pair ``(upper, lower)`` - note the order - of tensors
            broadcastable against ``tensor_image``. Any length-2 sequence works,
            including a stacked tensor. ``None`` skips this step.
        align_to_image: when True, scale by 255, round, clamp to [0, 255] and scale
            back, so every value corresponds to an exact 8-bit pixel level.

    Returns:
        The constrained tensor (a new tensor whenever any step was applied).

    Raises:
        ValueError: if ``clip_arrange`` is given but does not have exactly two items.
    """
    # Identity check instead of `!= None` / truthiness: comparing or bool()-ing a
    # tensor is elementwise and ambiguous, which broke stacked-tensor bounds.
    if clip_arrange is not None:
        if len(clip_arrange) != 2:
            raise ValueError(f"Incorrect len(clip_arrange) ({len(clip_arrange):d})")
        upper_bound, lower_bound = clip_arrange[0], clip_arrange[1]
        # Pull values back into [lower_bound, upper_bound], i.e. [x-epsilon, x+epsilon].
        tensor_image = torch.max(torch.min(tensor_image, upper_bound), lower_bound)

    if align_to_image:
        scale = 255
        # Quantise to integer pixel levels, keep them inside [0, 255], then rescale.
        tensor_image = torch.clamp(torch.round(tensor_image * scale), 0, scale) / scale
    return tensor_image

## Noise

In [None]:
def get_noise_tensor(noise_pic:str, to_size:List[int]):
    """Load `noise_pic` as an RGB tensor resized to the given image size.

    Args:
        noise_pic: path of the image file to load.
        to_size: target size whose last two entries are swapped before resizing
            (the caller passes a PIL ``size``, i.e. (width, height)).

    Returns:
        The resized image tensor, moved to the notebook-level `device`.
    """
    swapped_size = [to_size[-1], to_size[-2]]
    rgb_image = Image.open(noise_pic).convert('RGB')
    as_tensor = transforms.ToTensor()(rgb_image)
    resized = transforms.Resize(size=swapped_size)(as_tensor)
    return resized.to(device)
noise_tensor0 = get_noise_tensor(noise_pic, benign_pic_size)

In [None]:
def compute_normed_noise(x, adv):
    """Min-max normalise the perturbation ``adv - x`` into the range [0, 1].

    Args:
        x: reference (benign) image tensor.
        adv: perturbed image tensor, broadcastable against ``x``.

    Returns:
        A tensor shaped like ``adv - x`` whose smallest entry is 0 and whose largest
        entry is 1. When the perturbation is constant (for example ``adv`` equals
        ``x``) the result is all zeros instead of NaN.
    """
    noise = adv - x
    shifted = noise / 2
    shifted = shifted - torch.min(shifted)  # smallest entry is now 0
    span = torch.max(shifted) - torch.min(shifted)
    # A constant perturbation has zero span; flooring the divisor at the smallest
    # positive float turns the former 0/0 (NaN) into 0 without changing any
    # non-degenerate result or its gradient.
    span = torch.clamp(span, min=torch.finfo(shifted.dtype).tiny)
    return shifted / span

## Visualize

In [None]:
# visualize(preprocess_image(benign_pic), adv_x, predicted_class, benign_confidence, adv_predicted_class, adv_confidence)
def visualize(x, adv, benign_label:int, adv_label:int, benign_confidence:float, adv_confidence:float, height:int=10, width:int=30):
    """Plot the benign image, the adversarial image and their normalised difference in one row.

    Args:
        x: benign image batch laid out as (batch, C, H, W); it is squeezed before
            display, so a batch of one is expected.
        adv: adversarial image batch with the same layout as ``x``.
        benign_label: class index shown in the title of the benign panel.
        adv_label: class index shown in the title of the adversarial panel.
        benign_confidence: confidence shown for the benign prediction.
        adv_confidence: confidence shown for the adversarial prediction.
        height: figure height in inches.
        width: figure width in inches.

    Class names are looked up in the notebook-level `class_labels` mapping.
    """
    benign_cpu, adv_cpu = x.detach().cpu(), adv.detach().cpu()
    normed_noise = compute_normed_noise(benign_cpu, adv_cpu)

    def to_displayable(batch):
        # (batch, C, H, W) -> (batch, H, W, C), then drop the singleton batch axis.
        return batch.numpy().transpose([0, 2, 3, 1]).squeeze()

    # matplotlib's figsize is (width, height); the arguments used to be passed swapped.
    plt.figure(figsize=(width, height))

    panels = (
        (f"x: {class_labels[benign_label]} (confidence: {benign_confidence:.1%})", benign_cpu),
        (f"x_adv: {class_labels[adv_label]} (confidence: {adv_confidence:.1%})", adv_cpu),
        (f"Noise (x_adv-x), normed to [0, 1]", normed_noise),
    )
    for position, (title, batch) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.title(title)
        plt.axis('off')
        plt.imshow(to_displayable(batch))

## Store & Load

In [None]:
import torch
from torchvision import transforms
from PIL import Image
import re

def store_img_from_tensor(tensor_image, img_path:str):
    """Write an image tensor to disk as a PNG file.

    Args:
        tensor_image: image tensor; singleton dimensions are squeezed away before
            the conversion to a PIL image.
        img_path: destination path, which must end in ``.png`` (case-insensitive).

    Raises:
        TypeError: if ``img_path`` does not end in ``.png``.
    """
    # Only a lossless container is accepted: check the extension up front.
    has_png_suffix = re.match(r'.*\.png$', img_path, re.IGNORECASE) is not None
    if not has_png_suffix:
        raise TypeError(f"We have to store image file to png due to the `loss nature of JPEG format`!")

    to_pil = transforms.ToPILImage()
    # PNG is lossless, so no quality setting is needed when saving.
    to_pil(tensor_image.squeeze()).save(img_path)
    
def load_img_to_tensor(img_path:str):
    """Read an image file into a batched tensor.

    The image is converted to RGB and passed through ``ToTensor`` only; no resize,
    crop or normalisation is applied.

    Args:
        img_path: path of the image file to read.

    Returns:
        The image tensor with a leading batch dimension added.
    """
    rgb_image = Image.open(img_path).convert('RGB')
    # ToTensor is the only transform in the pipeline.
    to_tensor = transforms.Compose([transforms.ToTensor()])
    # Prepend the batch dimension.
    return to_tensor(rgb_image).unsqueeze(0)

# Create model

Model list is available [here](https://github.com/osmr/imgclsmob/blob/master/pytorch/pytorchcv/model_provider.py).

Model label list is ImageNet labels which can be found [here](https://files.fast.ai/models/imagenet_class_index.json)

In [None]:
class ModelFactory():
    """Singleton that lazily builds and caches pretrained torchvision classifiers.

    The first construction stores a copy of the supported model names and reads the
    class-label file; every later construction returns that same instance and
    ignores its arguments.
    """
    _instance = None          # the single shared instance, created on first construction
    _supported_models = []    # lower-case model names accepted by get_model
    _models = {}              # cache: model name -> initialised model
    _class_labels = None      # class index (int) -> class name

    @classmethod
    def _fill_classlabels(cls):
        """Read 'imagenet_class_index.json' from the working directory into `_class_labels`."""
        class_file = 'imagenet_class_index.json'
        with open(class_file, 'r') as f:
            raw_labels = json.load(f)
        # Each entry's value is a sequence whose second item is the class name.
        cls._class_labels = {int(index): entry[1] for index, entry in raw_labels.items()}

    def __new__(cls, supported_models, *args, **kwargs):
        """Return the shared instance, creating and initialising it on first use.

        Extra positional/keyword arguments are accepted for signature compatibility
        and ignored.
        """
        if cls._instance is None:
            instance = super().__new__(cls)
            cls._supported_models = copy.deepcopy(supported_models)
            cls._models = dict()
            cls._fill_classlabels()
            # Publish the singleton only after the label file has loaded, so a failed
            # load does not leave a half-initialised instance behind.
            cls._instance = instance
        return cls._instance

    def get_supported_models(self)->List[str]:
        """Return a copy of the supported model names."""
        return list(self._supported_models)

    def get_model(self, model_name):
        """Return the cached model for `model_name`, building it on first request.

        A newly built model is moved to the notebook-level `device`, has gradients
        disabled for its parameters and is switched to eval mode.

        Raises:
            ValueError: if `model_name` (case-insensitive) is not a supported model.
        """
        m = model_name.lower()
        if m not in self._supported_models:
            raise ValueError(f"Not supported model: {model_name} - supported models: {self._supported_models}")
        if m not in self._models:  # model is not yet initialized
            print(f"{m} is not yet initialized, create a new one!")
            # NOTE(review): `pretrained=True` is the legacy torchvision flag; newer
            # releases prefer `weights=...` - confirm which weights are wanted before switching.
            model = models.get_model_builder(m)(pretrained=True).to(device)
            model.requires_grad_(False)
            model.eval()
            self._models[m] = model
        else:
            print(f"{m} is already initialized, return directly!")
        return self._models[m]

    def get_class_labels(self):
        """Return the mapping of class index to class name."""
        return self._class_labels

# Create the shared factory; constructing it also reads the class-label file.
model_factory = ModelFactory(supported_models)
# Mapping of class index (int) to class name, used throughout the notebook.
class_labels = model_factory.get_class_labels()

# Classify picture

In [None]:
# Preprocess the image
def preprocess_image(image_path:str):
    """Load an image file as a batched tensor on the notebook-level `device`."""
    return load_img_to_tensor(image_path).to(device)

def get_logits(model, image_path:str):
    """Run `model` on the image at `image_path` without tracking gradients; return its raw output."""
    with torch.no_grad():
        return model(preprocess_image(image_path))

# Classify the image
def classify_image(model, image_path:str, class_labels:Dict[int, str]):
    """Classify the image stored at `image_path`.

    Loads the file and delegates to `classify_tensor`, so file-based and
    tensor-based classification share one implementation.

    Returns:
        (predicted_class, predicted_classname, confidence, probabilities) - see `classify_tensor`.
    """
    return classify_tensor(model, preprocess_image(image_path), class_labels)

# Classify via tensor
def classify_tensor(model, x, class_labels:Dict[int, str]):
    """Classify an already-loaded image tensor.

    Args:
        model: callable that maps the input batch to logits with classes on dim 1.
        x: input batch; the prediction is taken over the squeezed output, so a
            batch of one is expected.
        class_labels: mapping of class index to class name.

    Returns:
        predicted_class: index of the most probable class (int).
        predicted_classname: name looked up in `class_labels`.
        confidence: softmax probability of the predicted class (0-dim tensor).
        probabilities: full softmax output with the batch dimension kept.
    """
    with torch.no_grad():
        logits = model(x)
    probabilities = torch.softmax(logits, dim=1)
    flat_probabilities = probabilities.squeeze()
    predicted_class = torch.argmax(flat_probabilities).item()
    predicted_classname = class_labels[predicted_class]
    confidence = flat_probabilities[predicted_class]
    return predicted_class, predicted_classname, confidence, probabilities
    

## Check result of benign examples

In [None]:
# Sanity check: classify the benign picture with the first supported model.
test_tensor_image = load_img_to_tensor(benign_pic).to(device)
print(f"test_tensor_image.shape: {test_tensor_image.shape}")

test_model_name = supported_models[0]
test_model = model_factory.get_model(test_model_name)

# Inference only, so skip autograd bookkeeping. The class axis is named explicitly
# because calling softmax without `dim` relies on a deprecated implicit choice.
with torch.no_grad():
    y_hat = F.softmax(test_model(test_tensor_image), dim=1)

test_pred = torch.argmax(y_hat, dim=1).item()
test_pred_name = class_labels[test_pred]
test_confidence = y_hat[0][test_pred]
print(f"Predicted name: {test_pred_name} with index: {test_pred:d}. Confidence: {test_confidence:.2%}")

In [None]:
# Classify the benign picture with the attacked model; `predicted_class` is reused
# by later cells as the ground-truth label.
model = model_factory.get_model(model_name)
predicted_class, predicted_classname, confidence, probabilities = classify_image(model, benign_pic, class_labels)
print(f'Predicted class: {predicted_classname} (No: {predicted_class:d}, with confidence: {confidence:.1%})')

# gradient

In order to calculate gradient, we'll use cross-entropy loss here:

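$$\text{CrossEntropyLoss} = - \sum_{i=1}^{N} y_{i}\log{(p_{i})}$$

where $N$ is the number of classes, $y$ is the one-hot encoded label and $p = \text{softmax}(\text{logits})$ is the predicted class distribution.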

Note, the `F.cross_entropy` or `nn.CrossEntropyLoss` is calculated as follows:

1. Apply a softmax function to the raw scores to get a probability distribution.
2. Compute the negative log-likelihood loss against the given labels.

So, the result of the following code is NOT `tensor(0.)` but `tensor(0.9048)` because the `F.cross_entropy` calculates `softmax` first on the input (`pred`) and thus the value of the zeroth index is no longer `1.`:

```python
loss_fn = F.cross_entropy
pred = torch.tensor([[1., 0, 0, 0, 0]])
labels = torch.tensor([0])
loss_fn(pred, labels)
```

Compare this with the following code, whose resulting loss is very close to `0`:

```python
loss_fn = F.cross_entropy
pred = torch.tensor([[10., 0, 0, 0, 0]])
labels = torch.tensor([0])
loss_fn(pred, labels)
```

Here, because the zeroth entry of `pred` is much larger than the other values, the zeroth entry of the `softmax` output (which is the input of the `negative log-likelihood`) is very close to `1.`, and the resulting loss is therefore very close to `0.`.

## non-target gradient

In [None]:
def non_target_grad(model, pic:str, ground_truth:int, loss_fn=F.cross_entropy):
    """Gradient of the negated loss for the ground-truth label with respect to the input image.

    Args:
        model: classifier to differentiate through.
        pic: path of the image file to load.
        ground_truth: class index used as the label.
        loss_fn: loss taking (model output, label tensor).

    Returns:
        The gradient stored on the loaded input tensor after backpropagation.
    """
    label = torch.tensor([ground_truth]).to(device)
    image = preprocess_image(pic)
    image.requires_grad = True
    # The loss is negated before backpropagation, so the gradient points toward a larger loss.
    negated_loss = -loss_fn(model(image), label)
    print(f"loss = {negated_loss:.4f}")
    negated_loss.backward()
    return image.grad

model = model_factory.get_model(model_name)
x_grad = non_target_grad(model, pic=benign_pic, ground_truth=predicted_class)

print(f"\nx_grad.shape = {x_grad.shape}")

## target gradient

In [None]:
def target_grad_by_x0(model, x, target:int, loss_fn=F.cross_entropy):
    """Gradient of the targeted adversarial loss with respect to the input `x`.

    The adversarial loss is the sum, over every class other than `target` in the
    notebook-level `class_labels`, of ``target_loss / other_loss``.

    Args:
        model: classifier to differentiate through.
        x: input batch; a detached copy is used, so `x` itself is left untouched.
        target: class index the attack aims for.
        loss_fn: loss taking (model output, label tensor).

    Returns:
        x_grad: gradient of the adversarial loss w.r.t. the input, on the CPU.
        other_losses: list of detached CPU losses, one per non-target class.
        target_loss: detached CPU loss for the target class.
        adv_loss: detached CPU adversarial loss.
    """
    y_target = torch.tensor([target], device=device)

    # A fresh leaf tensor on the compute device; no host round trip is needed.
    x0 = x.detach().clone().to(device)
    x0.requires_grad = True
    y_hat = model(x0)

    target_loss = loss_fn(y_hat, y_target)
    adv_loss = torch.tensor([0.], device=device)
    other_losses = []
    for y_other in class_labels.keys():
        if y_other == target:
            continue
        other_loss = loss_fn(y_hat, torch.tensor([y_other], device=device))
        adv_loss = adv_loss + target_loss / other_loss
        other_losses.append(other_loss.detach().cpu())

    adv_loss.backward()
    # Everything returned is detached so callers that keep these values do not
    # also keep the whole autograd graph (and its activations) alive.
    return x0.grad.cpu(), other_losses, target_loss.detach().cpu(), adv_loss.detach().cpu()

# The function computes adversarial examples for a given model, initial input, target class, expected noise and learning rate.
def target_grad_by_x1(model, x, target:int, expected_noise, alpha:float, loss_fn=F.cross_entropy, benign_x=None):
    """Gradient of a combined (adversarial + noise-shape) loss with respect to `x`.

    Steps:
      1. Adversarial loss: mean over all non-target classes in the notebook-level
         `class_labels` of ``target_loss / other_loss``.
      2. Take one trial signed-gradient step of size `alpha` from that loss.
      3. Noise loss: MSE between `expected_noise` and the normalised difference
         between the trial step and the benign image.
      4. Total loss: ``sqrt(adv_loss**2 + noise_loss**2)``, whose input gradient is returned.

    Args:
        model: classifier to differentiate through.
        x: current adversarial candidate; a detached copy is used.
        target: class index the attack aims for.
        expected_noise: tensor the normalised perturbation should resemble.
        alpha: step size of the trial step.
        loss_fn: loss taking (model output, label tensor).
        benign_x: benign reference image. When omitted it is loaded from the
            notebook-level `benign_pic` path, as before.

    Returns:
        (x_grad, other_losses, target_loss, noise_loss, total_loss); all losses are
        detached CPU tensors and `x_grad` is the gradient of the total loss only.
    """
    if benign_x is None:
        benign_x = preprocess_image(benign_pic)

    # Fresh leaf tensor so gradients are collected for this call only.
    x0 = x.detach().clone()
    x0.requires_grad = True

    y_hat = model(x0)

    y_target = torch.tensor([target], device=device)
    target_loss = loss_fn(y_hat, y_target)

    adv_loss = torch.tensor([0.], device=device)
    other_losses = []
    for other in class_labels.keys():
        if other == target:
            continue
        other_loss = loss_fn(y_hat, torch.tensor([other], device=device))
        adv_loss = adv_loss + target_loss / other_loss
        # Detached: these values are only reported, never differentiated.
        other_losses.append(other_loss.detach().cpu())

    # Average over the non-target classes; the loss stays a tensor even when there are none.
    if other_losses:
        adv_loss = adv_loss / len(other_losses)

    # First backward pass only provides the direction of the trial step.
    # retain_graph: the total loss below backpropagates through adv_loss again.
    adv_loss.backward(retain_graph=True)
    step_direction = x0.grad.sign()

    # model.zero_grad() does not touch the input's gradient, so clear it explicitly;
    # otherwise the second backward pass would add to the first one's result.
    x0.grad = None
    model.zero_grad()

    adv_x = x0 - alpha * step_direction

    noise_tensor = compute_normed_noise(benign_x, adv_x)
    noise_loss = F.mse_loss(expected_noise, noise_tensor)
    total_loss = torch.sqrt(adv_loss ** 2 + noise_loss ** 2)

    total_loss.backward()

    return x0.grad, other_losses, target_loss.detach().cpu(), noise_loss.detach().cpu(), total_loss.detach().cpu()


def target_grad_by_file(model, pic:str, target:int, loss_fn=F.cross_entropy):
    """File-based front end of `target_grad_by_x0`.

    Loads the image at `pic` and returns exactly what `target_grad_by_x0` returns
    for it: (x_grad, other_losses, target_loss, adv_loss).
    """
    return target_grad_by_x0(model, preprocess_image(pic), target, loss_fn)

model = model_factory.get_model(model_name)
x = preprocess_image(benign_pic)

# Merge cat and dog

In [None]:
# NOTE(review): these two paths are not referenced anywhere else in the visible
# notebook - presumably placeholders for this unfinished section; confirm or remove.
pic01 = "./data/cat/Cat01.jpg"
pic02 = "./data/dog/Dog01.jpg"



# Attacking algorithms

## fgsm

In [None]:
def fgsm(benign_pic:str, model, target_id:int):
    """Single-step targeted attack: one signed-gradient step of size `epsilon`.

    Args:
        benign_pic: path of the benign image file.
        model: classifier under attack.
        target_id: class index the attack aims for.

    Returns:
        (adv_x, x_grad, other_losses, target_loss, adv_loss), where `adv_x` is the
        perturbed image on the CPU and the remaining items come from `target_grad_by_x0`.
    """
    # Decode the image once and reuse the tensor for both the gradient and the step.
    x = preprocess_image(benign_pic)
    x_grad, other_losses0, target_loss0, adv_loss0 = target_grad_by_x0(model, x, target=target_id)
    # Step against the gradient of the targeted loss to reduce it.
    adv_x = x.detach().cpu() - epsilon * x_grad.detach().sign()
    # No epsilon-ball clipping is needed after a single step, but the result still
    # has to be a valid image: snap to 8-bit levels inside [0, 1] so that storing it
    # as a picture cannot produce out-of-range pixel values.
    adv_x = clamp_tensor_image(adv_x, align_to_image=True)
    return adv_x, x_grad, other_losses0, target_loss0, adv_loss0

adv_x, x_grad, other_losses0, target_loss0, adv_loss0 = fgsm(benign_pic, model, target_id)

## ifgsm

In [None]:
def ifgsm(benign_pic:str, model, target_id:int, num_iterate:int=200, alpha=None):
    """Iterative targeted attack with a noise-shape term.

    Each iteration takes a signed-gradient step of size `alpha` using the gradient
    from `target_grad_by_x1`, then clips the result back to within `epsilon` of the
    benign image. After the last iteration the result is additionally snapped to
    8-bit pixel levels.

    Args:
        benign_pic: path of the benign image file.
        model: classifier under attack.
        target_id: class index the attack aims for.
        num_iterate: number of iterations.
        alpha: per-iteration step size; defaults to ``epsilon / num_iterate``.

    Returns:
        adv_x: the adversarial image.
        other_losses_list: per-iteration mean loss over the non-target classes.
        target_losses: per-iteration loss for the target class.
        adv_losses: per-iteration total loss reported by `target_grad_by_x1`.
    """
    eps = epsilon.to(device)
    # Identity test: `alpha` may be a tensor, for which `!= None` is not a plain bool comparison.
    if alpha is None:
        alpha = eps / num_iterate

    x = preprocess_image(benign_pic).detach()
    clip_ratio = 1
    # Order is (upper, lower), as expected by clamp_tensor_image.
    clip_arrange = [x + clip_ratio * eps, x - clip_ratio * eps]
    # Loop-invariant: add the batch dimension to the expected noise once.
    expected_noise = noise_tensor0.unsqueeze(0)
    adv_x = x.clone()

    other_losses_list = []
    target_losses = []
    adv_losses = []
    display_interval = 25
    for i in range(num_iterate):
        x_grad, other_losses, target_loss, noise_loss, adv_loss = target_grad_by_x1(model, x=adv_x, target=target_id, expected_noise=expected_noise, alpha=alpha)

        avg_other_losses = torch.mean(torch.tensor(other_losses)).item()

        if i % display_interval == 0:
            print(f"avg_other_losses = {avg_other_losses:.8f},\ttarget_loss = {target_loss:.8f},\tnoise_loss = {noise_loss:.8f},\tadv_loss = {adv_loss.item():.8f}")

        other_losses_list.append(avg_other_losses)
        target_losses.append(target_loss)
        adv_losses.append(adv_loss)

        # Step against the gradient, then stay inside the epsilon ball around x.
        adv_x = adv_x - alpha * x_grad.detach().sign()
        adv_x = clamp_tensor_image(adv_x, clip_arrange, align_to_image=False)

    # Final pass also quantises to 8-bit levels so the tensor matches a stored picture.
    adv_x = clamp_tensor_image(adv_x, clip_arrange, align_to_image=True)

    return adv_x, other_losses_list, target_losses, adv_losses

# Test

# Baseline before the attack: what the attacked model predicts for the benign
# picture, and how much probability it already assigns to the target class.
model = model_factory.get_model(model_name)

predicted_class, predicted_classname, confidence, probabilities = classify_image(model, benign_pic, class_labels)
print(f"\nFor {benign_pic}:")
print(f'Predicted class: {predicted_classname} (No: {predicted_class:d}, with confidence: {confidence:.1%}).')
print(f"The target classname is: {class_labels[target_id]} (No. {target_id}) with confidence: {probabilities[0][target_id]:.1%}")

# Check result of adversarial examples

## Generate adversarial example

In [None]:
%%time
num_iterate=45

adv_x, other_losses, target_losses, adv_losses = ifgsm(benign_pic, model, target_id, num_iterate=num_iterate, alpha=35*epsilon.to(device)/num_iterate)
# adv_x, x_grad, other_losses, target_loss, adv_loss = fgsm(benign_pic, model, target_id)
adv_file = tempfile.NamedTemporaryFile().name + ".png"
store_img_from_tensor(adv_x, adv_file)

In [None]:
# Classify the benign picture (from file) and the adversarial tensor with the attacked model.
predicted_class, predicted_classname, confidence, probabilities = classify_image(model, benign_pic, class_labels)
adv_predicted_class, adv_predicted_classname, adv_confidence, adv_probabilities = classify_tensor(model, adv_x.to(device), class_labels)

In [None]:
# Show the benign image, the adversarial image and the normalised noise side by side.
visualize(x=preprocess_image(benign_pic), adv=adv_x, benign_label=predicted_class, adv_label=adv_predicted_class, benign_confidence=confidence, adv_confidence=adv_confidence, height=20, width=60)

## Evaluate using same model

### via tensor

In [None]:
# Same-model evaluation on the in-memory adversarial tensor (no file round trip).
predicted_class, predicted_classname, confidence, probabilities = classify_image(model, benign_pic, class_labels)
adv_predicted_class, adv_predicted_classname, adv_confidence, adv_probabilities = classify_tensor(model, adv_x.to(device), class_labels)
print(f'Predicted class: {adv_predicted_classname} (No: {adv_predicted_class:d}, with confidence: {adv_confidence:.4%}).')
print(f"The target classname is: {class_labels[target_id]} (No. {target_id}) with confidence: {adv_probabilities[0][target_id]:.4%}")

In [None]:
# Plot the result of the same-model, tensor-based evaluation.
visualize(x=preprocess_image(benign_pic), adv=adv_x, benign_label=predicted_class, adv_label=adv_predicted_class, benign_confidence=confidence, adv_confidence=adv_confidence, height=20, width=60)

### via stored picture

In [None]:
# Same-model evaluation on the adversarial picture as stored on disk.
predicted_class, predicted_classname, confidence, probabilities = classify_image(model, benign_pic, class_labels)
adv_predicted_class, adv_predicted_classname, adv_confidence, adv_probabilities = classify_image(model, adv_file, class_labels)
print(f"\nFor {adv_file}:")
print(f'Predicted class: {adv_predicted_classname} (No: {adv_predicted_class:d}, with confidence: {adv_confidence:.4%}).')
print(f"The target classname is: {class_labels[target_id]} (No. {target_id}) with confidence: {adv_probabilities[0][target_id]:.4%}")
# NOTE(review): the labels come from the stored file, but the plotted image is the
# in-memory `adv_x` tensor - confirm that is intended.
visualize(x=preprocess_image(benign_pic), adv=adv_x, benign_label=predicted_class, adv_label=adv_predicted_class, benign_confidence=confidence, adv_confidence=adv_confidence)

In [None]:
# Keep a copy of the adversarial picture in the working directory, named after its predicted class.
shutil.copyfile(adv_file, f"{class_labels[adv_predicted_class]}.png")

## Evaluate using a different model

### Get a different model

In [None]:
# Second model, used only to evaluate the adversarial example.
verify_model = model_factory.get_model(verify_model_name)

In [None]:
# NOTE(review): this cell sits in the different-model section but classifies with
# `model`, not `verify_model` - confirm whether that baseline is intended.
predicted_class, predicted_classname, confidence, probabilities = classify_tensor(model, preprocess_image(benign_pic), class_labels)
# Cell output: class index, class name and confidence scaled to percent.
predicted_class, predicted_classname, confidence.item()*100

In [None]:
# Classify the adversarial tensor with the verification model.
# Note: this overwrites the benign `predicted_*` / `confidence` variables.
predicted_class, predicted_classname, confidence, probabilities = classify_tensor(verify_model, adv_x.to(device), class_labels)
# Cell output: class index, class name and confidence scaled to percent.
predicted_class, predicted_classname, confidence.item()*100

### via tensor

In [None]:
# Verification-model evaluation on the in-memory adversarial tensor.
predicted_class, predicted_classname, confidence, probabilities = classify_image(verify_model, benign_pic, class_labels)
# predicted_class, predicted_classname, confidence, probabilities = classify_tensor(verify_model, preprocess_image(benign_pic), class_labels)
adv_predicted_class, adv_predicted_classname, adv_confidence, adv_probabilities = classify_tensor(verify_model, adv_x.to(device), class_labels)
print(f'Predicted class: {adv_predicted_classname} (No: {adv_predicted_class:d}, with confidence: {adv_confidence:.4%}).')
print(f"The target classname is: {class_labels[target_id]} (No. {target_id}) with confidence: {adv_probabilities[0][target_id]:.4%}")

In [None]:
# Plot the result of the verification-model, tensor-based evaluation (default figure size).
visualize(x=preprocess_image(benign_pic), adv=adv_x, benign_label=predicted_class, adv_label=adv_predicted_class, benign_confidence=confidence, adv_confidence=adv_confidence)

### via stored picture

In [None]:
# Round-trip check: store the adversarial tensor as PNG, load it back and compare.
store_img_from_tensor(adv_x, adv_file)
adv_x1 = load_img_to_tensor(adv_file).to(device)
# Tolerance of half an 8-bit step: the result is True only if every pixel came back
# at the same 8-bit level. The former atol of 10e-1 (= 1.0) accepted any image in [0, 1].
adv_x.to(device).allclose(adv_x1, rtol=0, atol=0.5 / 255)

In [None]:
# Verification-model evaluation on the adversarial picture as stored on disk.
predicted_class, predicted_classname, confidence, probabilities = classify_image(verify_model, benign_pic, class_labels)
adv_predicted_class, adv_predicted_classname, adv_confidence, adv_probabilities = classify_image(verify_model, adv_file, class_labels)
print(f"\nFor {adv_file}:")
print(f'Predicted class: {adv_predicted_classname} (No: {adv_predicted_class:d}, with confidence: {adv_confidence:.4%}).')
print(f"The target classname is: {class_labels[target_id]} (No. {target_id}) with confidence: {adv_probabilities[0][target_id]:.4%}")
# NOTE(review): the labels come from the stored file, but the plotted image is the
# in-memory `adv_x` tensor - confirm that is intended.
visualize(x=preprocess_image(benign_pic), adv=adv_x, benign_label=predicted_class, adv_label=adv_predicted_class, benign_confidence=confidence, adv_confidence=adv_confidence)

In [None]:
# Delete the temporary adversarial picture; cells that read `adv_file` cannot be re-run after this.
os.remove(adv_file)