### DEBUG - TST2207

In [None]:
# Utils
import random
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

# Torch
import torch
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torchvision import datasets

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Location of the stored perturbation (.npy) that is loaded with get_uap().
uap_path = "./perturbations/uap_vgg16.npy"
# Root image folder that is handed to get_dataloader().
dataset_dir = './dataset/train/'

In [None]:
def process_image(img, transpose=False, numpy=False):
    """
    Preprocess an image so that it can be displayed in a plot.

    Parameters:
    - img: The image to preprocess (a torch tensor or a numpy array).
    - transpose: If True, the axes are reordered with ``transpose((1, 2, 0))``.
    - numpy: Set to True when ``img`` is already a numpy array. Only consulted
      when ``transpose`` is True; otherwise ``img`` is first converted with
      ``img.numpy()``.

    Returns:
    - img: The image min-max rescaled to the [0, 1] range. A constant image
      (max == min) is returned as all zeros instead of NaN.
    """
    if transpose:
        if not numpy:
            img = img.numpy()
        img = img.transpose((1, 2, 0))  # Move the first axis to the last position

    low = img.min()
    span = img.max() - low
    if span == 0:
        # Every pixel has the same value: dividing by zero would yield NaN,
        # so divide by 1 and let the result collapse to zeros.
        span = 1
    return (img - low) / span  # Min-max scale to [0, 1]


In [None]:
def obj_variance(obj, type=None):
    """
    Display the maximum and minimum value in a given object.

    Parameters:
    - obj: A given object (torch tensor or anything ``np.max``/``np.min`` accept).
    - type: Object type. Use "tensor" for torch tensors; any other value
      (including the default None) takes the numpy path.

    Returns:
    - None. The two values are printed with five decimal places, maximum first.
    """
    if type == "tensor":
        max_value = torch.max(obj).item()
        min_value = torch.min(obj).item()
    else:
        # The labels must match the reduction used: max -> "Maximum", min -> "Minimum".
        max_value = np.max(obj)
        min_value = np.min(obj)

    print("Maximum value:", "{:.5f}".format(max_value))
    print("Minimum value:", "{:.5f}".format(min_value))

In [None]:
def get_min_max(tensor):
    """
    Print the smallest and the largest element of a tensor.

    Parameters:
    - tensor: The torch tensor to inspect.
    """
    lowest = torch.min(tensor)
    highest = torch.max(tensor)
    # str() is used on purpose: it keeps the "tensor(...)" representation.
    for prefix, value in (("Min: ", lowest), ("Max: ", highest)):
        print(prefix + str(value))

In [None]:
def normalize(x):
    """
    Standardise a batch channel-wise: ``(x - mean) / std``.

    The three-element mean/std constants are cast to the type of ``x`` and
    reshaped to (1, 3, 1, 1), so ``x`` must broadcast against that shape
    (e.g. a batch with 3 channels on dimension 1).
    NOTE(review): the constants look like the usual ImageNet statistics -- confirm.

    Parameters:
    - x: The input tensor.

    Returns:
    - The standardised tensor.
    """
    channel_mean = torch.Tensor([0.485, 0.456, 0.406]).type_as(x)
    channel_std = torch.Tensor([0.229, 0.224, 0.225]).type_as(x)
    centered = x - channel_mean.view(1, -1, 1, 1)
    return centered / channel_std.view(1, -1, 1, 1)

In [None]:
def get_uap(path, device):
    """
    Load an array saved in a .npy file and return it as a tensor.

    Parameters:
    - path: Path of the file read with ``np.load``.
    - device: Device on which the resulting tensor is created.

    Returns:
    - The loaded array as a torch tensor on ``device``.
    """
    return torch.tensor(np.load(path), device=device)

In [None]:
### @joseareia -- 2024-07-19
def prepare_model(model_name, device):
    """
    Build a pretrained torchvision model and move it to a device.

    Parameters:
    - model_name: Name of the constructor looked up on ``torchvision.models``
      (e.g. 'vgg16').
    - device: Device the model is moved to.

    Returns:
    - The instantiated model on ``device``.
    """
    # NOTE(review): newer torchvision releases deprecate ``pretrained=`` in
    # favour of ``weights=`` -- confirm against the installed version.
    constructor = getattr(models, model_name)
    network = constructor(pretrained=True)
    return network.to(device)

In [None]:
### @joseareia -- 2024-07-23
def get_dataloader(dataset, batch_size, num_images, shuffle=True):
    """
    Build a DataLoader over a class-balanced random subset of an image folder.

    Parameters:
    - dataset: Root directory handed to ``datasets.ImageFolder``.
    - batch_size: Number of images per batch.
    - num_images: Total number of images wanted. Every class contributes
      ``num_images // num_classes`` images, so any remainder is dropped.
    - shuffle: Whether the DataLoader shuffles the subset.

    Returns:
    - dataloader: A DataLoader over the sampled subset. Images are resized,
      center-cropped to 224 and converted with ``ToTensor``.

    Notes:
    - Sampling uses ``np.random.choice`` without replacement, so seed numpy's
      global RNG for reproducible subsets.
    - A class holding fewer images than the per-class quota contributes all
      of its images instead of raising ``ValueError``.
    """
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    val_dataset = datasets.ImageFolder(dataset, transform=transform)
    num_classes = len(val_dataset.classes)

    # Group the sample indices by class label.
    class_indices = {i: [] for i in range(num_classes)}
    for idx, (_, label) in enumerate(val_dataset.samples):
        class_indices[label].append(idx)

    samples_per_class = num_images // num_classes

    subset_indices = []
    for i in range(num_classes):
        candidates = class_indices[i]
        # Sampling without replacement cannot draw more items than exist,
        # so cap the quota at the class size.
        quota = min(samples_per_class, len(candidates))
        subset_indices.extend(
            np.random.choice(candidates, quota, replace=False).tolist()
        )

    val_subset = Subset(val_dataset, subset_indices)

    dataloader = torch.utils.data.DataLoader(
        val_subset, batch_size=batch_size, shuffle=shuffle, num_workers=2
    )

    return dataloader

In [None]:
def get_fooling_rate(model, delta, dataloader):
    """
    Collect the perturbed images whose prediction differs from the clean one.

    Despite its name, this function does not compute a rate; it returns the
    fooled samples themselves.

    Parameters:
    - model: The classifier; it is switched to eval mode.
    - delta: Perturbation tensor added to every batch of images.
    - dataloader: Iterable yielding ``(images, labels)`` batches. The labels
      are not used: the reference is the model's own clean prediction.

    Returns:
    - adv_dataset: List with one CPU tensor per batch that contained at least
      one fooled image, holding only those perturbed images.
    - adv_classes: List aligned with ``adv_dataset``; entry ``i`` holds the
      classes predicted for the images in ``adv_dataset[i]``.
    """
    adv_dataset = []
    adv_classes = []

    model.eval()
    with torch.no_grad():
        for images, _ in dataloader:
            images = images.to(device)

            predicted = model(normalize(images)).argmax(dim=1)

            # Perturbed images are clamped back to [0, 1]
            # (assumes the inputs are in that range -- TODO confirm).
            adv_images = torch.add(delta, images).clamp(0, 1)
            adv_predicted = model(normalize(adv_images)).argmax(dim=1)

            ### Beginning of changes by @joseareia -- 2024-07-22

            fooled = predicted != adv_predicted
            if fooled.any():
                adv_dataset.append(adv_images[fooled].cpu())
                # Keep only the predictions of the fooled images so that both
                # lists stay index-aligned.
                adv_classes.append(adv_predicted[fooled].cpu())

            # Drop the per-batch tensors before asking CUDA to release its cache.
            del images, adv_images, predicted, adv_predicted, fooled
            torch.cuda.empty_cache()

            ### End of changes by @joseareia

    return adv_dataset, adv_classes

In [None]:
%%time
# Load the stored perturbation onto the selected device.
uap = get_uap(uap_path, device)

In [None]:
%%time
# Instantiate the pretrained 'vgg16' model on the selected device.
model = prepare_model('vgg16', device)

In [None]:
%%time
# Class-balanced subset of 5000 images from dataset_dir, served in batches of 4.
dataloader = get_dataloader(dataset=dataset_dir, batch_size=4, num_images=5000, shuffle=True)

In [None]:
%%time
delta = torch.clamp(uap, -10/255, 10/255)
adv_images = get_fooling_rate(model, delta, dataloader)

In [None]:
total_images = len(adv_images)*4
print("Total images: " + str(total_images))

In [None]:
adv_images_processed = process_image(adv_images[0][0].squeeze().cpu(), transpose=True, numpy=False)
plt.figure(figsize = (7,7))
plt.imshow(adv_images_processed)

In [None]:
# Pull a single batch from the dataloader.
imgs, labels = next(iter(dataloader))

In [None]:
# Display the first (unperturbed) image of that batch.
images_processed = process_image(imgs[0].squeeze().cpu(), transpose=True, numpy=False)
plt.figure(figsize = (7,7))
plt.imshow(images_processed)

---

### DEBUG --- RETRIEVE ALL CLASSES

In [None]:
# Step-by-step replay of the logic inside get_dataloader(), for inspection.
dataset = './dataset/train/'

In [None]:
# Same preprocessing pipeline as get_dataloader().
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

val_dataset =  datasets.ImageFolder(dataset, transform=transform)

# Total number of images found under the dataset root.
len(val_dataset)

In [None]:
num_classes = len(val_dataset.classes)

In [None]:
num_classes

In [None]:
# Group the sample indices by class label.
class_indices = {i: [] for i in range(num_classes)}
for idx, (_, label) in enumerate(val_dataset.samples):
    class_indices[label].append(idx)

In [None]:
# Per-class quota for a target of 1000 images (the remainder is dropped).
samples_per_class = 1000 // num_classes

In [None]:
# Draw the quota from every class without replacement; np.random.choice raises
# ValueError if a class holds fewer images than the quota.
subset_indices = []
for i in range(num_classes):
    class_subset_indices = np.random.choice(class_indices[i], samples_per_class, replace=False).tolist()
    subset_indices.extend(class_subset_indices)

In [None]:
val_subset = Subset(val_dataset, subset_indices)

In [None]:
# NOTE: this rebinds the `dataloader` created earlier in the notebook (batch size 8 here).
dataloader = torch.utils.data.DataLoader(val_subset, batch_size=8, shuffle=True, num_workers=2)

In [None]:
# Number of batches in the loader.
len(dataloader)

In [None]:
imgs, labels = next(iter(dataloader))

In [None]:
# Number of images in the fetched batch.
len(imgs)