## AutoEncoder
Alberto Bellumat - alberto.bellumat@studenti.unitn.it<br>
Claudio Facchinetti - claudio.facchinetti@studenti.unitn.it<br>

### Step 1: Import everything

In [None]:
!pip install --upgrade --force-reinstall --no-deps albumentations

Collecting albumentations
  Using cached https://files.pythonhosted.org/packages/e7/27/2fa0ec5e0c04c410cbb54dd79910afa884409440653aa4688654e6497e2a/albumentations-1.0.2-py3-none-any.whl
Installing collected packages: albumentations
  Found existing installation: albumentations 1.0.2
    Uninstalling albumentations-1.0.2:
      Successfully uninstalled albumentations-1.0.2
Successfully installed albumentations-1.0.2


In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
import os
from os import listdir
import numpy as np
import random
import shutil
import albumentations as A
import pandas as pd
from albumentations.pytorch import ToTensorV2
from PIL import Image
import gc
import filecmp
from google.colab import drive

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
! rm -rf dataset*
! gdown --id 1febPQrnQVsYflQdxXFEqGxNYG7Y1nHH5
! unzip -qq dataset.zip -d ./dataset && echo "Done"

Downloading...
From: https://drive.google.com/uc?id=1febPQrnQVsYflQdxXFEqGxNYG7Y1nHH5
To: /content/dataset.zip
82.9MB [00:00, 178MB/s]
Done


In [None]:
def create_valid(proportion=0.8):
    """Create the validation folder by moving whole identities out of the train folder.

    File names are expected to start with the person ID followed by an underscore.
    The split is done per person: every image of a given person ends up either in
    ``dataset/train`` or in ``dataset/valid``, never in both.

    Args:
        proportion(float): The proportion of unique person IDs to keep in the train folder.
    """
    train_dir = 'dataset/train'
    valid_dir = 'dataset/valid'

    # Create the valid folder
    if not os.path.exists(valid_dir):
        os.mkdir(valid_dir)
        print("Directory " , valid_dir ,  " Created ")
    else:
        print("Directory " , valid_dir ,  " already exists")

    # Scan the train folder once and group the files by their exact person ID.
    # Matching on the exact ID (not on a file-name prefix) prevents an ID such as "1"
    # from also capturing the files of "10", "11", ...
    files_by_person = {}
    for file_name in os.listdir(train_dir):
        files_by_person.setdefault(file_name.split('_')[0], []).append(file_name)

    # Sort before shuffling so that seeding `random` gives a reproducible split.
    person_ids = sorted(files_by_person)
    # Number of unique person IDs that stay in the train folder.
    train_count = int(len(person_ids) * proportion)
    random.shuffle(person_ids)
    valid_ids = person_ids[train_count:]

    print("Elements in the train folder before " + str(len(os.listdir(train_dir))))

    # Move every image of the selected identities into the valid folder.
    for person_id in valid_ids:
        for file_name in files_by_person[person_id]:
            source = os.path.join(train_dir, file_name)
            shutil.copy(source, valid_dir)
            os.remove(source)

    print("Elements in the train folder after " + str(len(os.listdir(train_dir))))
    print("Elements in the valid folder "+ str(len(os.listdir(valid_dir))))

In [None]:
def save_checkpoint(model, optimizer, epoch, val_loss):
    """Write a training checkpoint to the local file ``data.pt``.

    The checkpoint is a dictionary holding the model state, the optimizer state,
    the epoch number and the validation loss, so that training can be resumed.

    Args:
        model: The model whose state dict is stored.
        optimizer: The optimizer whose state dict is stored.
        epoch: The epoch at which the checkpoint is taken.
        val_loss: The validation loss measured when the checkpoint is taken.
    """
    checkpoint = {}
    checkpoint['model_state_dict'] = model.state_dict()
    checkpoint['optimizer_state_dict'] = optimizer.state_dict()
    checkpoint['epoch'] = epoch
    checkpoint['val_loss'] = val_loss
    torch.save(checkpoint, "data.pt")

In [None]:
def adjust_learning_rate(optimizer, shrink_factor=0.8):
    """Scale the learning rate of every parameter group of the optimizer.

    Args:
        optimizer: The optimizer whose learning rates are multiplied in place.
        shrink_factor(float): The factor each learning rate is multiplied by.
    """
    print("\nDECAYING learning rate.")
    groups = optimizer.param_groups
    for group in groups:
        group.update(lr=group['lr'] * shrink_factor)
    # Report the rate of the first group only.
    first_lr = groups[0]['lr']
    print("The new learning rate is %f\n" % (first_lr,))

In [None]:
# Transform used for data augmentation. The steps are applied in sequence, each one randomly:
# horizontal flip, brightness change, Gaussian noise and a small rotation.
# Afterwards the image is normalized and converted to a tensor.
album_transform = A.Compose([
    A.HorizontalFlip(p=0.6),
    A.RandomBrightness(p=0.4, limit=(-0.2, 0.2)),
    A.GaussNoise(p=1.0, var_limit=(10.0, 50.0)),
    # NOTE(review): border_mode=1 is presumably cv2.BORDER_REPLICATE - confirm against OpenCV.
    A.Rotate(limit=(-10, 10), border_mode=1),
    # Normalize the pixels in the image. With mean 0 and std 1 this is expected to reduce to
    # dividing each pixel by 255 (the library's default maximum pixel value) - confirm against
    # the installed albumentations version.
    A.Normalize(mean = (0, 0, 0),std  = (1, 1, 1)),
    ToTensorV2(),
])



In [None]:
# Plain image-to-tensor conversion without any random step. Not to be used for data augmentation.
img_transform = transforms.Compose([transforms.ToTensor()])

In [None]:
class ImagesDataset(torch.utils.data.Dataset):
    """Dataset serving every file found in a single image folder.

    Each image can be exposed several times (see ``aug``) so that a random
    augmentation transform yields several different variants per epoch.
    """

    # Transform calling conventions understood by __getitem__.
    _SUPPORTED_MODES = ("Album", "Torch")

    def __init__(self, img_dir="./", transform=None, aug=1, mode="Album", return_name=False):
        """Collect the image names in ``img_dir`` and store the loading options.

        Args:
            img_dir (string): The folder in which the images are present.

            transform: The transform applied to the image when __getitem__ is called.
            If None, the image is returned as loaded.

            aug (int): The number of replicas of each image in the dataset. If aug is 2,
            every original image appears twice (the dataset length is doubled). The
            replicas are only useful together with a random augmentation transform;
            the files on disk are never modified.

            mode (string): Either "Album" or "Torch". With "Torch" the transform is called
            as ``transform(img)``; with "Album" it is called as
            ``transform(image=numpy_array)`` and the "image" entry of the result is used.

            return_name (bool): If True, __getitem__ returns ``(image, file_name)``
            instead of the image alone.

        Raises:
            ValueError: If a transform is given together with an unsupported mode
            (previously the transform was silently skipped in that case).
        """
        if transform is not None and mode not in self._SUPPORTED_MODES:
            raise ValueError(
                "mode must be one of %s, got %r" % (self._SUPPORTED_MODES, mode)
            )

        self.aug = aug
        self.return_name = return_name
        self.__imgs = []
        self.__img_dir = img_dir
        self.__trans = transform
        self.__mode = mode

        if not self.__img_dir.endswith("/"):
            self.__img_dir += '/'

        self.__load_img_names()

    def __len__(self):
        """Return the number of images multiplied by the number of replicas."""
        return self.aug * len(self.__imgs)

    def __getitem__(self, idx):
        """Load the image for ``idx`` and apply the configured transform.

        Indices beyond the number of files wrap around, which is how the
        replicas requested through ``aug`` map back to the original images.
        """
        idx = idx % len(self.__imgs)
        img_name = self.__imgs[idx]
        img = self.__load_img(img_name)

        if self.__trans is not None:
            # Compare by value: identity comparison ("is") on strings only works by
            # accident when both strings happen to be interned.
            if self.__mode == "Torch":
                img = self.__trans(img)
            elif self.__mode == "Album":
                transformed = self.__trans(image=np.array(img))
                img = transformed["image"]

        if self.return_name:
            return img, img_name
        return img

    def __load_img(self, img_name):
        """Open the image file with the given name from the dataset folder."""
        img = Image.open(self.__img_dir + img_name)
        return img

    def __load_img_names(self):
        """Store the names of the files in the dataset folder.

        The names are sorted because the order returned by listdir is arbitrary;
        sorting makes the index-to-file mapping reproducible.
        """
        self.__imgs = sorted(listdir(self.__img_dir))

In [None]:
class vgg_brk(nn.Module):
    """Encoder block: two 3x3 convolutions, each followed by batch norm and ReLU.

    Both convolutions use stride 1 and padding 1, so the spatial size is kept;
    only the channel count changes from ``in_size`` to ``out_size``.
    """

    def __init__(self, in_size, out_size):
        super().__init__()
        layers = []
        # The first stage maps in_size -> out_size, the second out_size -> out_size.
        for channels_in in (in_size, out_size):
            layers += [
                nn.Conv2d(channels_in, out_size, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_size),
                nn.ReLU(inplace=True),
            ]
        self.brk = nn.Sequential(*layers)

    def forward(self, inputs):
        """Run the inputs through the block."""
        return self.brk(inputs)

In [None]:
class vgg_revbrk(nn.Module):
    """Decoder block: two stages of ReLU, batch norm and 3x3 transposed convolution.

    Both transposed convolutions use stride 1 and padding 1, so the spatial size is
    kept. The first stage keeps ``in_size`` channels, the second maps them to ``out_size``.
    """

    def __init__(self, in_size, out_size):
        super().__init__()
        stages = []
        for channels_out in (in_size, out_size):
            stages.extend((
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(in_size),
                nn.ConvTranspose2d(in_size, channels_out, kernel_size=3, stride=1, padding=1),
            ))
        self.revbrk = nn.Sequential(*stages)

    def forward(self, inputs):
        """Run the inputs through the block (the leading ReLU works in place)."""
        return self.revbrk(inputs)

In [None]:
class AutoEncoder(nn.Module):
    """Convolutional autoencoder built from three encoder and three decoder blocks.

    The encoder alternates a vgg_brk block with a 2x2 max pooling (3 -> 64 -> 128 -> 256
    channels). The decoder mirrors it: each max unpooling reuses the indices of the
    matching pooling step and is followed by a vgg_revbrk block (256 -> 128 -> 64 -> 3
    channels). A final sigmoid maps the reconstruction into the range [0, 1].
    """

    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.brk1 = vgg_brk(in_size=3, out_size=64)
        self.brk2 = vgg_brk(in_size=64, out_size=128)
        self.brk4 = vgg_brk(in_size=128, out_size=256)
        # return_indices=True: the pooling positions are needed later by the unpooling.
        self.maxpooling = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False, return_indices=True)
        self.maxunpooling = nn.MaxUnpool2d(kernel_size=2, stride=2, padding=0)
        self.revbrk1 = vgg_revbrk(in_size=256, out_size=128)
        self.revbrk2 = vgg_revbrk(in_size=128, out_size=64)
        self.revbrk4 = vgg_revbrk(in_size=64, out_size=3)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Encode and decode ``x``, returning a reconstruction with the same shape.

        The spatial size seen before every pooling step is recorded and handed to the
        matching unpooling step. Without it, an odd height or width is floored by the
        pooling and the unpooled map (and so the reconstruction) comes back smaller
        than the input. For sizes divisible by 8 the result is unchanged.
        """
        # ---- encoder ----
        x = self.brk1(x)
        size1 = x.shape[-2:]
        x, indices1 = self.maxpooling(x)

        x = self.brk2(x)
        size2 = x.shape[-2:]
        x, indices2 = self.maxpooling(x)

        x = self.brk4(x)
        size4 = x.shape[-2:]
        x, indices4 = self.maxpooling(x)

        # ---- decoder (mirror of the encoder) ----
        x = self.maxunpooling(x, indices4, output_size=size4)
        x = self.revbrk1(x)
        x = self.maxunpooling(x, indices2, output_size=size2)
        x = self.revbrk2(x)
        x = self.maxunpooling(x, indices1, output_size=size1)
        x = self.revbrk4(x)
        x = self.sigmoid(x)
        return x

In [None]:
def save_model_to_drive():
    """Copy the local checkpoint ``data.pt`` into the ``models`` folder of Google Drive.

    Drive is mounted at ``/content/gdrive``. The target folder is created when it is
    missing, so the upload does not fail after a long training run. The checkpoint is
    copied as a file instead of being loaded and saved again, so it does not need to
    be deserialized first.
    """
    drive.mount('/content/gdrive')
    target_dir = "/content/gdrive/My Drive/models"
    os.makedirs(target_dir, exist_ok=True)
    shutil.copyfile('data.pt', os.path.join(target_dir, 'data.pt'))

In [None]:
def train(train_loader, model, optimizer):
    """Train the model for one epoch on a reconstruction (MSE) objective.

    Args:
        train_loader: The DataLoader yielding batches of training images.
        model: The model that will be trained.
        optimizer: The optimizer used for the training.

    Returns:
        The training loss averaged over the samples of the loader's sampler.
    """
    model.train()
    mse = nn.MSELoss()

    # Sum of per-batch mean losses, each weighted by its batch size.
    weighted_loss_sum = 0

    for batch in train_loader:
        inputs = batch.to(device)

        # Forward pass: the target of the autoencoder is its own input.
        reconstruction = model(inputs)
        batch_loss = mse(reconstruction, inputs)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        weighted_loss_sum += batch_loss.item() * batch.shape[0]

    return weighted_loss_sum / len(train_loader.sampler)

In [None]:
def test(eval_loader, model):
    """Evaluate the reconstruction (MSE) loss of the model without updating it.

    Args:
        eval_loader: The DataLoader yielding batches of evaluation images.
        model: The model to be tested.

    Returns:
        The loss averaged over the samples of the loader's sampler.
    """
    model.eval()
    mse = nn.MSELoss()

    # Sum of per-batch mean losses, each weighted by its batch size.
    weighted_loss_sum = 0

    with torch.no_grad():
        for batch in eval_loader:
            inputs = batch.to(device)
            batch_loss = mse(model(inputs), inputs)
            weighted_loss_sum += batch_loss.item() * batch.shape[0]

    return weighted_loss_sum / len(eval_loader.sampler)

In [None]:
def train_model(model, optimizer, starting_epoch=1, patience = 20, batch_size = 32,
                lr_patience=8, shrink_factor=0.8):
    """Run the whole training of the model with early stopping and learning-rate decay.

    After every epoch the model is evaluated on the validation set. When the validation
    loss improves, a checkpoint is saved; otherwise a counter of epochs without
    improvement is increased. Training stops when the counter reaches ``patience``, and
    the learning rate is reduced every ``lr_patience`` epochs without improvement.

    Args:
        model: The model that will be trained.
        optimizer: The optimizer used for the training.
        starting_epoch(int): The epoch number the training starts from. It only matters
            when a previous training is resumed.
        patience(int): The number of consecutive epochs without improvement after which
            the training is stopped.
        batch_size(int): The batch size of the train and validation loaders.
        lr_patience(int): The learning rate is reduced whenever the number of epochs
            without improvement is a multiple of this value. Must be positive.
        shrink_factor(float): The factor the learning rate is multiplied by on each
            reduction.
    """

    # Training data uses the augmentation transform, applied on every access to an image.
    # With aug=2 every original image is served twice per epoch.
    train_dataset_full = ImagesDataset(img_dir='./dataset/train/', transform=album_transform, aug=2)
    train_loader = DataLoader(train_dataset_full, batch_size=batch_size, shuffle=True)

    # Validation data is not augmented, and there is no need to shuffle it.
    valid_dataset_full = ImagesDataset(img_dir='./dataset/valid/', transform=img_transform, mode="Torch")
    valid_loader = DataLoader(valid_dataset_full, batch_size=batch_size, shuffle=False)

    # Lowest validation loss seen so far; infinity guarantees the first epoch is saved.
    best_loss = float("inf")

    epoch = starting_epoch

    # Counter of consecutive epochs without improvement.
    epochs_with_no_improvements = 0

    print('\nStart training of the AutoEncoder\n')

    while True:
        # One epoch: train, then evaluate on the validation set.
        training_loss = train(train_loader, model, optimizer)
        validation_loss = test(valid_loader, model)

        print('epoch {}, training loss:{:.4f}, validation loss:{:.4f}'
          .format(epoch, training_loss, validation_loss))

        if validation_loss < best_loss:
            # New best model: remember its loss, save it and reset the counter.
            best_loss = validation_loss
            save_checkpoint(model,  optimizer, epoch, validation_loss)
            print('\nSave model at epoch {}\n'.format(epoch))
            epochs_with_no_improvements = 0
        else:
            epochs_with_no_improvements += 1

            # Early stopping takes priority over the learning-rate reduction.
            if epochs_with_no_improvements == patience:
                print('\nEnd of the training at epoch {}'.format(epoch))
                break

            elif epochs_with_no_improvements % lr_patience == 0:
                print('\nAdjust learning at epoch {}\n'.format(epoch))
                adjust_learning_rate(optimizer, shrink_factor)

        epoch += 1

In [None]:
def find_anomalies_in_test(model, threshold_loss, batch_size=32):
    """Collect the names of the test images the model reconstructs poorly.

    Args:
        model: The model used for the reconstruction.
        threshold_loss: An image is reported when its mean squared reconstruction
            error is greater than or equal to this value.
        batch_size: The batch size of the test loader.

    Returns:
        The list of file names of the images considered possible junk.
    """
    dataset = ImagesDataset(img_dir='./dataset/test/', transform=img_transform, mode="Torch",return_name=True)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    model.eval()
    # reduction='none' keeps the error of every element so it can be averaged per image.
    elementwise_mse = nn.MSELoss(reduction='none')
    flagged = []

    with torch.no_grad():
        for images, names in loader:
            images = images.to(device)
            errors = elementwise_mse(model(images), images)

            # Pair every file name with the error map of its own image.
            flagged.extend(
                name
                for name, error_map in zip(names, errors)
                if torch.mean(error_map).item() >= threshold_loss
            )

    return flagged

In [None]:
# If activate_training is True, the training starts. The training takes more than 10 hours
# on Google Colab unless you stop it, so it is better to use the pre-trained model.
# The pre-trained model is stored in the data.pt file.
def main(activate_training=False, save_model=False, threshold_loss=0.001):
    """Either train the AutoEncoder or use a saved one to look for junk test images.

    Args:
        activate_training (boolean): True to start the training, False to search for
            possible junk images in the test dataset with the model stored in data.pt.
        save_model (boolean): True to also save the model to Drive after the training.
        threshold_loss (float): Reconstruction error from which a test image is reported
            as possible junk. Only used when activate_training is False.

    Returns:
        The list of names of the possible junk images when activate_training is False;
        None after a training run.
    """

    gc.collect()
    torch.cuda.empty_cache()

    if activate_training:
        # Define the parameters.
        learning_rate = 1e-3
        starting_epoch = 1
        # Define the model.
        model = AutoEncoder().to(device)
        # Define the optimizer.
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                             weight_decay=1e-5)

        # Create the validation folder.
        create_valid()
        # Train the model.
        train_model(model, optimizer, starting_epoch)
        # If requested, save the model to Drive.
        if save_model:
            save_model_to_drive()

    else:
        # Define the model.
        model = AutoEncoder()
        # Load the checkpoint. map_location lets a checkpoint saved on a GPU be loaded
        # when only the CPU is available.
        checkpoint = torch.load("data.pt", map_location=device)
        # Restore the model parameters stored in the checkpoint.
        model.load_state_dict(checkpoint['model_state_dict'])
        model.to(device)
        # Find the list of possible junk images in the test folder.
        result = find_anomalies_in_test(model, threshold_loss)
        return result

In [None]:
# The main() function returns the list of possible junk images in the test folder.
# From the data in the project dataset alone, you cannot tell which images are junk, because there are no labels for the anomaly detection task.
# So, ideally, you would manually check that the images in the list can really be considered anomalies; otherwise, you risk deleting valid images from the test folder in the next tasks.
# To simplify the check, we "cheated" by using as reference the original dataset available at the following link: http://zheng-lab.cecs.anu.edu.au/Project/project_reid.html.
# In the original dataset, the junk images in the test dataset are the ones whose names start with "-1" or "00000".
# So, once we have found the possible junk images, we perform an image-by-image check, using the names in the original test dataset as labels. For each image in the list, we find its equivalent in the original test dataset, and once we find a match, we check whether the name of the matched image starts with either "-1" or "00000"; this way we avoid the manual check.
# At this point, however, you could directly compare the original test dataset with the project test dataset image by image to find the junk images, without relying on the list of possible anomalies, which would render the autoencoder pointless.
# Lastly, the autoencoder must be saved as the data.pt file if you want to find the list of possible junk images in the test folder.
result_anomal = main()

  return torch._C._nn.max_pool2d_with_indices(input, kernel_size, stride, padding, dilation, ceil_mode)
