# **Homework 8 - Anomaly Detection**

If there are any questions, please contact mlta-2023-spring@googlegroups.com

Slide:    [Link](https://docs.google.com/presentation/d/18LkR8qulwSbi3SVoLl1XNNGjQQ_qczs_35lrJWOmHCk/edit?usp=sharing)　Kaggle: [Link](https://www.kaggle.com/t/c76950cc460140eba30a576ca7668d28)

# Set up the environment


## Package installation

In [187]:
# Training progress bar
!pip install -q qqdm

## Downloading data

In [188]:
!git clone https://github.com/chiyuanhsiao/ml2023spring-hw8

fatal: destination path 'ml2023spring-hw8' already exists and is not an empty directory.


# Import packages

In [189]:
import random
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.models as models
from torch.optim import Adam, AdamW
from qqdm import qqdm, format_str
import pandas as pd
import math

# Loading data

In [190]:

# Load the training and testing image arrays from the cloned homework repo.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code from an untrusted file. The printed shapes suggest these
# are plain numeric image arrays, so the flag is presumably unnecessary — confirm.
train = np.load('./ml2023spring-hw8/trainingset.npy', allow_pickle=True)
test = np.load('./ml2023spring-hw8/testingset.npy', allow_pickle=True)

# Sanity check: each array is printed as (num_images, height, width, channels).
print(train.shape)
print(test.shape)

(100000, 64, 64, 3)
(19636, 64, 64, 3)


## Random seed
Set the random seed to a certain value for reproducibility.

In [211]:
def same_seeds(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU, plus all CUDA devices
    when CUDA is available) and switches cuDNN to deterministic mode with
    auto-tuning disabled.

    Args:
        seed: integer seed shared by all generators.
    """
    # cuDNN: no benchmark-driven algorithm selection, deterministic kernels only.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


same_seeds(48763)

# Autoencoder

# Models & loss

In [212]:
# reference: https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions and a shortcut connection.

    The main path is conv-BN-ReLU followed by conv-BN. The shortcut is the
    identity unless the block changes resolution or channel count, in which
    case a 1x1 convolution + BatchNorm projects the input to the right shape.

    Args:
        ic: number of input channels.
        oc: number of output channels.
        stride: stride of the first convolution and of the shortcut projection.
    """

    def __init__(self, ic, oc, stride=1):
        super().__init__()
        needs_projection = stride != 1 or ic != oc

        # First stage of the main path; this is where any downsampling happens.
        self.conv1 = nn.Sequential(
            nn.Conv2d(ic, oc, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(oc),
            nn.ReLU(inplace=True),
        )

        # Second stage keeps the shape; its ReLU is applied after the addition.
        self.conv2 = nn.Sequential(
            nn.Conv2d(oc, oc, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(oc),
        )

        self.relu = nn.ReLU(inplace=True)

        # Shortcut projection, only present when input and output shapes differ.
        self.downsample = (
            nn.Sequential(
                nn.Conv2d(ic, oc, kernel_size=1, stride=stride),
                nn.BatchNorm2d(oc),
            )
            if needs_projection
            else None
        )

    def forward(self, x):
        """Return ReLU(main_path(x) + shortcut(x))."""
        out = self.conv2(self.conv1(x))
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)

# reference: https://blog.csdn.net/weixin_42369818/article/details/125292835
class ResNet(nn.Module):
    """Autoencoder with a residual convolutional encoder and a transposed-conv decoder.

    Encoder: a stride-1 stem convolution followed by four residual stages, each
    of which downsamples by 2, then a fully connected bottleneck producing a
    64-dimensional code. ``nn.Linear(1024, 64)`` expects a flattened 64x4x4
    feature map, which corresponds to 3x64x64 inputs.

    Decoder: a linear layer back to 64x4x4 followed by four stride-2 transposed
    convolutions (4 -> 8 -> 16 -> 32 -> 64) and a Tanh, so reconstructions lie
    in [-1, 1].

    Args:
        block: residual block class, called as ``block(ic, oc, stride)``.
        num_layers: number of blocks in each of the four residual stages.
            Any indexable sequence of four ints works; the default is a tuple
            so that it is not a mutable object shared between instances.
    """

    def __init__(self, block=BasicBlock, num_layers=(2, 1, 1, 1)):
        super().__init__()
        # Stem: 3 -> 32 channels at full resolution.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
        )
        # Four residual stages; every stage halves height and width.
        self.layer0 = self.make_residual(block, 32, 64, num_layers[0], stride=2)
        self.layer1 = self.make_residual(block, 64, 128, num_layers[1], stride=2)
        self.layer2 = self.make_residual(block, 128, 256, num_layers[2], stride=2)
        self.layer3 = self.make_residual(block, 256, 64, num_layers[3], stride=2)

        # Bottleneck: flattened 64x4x4 feature map -> 64-d latent code.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
        )

        # Decoder: 64-d code -> 64x4x4 -> ... -> 3x64x64, squashed by Tanh.
        self.decoder = nn.Sequential(
            nn.Linear(64, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Unflatten(1, (64, 4, 4)),
            nn.ConvTranspose2d(64, 128, 4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 256, 4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        )

    def make_residual(self, block, ic, oc, num_layer, stride=1):
        """Build one residual stage of ``num_layer`` blocks.

        Only the first block changes the channel count (``ic`` -> ``oc``) and
        applies ``stride``; the remaining blocks map ``oc`` -> ``oc`` with the
        block's default stride.
        """
        layers = [block(ic, oc, stride)]
        for _ in range(1, num_layer):
            layers.append(block(oc, oc))
        return nn.Sequential(*layers)

    def encoder(self, x):
        """Encode a batch of images into 64-dimensional latent codes."""
        x = self.conv1(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.fc(x)
        return x

    def forward(self, x):
        """Return the reconstruction ``decoder(encoder(x))``."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# reference: https://blog.csdn.net/weixin_42369818/article/details/125292835
class Auxiliary(nn.Module):
    """Stand-alone decoder that maps a 64-dimensional code to a 3x64x64 image.

    The code is projected to a 64x4x4 feature map, upsampled by four stride-2
    transposed convolutions (4 -> 8 -> 16 -> 32 -> 64) and passed through a
    Tanh, so the output lies in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        # Latent vector -> 64x4x4 feature map.
        stages = [
            nn.Linear(64, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Unflatten(1, (64, 4, 4)),
        ]
        # Three upsampling stages that share the same conv-BN-ReLU layout.
        for in_ch, out_ch in ((64, 128), (128, 256), (256, 128)):
            stages += [
                nn.ConvTranspose2d(in_ch, out_ch, 4, stride=2, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
            ]
        # Final upsampling to 3 channels, bounded by Tanh.
        stages += [
            nn.ConvTranspose2d(128, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        ]
        self.decoder = nn.Sequential(*stages)

    def forward(self, x):
        """Decode a batch of latent codes into images."""
        return self.decoder(x)

# Dataset module

Module for obtaining and processing data. The transform resizes each image to 256×256, center-crops it to 196×196, resizes it back to 64×64, and then normalizes the pixel values from [0, 255] to [-1.0, 1.0].


In [213]:
class CustomTensorDataset(TensorDataset):
    """Image dataset that runs a fixed preprocessing pipeline on every sample.

    Args:
        tensors: a single tensor of images. If its last dimension has size 3
            it is treated as channels-last and permuted to (N, C, H, W).
    """

    def __init__(self, tensors):
        # A trailing dimension of 3 means channels-last; move channels forward.
        channels_last = tensors.shape[-1] == 3
        self.tensors = tensors.permute(0, 3, 1, 2) if channels_last else tensors

        # reference: https://github.com/ncku-yee/ML2022/blob/master/HW08
        # Enlarge, keep the central 196x196 region, shrink back to 64x64,
        # then convert to float32 and rescale [0, 255] -> [-1.0, 1.0].
        steps = [
            transforms.Resize((256, 256), antialias=False),
            transforms.CenterCrop((196, 196)),
            transforms.Resize((64, 64), antialias=False),
            transforms.Lambda(lambda x: x.to(torch.float32)),
            transforms.Lambda(lambda x: 2. * x/255. - 1.),
        ]
        self.transform = transforms.Compose(steps)

    def __getitem__(self, index):
        """Return the sample at ``index``, transformed when a transform is set."""
        sample = self.tensors[index]
        if not self.transform:
            return sample
        return self.transform(sample)

    def __len__(self):
        """Number of samples (size of the first dimension of the tensor)."""
        return len(self.tensors)

# Training

## Configuration


In [214]:
# Training hyperparameters
num_epochs = 15
batch_size = 32 # mini-batch size for training; can be lowered if needed
learning_rate = 1e-3

# Build training dataloader
# torch.from_numpy wraps the NumPy array without copying it.
x = torch.from_numpy(train)
train_dataset = CustomTensorDataset(x)

# RandomSampler draws the training samples in a new random order every epoch.
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)


# Model
# model_type also names the checkpoint files. Only 'resnet' is registered below.
model_type = 'resnet'   # key into model_classes
model_classes = {'resnet': ResNet()}
model = model_classes[model_type].cuda()
# Auxiliary decoder trained on the main model's latent codes (see training loop).
aux = Auxiliary().cuda()

# Loss and optimizer
# Mean squared reconstruction error; one AdamW optimizer per network.
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
optimizer_a = torch.optim.AdamW(aux.parameters(), lr=learning_rate)

## Training loop

In [None]:
# Train the autoencoder (`model`) and the auxiliary decoder (`aux`) side by side.
# For every batch:
#   1. `model` takes one optimizer step on its own reconstruction MSE.
#   2. `model` is frozen (eval mode, no_grad) and `aux` takes one step on
#      exp(temp * MSE(aux_out, model_out)) * MSE(aux_out, img),
#      decoding the latent code produced by the frozen `model`.
# After every epoch the "last" checkpoints are written, and the "best"
# checkpoints are refreshed whenever the mean loss of `model` improves.
best_loss = np.inf

qqdm_train = qqdm(range(num_epochs), desc=format_str('bold', 'Description'))
for epoch in qqdm_train:
    tot_loss = []
    tot_loss_a = []
    aux.train()
    # Exponent scale of the auxiliary loss weight; grows by 1 every two epochs.
    temp = epoch // 2 + 1
    for data in train_dataloader:

        # ===================loading=====================
        img = data.float().cuda()

        # ===================forward=====================
        # `model` is switched to eval mode further down, so re-enable training
        # mode at the start of every iteration.
        model.train()
        output = model(img)

        # nn.MSELoss() already reduces to a scalar mean; no extra .mean() needed.
        loss = criterion(output, img)
        tot_loss.append(loss.item())
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # ===================auxiliary===================
        model.eval()

        with torch.no_grad():
            # Encode once and decode that same code, instead of running the
            # encoder a second time through model(img).
            z = model.encoder(img)
            output = model.decoder(z)

        output_a = aux(z)
        # z and output carry no gradient, so only `aux` is updated by this loss.
        loss_a = (criterion(output_a, output).mul(temp).exp()) * criterion(output_a, img)

        tot_loss_a.append(loss_a.item())

        optimizer_a.zero_grad()
        loss_a.backward()
        optimizer_a.step()

    # ===================save_best====================
    mean_loss = np.mean(tot_loss)
    mean_loss_a = np.mean(tot_loss_a)
    # Model selection looks only at the main model's loss; `aux` is saved with it.
    if mean_loss < best_loss:
        best_loss = mean_loss
        torch.save(model, 'best_model_{}.pt'.format(model_type))
        torch.save(aux, 'best_model_a_{}.pt'.format(model_type))
    # ===================log========================
    qqdm_train.set_infos({
        'epoch': f'{epoch + 1:.0f}/{num_epochs:.0f}',
        'loss': f'{mean_loss:.4f}',
        'loss_a':f'{mean_loss_a:.4f}'
    })
    # ===================save_last========================
    torch.save(model, 'last_model_{}.pt'.format(model_type))
    torch.save(aux, 'last_model_a_{}.pt'.format(model_type))

 [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m                                               
 [99m0/[93m15[0m[0m   [99m        -        [0m  [99m   -    [0m                                             
[1mDescription[0m   0.0% |                                                           |[K[F[K[F [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m    [1mepoch[0m   [1mloss[0m   [1mloss_a[0m                      
 [99m1/[93m15[0m[0m   [99m00:06:53<[93m01:36:26[0m[0m  [99m0.00it/s[0m  [99m1/15[0m   [99m0.0703[0m  [99m0.0753[0m                      
[1mDescription[0m   6.7% |[97m█[0m[97m█[0m[97m█[0m                                                        |[K[F[K[F [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m    [1mepoch[0m   [1mloss[0m   [1mloss_a[0m                      
 [99m2/[93m15[0m[0m   [99m00:13:46<[93m01:29:30[0m[0m  [99m0.00it/s[0m  [99m2/15[0m   [99m0.0364[0m  [99m0.0364[0m          

# Inference
The trained model is loaded and used to generate anomaly-score predictions.

## Initialize
- dataloader
- model
- prediction file

In [None]:
eval_batch_size = 200

# build testing dataloader
# SequentialSampler keeps the test images in file order, so the row index of
# each prediction matches the image index.
data = torch.tensor(test, dtype=torch.float32)
test_dataset = CustomTensorDataset(data)
test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=eval_batch_size, num_workers=1)
# reduction='none' keeps per-element errors so a score can be computed per image.
eval_loss = nn.MSELoss(reduction='none')

# load trained model
# NOTE: the checkpoints hold whole pickled modules (torch.save(model, ...)),
# so only load checkpoint files you created yourself or otherwise trust.
checkpoint_path = f'best_model_{model_type}.pt'
checkpoint_path_a = f'best_model_a_{model_type}.pt'
model = torch.load(checkpoint_path)
aux = torch.load(checkpoint_path_a)

# Put BOTH networks in eval mode. `aux` is pickled while in training mode, so
# without this its BatchNorm layers would normalise with per-batch statistics
# and the auxiliary scores would depend on how the test set is batched.
model.eval()
aux.eval()

# prediction file 
out_file = 'prediction.csv'
out_file_a = 'prediction_a.csv'

In [None]:
# Score every test image with both decoders and write one CSV per decoder.
# Main model:  sqrt of the per-image SUM of squared reconstruction errors.
# Auxiliary:   sqrt of the per-image MEAN of squared reconstruction errors.
anomality = []
auxs = []
with torch.no_grad():
    for batch in test_dataloader:
        img = batch.float().cuda()
        encoded_img = model.encoder(img)
        # Both decoders reconstruct from the same latent code.
        output = model.decoder(encoded_img)
        output_a = aux(encoded_img)
        # Reduce over channel, height and width; keep one value per image.
        anomality.append(eval_loss(output, img).sum([1, 2, 3]))
        auxs.append(eval_loss(output_a, img).mean([1, 2, 3]))

n_test = len(test)
anomality = torch.sqrt(torch.cat(anomality, dim=0)).reshape(n_test, 1).cpu().numpy()
auxs = torch.sqrt(torch.cat(auxs, dim=0)).reshape(n_test, 1).cpu().numpy()

# The DataFrame index becomes the 'ID' column of each CSV.
df = pd.DataFrame(anomality, columns=['score'])
df.to_csv(out_file, index_label = 'ID')
df_a = pd.DataFrame(auxs, columns=['score'])
df_a.to_csv(out_file_a, index_label = 'ID')