# Summary

Shared notebook of reusable components (classes, functions, etc.) intended to be used from other notebooks.

In [None]:
import os
import gc
import cv2
import math
import copy
import time
import random
import glob
from matplotlib import pyplot as plt

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

import torchmetrics

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# For colored terminal text
from colorama import Fore, Back, Style
# Short aliases used inside f-strings when printing checkpoint messages:
# b_ switches the text to blue, sr_ resets all styling.
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
# NOTE(review): this silences every warning category for the whole kernel,
# including deprecation warnings from the libraries imported above.
warnings.filterwarnings("ignore")

# For descriptive error messages
# NOTE(review): presumably this makes CUDA kernel launches synchronous so errors
# point at the failing call; that usually costs throughput -- confirm it is
# still wanted outside of debugging.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
# Central run configuration shared by the helpers defined in this notebook.
# Keys not read by any helper below are presumably consumed by the notebooks
# that reuse these helpers -- verify against the callers.
CONFIG = {
    "seed": 42,
    "epochs": 16,
    "img_size": 96,  # side length passed to A.Resize in data_transforms
    "model_name": "tf_efficientnet_b0",
    "num_classes": 2,
    "train_batch_size": 32,  # read by prepare_loaders
    "valid_batch_size": 32,  # read by prepare_loaders and generate_submit_csv
    "learning_rate": 1e-4,
    "scheduler": 'CosineAnnealingLR',  # selects the branch taken in fetch_scheduler
    "min_lr": 1e-6,  # passed as eta_min in fetch_scheduler
    "T_max": 500,  # train_one_epoch steps the scheduler per optimizer step, so this counts steps, not epochs
    # NOTE(review): fetch_scheduler reads CONFIG['T_0'] for 'CosineAnnealingWarmRestarts',
    # but no "T_0" key is defined here.
    "weight_decay": 1e-6,
    "train_size": 8000, # To have less training times, we do not want to train all for the baseline
    "val_size" : 200,
    "n_accumulate": 1,  # gradient-accumulation factor read by train_one_epoch
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "generate_submit": True, # Since generating submit csv is slow, for some troubleshooting/testing we do not want to generate it
    "force_retrain": False, # Retrain the model even if the PREVIOUS_BEST_WEIGHT is loaded
}

# True when the kernel's connection-file path contains the substring 'runtime'.
# run_training and generate_submit_csv show tqdm progress bars only when this is True
# and fall back to plain print() progress lines otherwise.
# NOTE(review): presumably this separates an interactive session from a batch run
# on the hosting platform -- confirm. Requires an IPython kernel (get_ipython()).
IS_INTERACTIVE = ('runtime' in get_ipython().config.IPKernelApp.connection_file)

In [None]:
def set_seed(seed=42):
    '''Seed every random number generator this notebook relies on.
    This is for REPRODUCIBILITY.

    Seeds Python's `random` module, NumPy, and PyTorch (CPU and all CUDA
    devices), and switches cuDNN to deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    '''
    # Python's own RNG was previously left unseeded, so any code drawing from
    # `random` produced different values on every run.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed. The running interpreter has already
    # fixed its hash seed at start-up, so this only affects interpreters
    # launched afterwards that inherit the environment.
    os.environ['PYTHONHASHSEED'] = str(seed)

## Dataset class

In [None]:
class HCDDataset(Dataset):
    '''Dataset that reads images from disk and pairs them with integer labels.

    Args:
        df: table with a 'file_path' column (image path per row) and a
            'label' column (class index per row).
        transforms: optional callable invoked as `transforms(image=img)` whose
            result is a mapping with an "image" entry (the albumentations
            calling convention used by `data_transforms`).
    '''

    def __init__(self, df, transforms=None):
        self.file_names = df['file_path'].values
        self.labels = df['label'].values
        self.transforms = transforms

    def __len__(self):
        '''Return the number of samples.'''
        return len(self.labels)

    def __getitem__(self, index):
        '''Load sample `index`.

        Returns:
            dict with 'image' (RGB image, transformed when `transforms` is set)
            and 'label' (0-dim long tensor).

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        '''
        img_path = self.file_names[index]
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if img is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.labels[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return {
            'image': img,
            'label': torch.tensor(label, dtype=torch.long)
        }

## Augmentations

In [None]:
# Albumentations pipelines keyed by split name.
# "train" applies random geometric and colour augmentation after resizing;
# "valid" only resizes. Both end with ToTensorV2 and are also reused for
# test-time inference via transforms["valid"] in generate_submit_csv.
# NOTE(review): neither pipeline contains a normalization step, and the
# training/validation loops only cast the batch to float -- confirm the model
# is meant to receive un-normalized pixel values.
data_transforms = {
    "train": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        # NOTE(review): A.Flip is followed by a separate A.VerticalFlip; A.Flip
        # appears to be deprecated in recent albumentations releases -- confirm
        # against the installed version.
        A.Flip(p=0.5),
        A.VerticalFlip(p=0.5),
        # Always applied (p=1.0).
        A.RandomRotate90(p=1.0),
        A.ShiftScaleRotate(shift_limit=0.1, 
                           scale_limit=0.15, 
                           rotate_limit=60, 
                           p=0.5),
        # NOTE(review): limits of 0.2 look very small if this transform
        # interprets them on an integer pixel/hue scale -- confirm the intended
        # augmentation strength.
        A.HueSaturationValue(
                hue_shift_limit=0.2, 
                sat_shift_limit=0.2, 
                val_shift_limit=0.2, 
                p=0.5
            ),
        A.RandomBrightnessContrast(
                brightness_limit=(-0.1,0.1), 
                contrast_limit=(-0.1, 0.1), 
                p=0.5
            ),
        ToTensorV2()], p=1.),
    
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        ToTensorV2()], p=1.)
}

## GeM Pooling

Code taken from <a href="https://amaarora.github.io/2020/08/30/gempool.html">GeM Pooling Explained</a>

In [None]:
class GeM(nn.Module):
    '''Generalized-mean pooling over the last two dimensions.

    The input is clamped from below at `eps`, raised to the power `p`,
    averaged over its full spatial extent, and the result is raised to `1/p`.
    `p` is stored as a learnable one-element parameter.
    '''

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        '''Pool `x` using the module's current exponent and epsilon.'''
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        '''Apply generalized-mean pooling with an explicit `p` and `eps`.'''
        # The pooling window spans the entire last two dimensions.
        window = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        averaged = F.avg_pool2d(powered, window)
        return averaged.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

## Model

In [None]:
def load_weights(model, weights_path, device=CONFIG["device"]):
    '''Load a saved state dict into `model` if the weights file exists.

    Args:
        model: module whose `load_state_dict` receives the loaded weights.
        weights_path: path to the file to load with `torch.load`.
        device: passed as `map_location`; the default is CONFIG["device"] as it
            was when this function was defined.

    Returns:
        True when weights were loaded, False when `weights_path` is not a file.
    '''
    # Guard clause: nothing to load.
    if not os.path.isfile(weights_path):
        print("No previous weights available at ", weights_path)
        return False

    state_dict = torch.load(weights_path, map_location=device)
    model.load_state_dict(state_dict)
    print("Loaded previous weights")
    return True

class HCDModel(nn.Module):
    '''timm backbone followed by GeM pooling and a linear classification head.

    The backbone's own `classifier` and `global_pool` are replaced with
    identities, so it returns whatever precedes them; GeM then pools that
    output and a linear layer maps it to `num_classes` logits.

    Args:
        model_name: architecture name handed to `timm.create_model`; the
            created model must expose a `classifier` with `in_features`.
        num_classes: number of output logits.
        pretrained: forwarded to `timm.create_model`.
    '''

    def __init__(self, model_name, num_classes, pretrained=True):
        super().__init__()

        self.pooling = GeM()
        # Not used in forward(); the train/valid/submit helpers call it on the logits.
        self.sigmoid = nn.Sigmoid()

        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Read the feature width before the original head is removed.
        head_in_features = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        backbone.global_pool = nn.Identity()
        self.cnn = backbone

        self.linear = nn.Linear(head_in_features, num_classes)

    def forward(self, images):
        '''Return the raw class logits for a batch of images.'''
        feature_maps = self.cnn(images)
        pooled = self.pooling(feature_maps)
        embeddings = pooled.flatten(1)
        logits = self.linear(embeddings)
        return logits

## Loss function

In [None]:
def criterion(outputs, labels):
    '''Mean cross-entropy between raw class logits and integer class labels.'''
    # Functional form of nn.CrossEntropyLoss with its default settings.
    return F.cross_entropy(outputs, labels)

## Training and validation functions

In [None]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch, disable_progress_bar=False):
    '''Run one training pass over `dataloader` and return the epoch metrics.

    Args:
        model: network returning one logit per class. Column 1 is treated as
            the positive class (assumes a two-class head -- TODO confirm
            against CONFIG['num_classes']).
        optimizer: stepped every CONFIG['n_accumulate'] batches, and once more
            on the final batch so a trailing partial group is not lost.
        scheduler: LR scheduler stepped right after each optimizer step, or None.
        dataloader: yields dicts with 'image' and 'label' entries.
        device: device the batches and the metric states are moved to.
        epoch: epoch number, used only for logging.
        disable_progress_bar: when True the tqdm bar is hidden and plain
            progress lines are printed about ten times per epoch instead.

    Returns:
        Tuple (epoch_loss, epoch_acc, epoch_auroc) of Python floats, where
        epoch_loss is the sample-weighted mean of the un-scaled batch losses.

    Raises:
        ValueError: if `dataloader` has no batches.
    '''
    model.train()

    n_accumulate = CONFIG['n_accumulate']
    total_step = len(dataloader)
    if total_step == 0:
        # Previously this surfaced as an UnboundLocalError at the end of the function.
        raise ValueError("train_one_epoch received a dataloader with no batches")
    log_step = math.ceil(total_step / 10) # we log each epoch 10 times in log

    b_acc = torchmetrics.classification.BinaryAccuracy().to(device)
    b_auroc = torchmetrics.classification.BinaryAUROC().to(device)

    dataset_size = 0
    running_loss = 0.0

    bar = tqdm(enumerate(dataloader), total=total_step, disable=disable_progress_bar)
    epoch_start_time = time.time()

    # Start the epoch from clean gradients.
    optimizer.zero_grad()

    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        batch_size = images.size(0)

        outputs = model(images)
        loss = criterion(outputs, labels)
        # Only the gradient is scaled for accumulation; `loss` itself stays
        # un-scaled so the reported epoch loss does not shrink with n_accumulate.
        (loss / n_accumulate).backward()

        # Step on every full accumulation group and on the last batch, so the
        # gradients of a trailing partial group are applied rather than leaking
        # into the next epoch.
        if (step + 1) % n_accumulate == 0 or (step + 1) == total_step:
            optimizer.step()

            # zero the parameter gradients
            optimizer.zero_grad()

            if scheduler is not None:
                scheduler.step()

        logits = outputs.detach()
        # Arg-max directly on the logits: a sigmoid can saturate both columns
        # to the same value and turn the arg-max into a tie.
        predicted = logits.argmax(dim=1)
        # AUROC ranks samples, so it needs a continuous positive-class score,
        # not the hard 0/1 prediction.
        positive_prob = torch.softmax(logits, dim=1)[:, 1]

        b_acc.update(predicted, labels)
        b_auroc.update(positive_prob, labels)

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        epoch_loss = running_loss / dataset_size
        epoch_time = round(time.time() - epoch_start_time)

        if not disable_progress_bar:
            # Running metrics are computed only when they are actually displayed.
            bar.set_postfix(Epoch=epoch, Epoch_Time=epoch_time, Train_Loss=epoch_loss,
                            Train_Acc=b_acc.compute().item(), Train_AUROC=b_auroc.compute().item(),
                            LR=optimizer.param_groups[0]['lr'])
        elif step % log_step == 0:
            print(step, "/", total_step)

    epoch_acc = b_acc.compute().item()
    epoch_auroc = b_auroc.compute().item()

    if disable_progress_bar:
        print(f"Epoch={epoch}, Epoch_Time={epoch_time}, Train_Loss={epoch_loss}, Train_Acc={epoch_acc}, Train_AUROC={epoch_auroc},LR={optimizer.param_groups[0]['lr']}")

    gc.collect()

    return epoch_loss, epoch_acc, epoch_auroc

@torch.inference_mode()
def valid_one_epoch(model, dataloader, device, epoch, disable_progress_bar=False):
    '''Evaluate `model` on `dataloader` and return the epoch metrics.

    Runs under `torch.inference_mode()` with the model in eval mode.

    Args:
        model: network returning one logit per class. Column 1 is treated as
            the positive class (assumes a two-class head -- TODO confirm
            against CONFIG['num_classes']).
        dataloader: yields dicts with 'image' and 'label' entries.
        device: device the batches and the metric states are moved to.
        epoch: epoch number, used only for logging.
        disable_progress_bar: when True the tqdm bar is hidden and a single
            summary line is printed at the end instead.

    Returns:
        Tuple (epoch_loss, epoch_acc, epoch_auroc) of Python floats.

    Raises:
        ValueError: if `dataloader` has no batches.
    '''
    model.eval()

    total_step = len(dataloader)
    if total_step == 0:
        # Previously this surfaced as an UnboundLocalError at the end of the function.
        raise ValueError("valid_one_epoch received a dataloader with no batches")

    dataset_size = 0
    running_loss = 0.0
    b_acc = torchmetrics.classification.BinaryAccuracy().to(device)
    b_auroc = torchmetrics.classification.BinaryAUROC().to(device)

    bar = tqdm(enumerate(dataloader), total=total_step, disable=disable_progress_bar)
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)

        batch_size = images.size(0)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # Arg-max directly on the logits: a sigmoid can saturate both columns
        # to the same value and turn the arg-max into a tie.
        predicted = outputs.argmax(dim=1)
        # AUROC ranks samples, so it needs a continuous positive-class score,
        # not the hard 0/1 prediction.
        positive_prob = torch.softmax(outputs, dim=1)[:, 1]

        b_acc.update(predicted, labels)
        b_auroc.update(positive_prob, labels)

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        epoch_loss = running_loss / dataset_size

        if not disable_progress_bar:
            # Running metrics are computed only when they are actually displayed.
            bar.set_postfix(Epoch=epoch, Valid_Loss=epoch_loss,
                            Valid_Acc=b_acc.compute().item(), Valid_AUROC=b_auroc.compute().item())

    epoch_acc = b_acc.compute().item()
    epoch_auroc = b_auroc.compute().item()

    if disable_progress_bar:
        print(f"Epoch={epoch}, Valid_Loss={epoch_loss}, Valid_Acc={epoch_acc}, Valid_AUROC={epoch_auroc}")

    gc.collect()

    return epoch_loss, epoch_acc, epoch_auroc

## Run training function

In [None]:
def run_training(model, optimizer, scheduler, device, num_epochs, train_loader, valid_loader):
    '''Train for `num_epochs`, checkpointing on validation AUROC.

    After each epoch the train/validation metrics and the current learning rate
    are appended to a history dict. Whenever the validation AUROC is at least
    as high as the best seen so far, the weights are remembered and also saved
    to "AUROC.._Loss.._epoch...bin" in the current working directory.

    Args:
        model: network to train; updated in place.
        optimizer: optimizer passed to `train_one_epoch`.
        scheduler: LR scheduler passed to `train_one_epoch`, or None.
        device: device used for both the training and the validation pass.
        num_epochs: number of epochs to run (epochs are numbered from 1).
        train_loader: loader for the training pass.
        valid_loader: loader for the validation pass.

    Returns:
        Tuple (model, history): the model with the best weights loaded, and a
        defaultdict(list) with keys 'Train Loss', 'Valid Loss',
        'Train Accuracy', 'Valid Accuracy', 'Train AUROC', 'Valid AUROC', 'lr'.
    '''
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))

    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch_auroc = -np.inf
    history = defaultdict(list)

    for epoch in range(1, num_epochs + 1):
        gc.collect()
        # Honour the `device` argument instead of silently reading CONFIG['device'].
        train_epoch_loss, train_epoch_acc, train_epoch_auroc = train_one_epoch(
            model, optimizer, scheduler,
            dataloader=train_loader,
            device=device, epoch=epoch,
            disable_progress_bar=(not IS_INTERACTIVE)
        )

        val_epoch_loss, val_epoch_acc, val_epoch_auroc = valid_one_epoch(
            model, valid_loader, device=device, epoch=epoch,
            disable_progress_bar=(not IS_INTERACTIVE)
        )

        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(val_epoch_loss)
        history['Train Accuracy'].append(train_epoch_acc)
        history['Valid Accuracy'].append(val_epoch_acc)
        history['Train AUROC'].append(train_epoch_auroc)
        history['Valid AUROC'].append(val_epoch_auroc)
        # Read the learning rate from the optimizer: this also works when
        # `scheduler` is None and avoids calling scheduler.get_lr() outside of
        # a scheduler step.
        history['lr'].append( optimizer.param_groups[0]['lr'] )

        # deep copy the model
        # `<=` means a tie also counts as an improvement, so the later epoch wins.
        if best_epoch_auroc <= val_epoch_auroc:
            print(f"{b_}Validation AUROC Improved ({best_epoch_auroc} ---> {val_epoch_auroc})")
            best_epoch_auroc = val_epoch_auroc
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = "AUROC{:.2f}_Loss{:.4f}_epoch{:.0f}.bin".format(best_epoch_auroc, val_epoch_loss, epoch)
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            print(f"Model Saved{sr_}")

        print()

    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best AUROC: {:.4f}".format(best_epoch_auroc))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, history

In [None]:
def fetch_scheduler(optimizer):
    '''Build the learning-rate scheduler selected by CONFIG['scheduler'].

    Args:
        optimizer: optimizer the scheduler is attached to.

    Returns:
        A `CosineAnnealingLR` or `CosineAnnealingWarmRestarts` instance, or
        None when CONFIG['scheduler'] is None.

    Raises:
        KeyError: if 'CosineAnnealingWarmRestarts' is selected but CONFIG has
            no 'T_0' entry.
        ValueError: if CONFIG['scheduler'] names an unsupported scheduler
            (previously this surfaced as an UnboundLocalError).
    '''
    scheduler_name = CONFIG['scheduler']

    if scheduler_name is None:
        return None

    if scheduler_name == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['T_max'],
                                              eta_min=CONFIG['min_lr'])

    if scheduler_name == 'CosineAnnealingWarmRestarts':
        if 'T_0' not in CONFIG:
            raise KeyError("CONFIG['T_0'] must be set to use CosineAnnealingWarmRestarts")
        return lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CONFIG['T_0'],
                                                        eta_min=CONFIG['min_lr'])

    raise ValueError(
        f"Unsupported scheduler {scheduler_name!r}; expected 'CosineAnnealingLR', "
        "'CosineAnnealingWarmRestarts' or None"
    )

## Prepare loaders

In [None]:
def prepare_loaders(train_df, val_df):
    '''Build the training and validation DataLoaders.

    Args:
        train_df: table passed to HCDDataset with the "train" transforms.
        val_df: table passed to HCDDataset with the "valid" transforms.

    Returns:
        Tuple (train_loader, valid_loader). The training loader shuffles and
        drops the last incomplete batch; the validation loader does neither.
    '''
    datasets = {
        "train": HCDDataset(train_df, transforms=data_transforms["train"]),
        "valid": HCDDataset(val_df, transforms=data_transforms["valid"]),
    }

    # Settings shared by both loaders.
    common_kwargs = dict(num_workers=2, pin_memory=True)

    train_loader = DataLoader(
        datasets["train"],
        batch_size=CONFIG['train_batch_size'],
        shuffle=True,
        drop_last=True,
        **common_kwargs,
    )
    valid_loader = DataLoader(
        datasets["valid"],
        batch_size=CONFIG['valid_batch_size'],
        shuffle=False,
        **common_kwargs,
    )

    return train_loader, valid_loader

## Plot history

In [None]:
def plot_history(history, metric_name):
    '''Plot the training and validation curves of one metric.

    Args:
        history: a pandas DataFrame, or anything `pd.DataFrame` accepts (such
            as the dict of equal-length lists returned by `run_training`),
            holding the columns f"Train {metric_name}" and f"Valid {metric_name}".
        metric_name: metric suffix, e.g. "Loss", "Accuracy" or "AUROC".
    '''
    # run_training returns a dict of lists, so convert it here instead of
    # requiring every caller to build the DataFrame first.
    if not isinstance(history, pd.DataFrame):
        history = pd.DataFrame(history)

    epochs_axis = range(history.shape[0])
    plt.plot( epochs_axis, history[f"Train {metric_name}"].values, label=f"Train {metric_name}")
    plt.plot( epochs_axis, history[f"Valid {metric_name}"].values, label=f"Valid {metric_name}")
    plt.xlabel("epochs")
    plt.ylabel(metric_name)
    plt.grid()
    plt.legend()
    plt.show()

## Generate submit csv

In [None]:
def generate_submit_csv(model, root_folder, transforms):
    '''Predict a label for every test image and write "submission.csv".

    Reads "sample_submission.csv" from `root_folder` for the image ids, loads
    each image from `root_folder`/test/<id>.tif, predicts with `model` and
    writes the "id" and "label" columns to "submission.csv" in the current
    working directory.

    Args:
        model: trained network returning one logit per class.
        root_folder: directory holding "sample_submission.csv" and "test/".
        transforms: mapping whose "valid" entry is the inference transform.

    NOTE(review): the written label is the hard arg-max class, not a
    probability -- confirm this is what the submission format expects.
    '''
    test_df = pd.read_csv(os.path.join(root_folder, "sample_submission.csv"))
    test_df["file_path"] = test_df["id"].apply(lambda image_id: os.path.join(root_folder, "test", f"{image_id}.tif"))
    print("test_df shape:", test_df.shape)

    test_dataset = HCDDataset(test_df, transforms=transforms["valid"])
    test_loader = DataLoader(test_dataset, batch_size=CONFIG['valid_batch_size'],
                                  num_workers=2, shuffle=False, pin_memory=True)

    preds = []

    total_step = len(test_loader)
    log_step = math.ceil(total_step / 10) # we log each epoch 10 times in log

    # A model whose weights were only loaded (never run through
    # valid_one_epoch) is still in training mode, which leaves BatchNorm and
    # Dropout layers in their training behaviour. Switch to eval mode for
    # inference and restore the previous mode afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            bar = tqdm(enumerate(test_loader), total=len(test_loader), disable=(not IS_INTERACTIVE))
            for step, data in bar:
                images = data['image'].to(CONFIG["device"], dtype=torch.float)

                outputs = model(images)
                # Arg-max directly on the logits: a sigmoid can saturate both
                # columns to the same value and turn the arg-max into a tie.
                predicted = outputs.argmax(dim=1)

                preds.extend(predicted.tolist())

                if not IS_INTERACTIVE and (step%log_step==0):
                    print(step, "/", total_step)
    finally:
        model.train(was_training)

    print(len(preds))

    test_df["label"] = preds
    test_df[["id", "label"]].to_csv("submission.csv", index=False)