# Import packages

In [None]:
import os
import time
import kornia
import torch
import torch.nn as nn
import torch.optim as optim

import os
from torch.utils.data import Dataset
from torchvision import datasets, transforms, models
from torch.nn import functional as F
import torchvision.transforms as T

from torchvision.io import read_image
import torchvision

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter

from dataclasses import dataclass

import kornia as K
import random

import torchvision.models as models


# Training parameters

In [None]:
@dataclass
class TrainingConfiguration:
    """Hyper-parameters and environment settings shared by the notebook cells."""
    # number of passes over the training set
    epochs_count: int = 100
    # dataset root; the csv files and the imgs/ and masks/ folders are resolved relative to it
    data_path: str = '/kaggle/input/opencv-pytorch-course-segmentation'
    # worker processes used by the training DataLoader
    num_workers: int = 2
    # samples per batch for every DataLoader
    batch_size: int = 16
    # validation (and sample plotting) runs every `plot_interval` epochs
    plot_interval: int = 5
    # GPU when available, CPU otherwise (this is a torch.device, not a string)
    device: torch.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # initial learning rate; note that 10e-4 == 1e-3
    lr: float = 10e-4
    # per-epoch decay used by the scheduler: lr_factor = 1 / (1 + decay_rate * epoch)
    decay_rate: float = 0.1

TrainConfig = TrainingConfiguration()

# Find mean and std of the trainset

In [None]:
# def get_mean_std(num_workers=2):
    
#     transform = transforms.Compose([
#         transforms.ToPILImage(),
#         transforms.ToTensor()
#         ])
    
#     loader = torch.utils.data.DataLoader(
#         SemSegDataset(data_path, csv_fname, train_val_test, transforms = transform),
#         batch_size=8)
        
#     batch_mean = torch.zeros(3)
#     batch_mean_sqrd = torch.zeros(3)
    
#     for batch_data, _ in loader:
#         batch_mean += batch_data.mean(dim=(0, 2, 3)) 
#         batch_mean_sqrd += (batch_data ** 2).mean(dim=(0, 2, 3)) 
    
#     mean = batch_mean / len(loader)
    
#     var = (batch_mean_sqrd / len(loader)) - (mean ** 2)
        
#     std = var ** 0.5
    
#     return mean, std

# mean,std = get_mean_std()

In [None]:
# # Per-channel mean and std measured on this train set (kept for reference, currently disabled)
# t_mean = [0.4516, 0.5142, 0.4693]
# t_std = [0.1720, 0.1528, 0.1902]

# Per-channel mean and std used for transfer learning.
# NOTE(review): these match the ImageNet statistics commonly used with
# torchvision's pretrained backbones — confirm they suit the chosen weights.
t_mean = [0.485, 0.456, 0.406] 
t_std = [0.229, 0.224, 0.225]

# Dataset Class

In [None]:
def _normalized_pipeline(*pil_ops):
    """Wrap the given PIL-level operations between ToPILImage and ToTensor + Normalize."""
    return T.Compose([
        T.ToPILImage(),
        *pil_ops,
        T.ToTensor(),
        T.Normalize(t_mean, t_std),
    ])


# additional (training-only) transforms: photometric jitter before normalization
add_transforms = _normalized_pipeline(
    T.ColorJitter(brightness = 0.2, contrast = 0.2, saturation = 0.2),
)

# deterministic transforms: fixed 512x512 centre crop before normalization
commontransforms = _normalized_pipeline(
    T.CenterCrop((512,512)),
)

def augmentation(image, mask, patchsize = 512):
    """Apply the same random geometric augmentation to an image and its mask.

    The identical affine parameters, crop window and flips are applied to
    both inputs so that pixels and labels stay aligned.

    Arguments:
        image: image tensor; assumed to be laid out as (..., H, W) — TODO confirm
            against the transform pipeline that produces it.
        mask: label tensor with the same spatial size as ``image``.
        patchsize (int): side length of the square patch that is cropped out.

    Returns:
        tuple: ``(image, mask)`` after affine warp, random crop and random flips.

    Raises:
        ValueError: raised by ``RandomCrop.get_params`` when the input is
            smaller than ``patchsize`` (behaviour of torchvision, not of this
            function).
    """
    # Random affine: no rotation, no translation, scale in [0.8, 1.5],
    # shear in [0.9, 1.1] degrees along both axes.
    # get_params is a static method, so no RandomAffine instance is required;
    # its img_size argument is (width, height), taken from the trailing dims.
    height, width = image.shape[-2:]
    affine_params = T.RandomAffine.get_params(
        (0, 0), (0, 0), (0.8, 1.5), (0.9, 1.1, 0.9, 1.1),
        img_size = (width, height))
    image = T.functional.affine(image, *affine_params)
    mask = T.functional.affine(mask, *affine_params)

    # Random cropping: one window, applied to both tensors
    top, left, crop_h, crop_w = T.RandomCrop.get_params(
        image, output_size = (patchsize, patchsize))
    image = T.functional.crop(image, top, left, crop_h, crop_w)
    mask = T.functional.crop(mask, top, left, crop_h, crop_w)

    # Random horizontal flipping
    if random.random() > 0.5:
        image = T.functional.hflip(image)
        mask = T.functional.hflip(mask)

    # Random vertical flipping
    if random.random() > 0.5:
        image = T.functional.vflip(image)
        mask = T.functional.vflip(mask)

    return image, mask
    
class SemSegDataset(Dataset):
    """ Generic Dataset class for semantic segmentation datasets.

        Arguments:
            data_path (string): Path to the dataset folder.
            train_val_test (string): 'train', 'val' or 'test'. 'train' applies the random
                geometric augmentation to image and mask, 'test' returns images only,
                any other value is treated like 'val' (mask is centre-cropped to 512x512).
            img_list (sequence of string): Image ids (file names without extension).
            transform (callable, optional): Transform applied to the image only.
                Defaults to ToPILImage -> ToTensor -> Normalize(t_mean, t_std).
            class_names (list, optional): Names of the classes. When given, it is stored
                and its length defines ``num_class``; otherwise ``num_class`` is 12.

        Dataset folder structure:
            Folder containing the dataset should look like:
            - data_path
            -- imgs/imgs/<id>.jpg
            -- masks/masks/<id>.png   (not needed for 'test')

            Names of images and masks should be the same for same samples.
    """
    def __init__(self, data_path, train_val_test, img_list, transform = None, class_names = None):
        self.img_fld = data_path + '/imgs/imgs/'
        self.train_val_test = train_val_test
        # the test split ships without masks, so no mask folder is registered for it
        if self.train_val_test != 'test':
            self.mask_fld = data_path + '/masks/masks/'
        self.image_ids = img_list
        if transform is None:
            self.transforms = T.Compose([
                T.ToPILImage(),
                T.ToTensor(),
                T.Normalize(t_mean, t_std)
                ])
        else:
            self.transforms = transform
        self.class_names = class_names
        self.num_class = len(class_names) if class_names is not None else 12

    def __len__(self):
        """Return the number of samples (one per image id)."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The transformed image for the 'test' split, otherwise an
            ``(image, mask)`` tuple.
        """
        image_id = self.image_ids[idx]
        img = self.transforms(read_image(self.img_fld + image_id + '.jpg'))

        if self.train_val_test == 'test':
            return img

        mask = read_image(self.mask_fld + image_id + '.png')
        if self.train_val_test == 'train':
            # same random affine / crop / flips for image and mask
            img, mask = augmentation(img, mask)
        else:
            # NOTE(review): assumes the image transform also centre-crops to
            # 512x512, otherwise image and mask sizes will differ — confirm.
            mask = T.CenterCrop((512,512))(mask)
        return img, mask

# Shuffle the training ids once, then use the first 70% for training and the rest for validation.
# NOTE(review): the shuffle is unseeded, so the split differs between runs.
img_df = pd.read_csv(TrainConfig.data_path +'/'+ 'train.csv').astype(str)
idx = img_df.sample(frac=1).index.values
n_train = int(0.7*len(img_df))
train_list = img_df.loc[idx[:n_train], 'ImageID'].to_numpy()
val_list = img_df.loc[idx[n_train:], 'ImageID'].to_numpy()
test_list = pd.read_csv(TrainConfig.data_path +'/'+ 'test.csv').astype(str)['ImageID'].to_numpy()

# One dataset per split
train_set = SemSegDataset(
    TrainConfig.data_path,
    train_val_test = 'train',
    transform = add_transforms,
    img_list = train_list,
)
val_set = SemSegDataset(
    TrainConfig.data_path,
    train_val_test = 'val',
    img_list = val_list,
    transform = commontransforms,
)
test_set = SemSegDataset(
    TrainConfig.data_path,
    train_val_test = 'test',
    img_list = test_list,
)

# train dataloader: shuffled, loaded by worker processes
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size = TrainConfig.batch_size,
    shuffle = True,
    num_workers = TrainConfig.num_workers,
)

# validation data loader: fixed order, loaded in the main process
val_loader = torch.utils.data.DataLoader(
    val_set,
    batch_size = TrainConfig.batch_size,
    shuffle = False,
    num_workers = 0,
)

# test dataloader: fixed order so predictions line up with test_list
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size = TrainConfig.batch_size,
    shuffle = False,
    num_workers = 0,
)


# Visualization of image and its corresponding mask

In [None]:
# Sample one training batch and show every image next to its mask.
# next(iter(...)) is used because DataLoader iterators do not reliably expose a .next() method.
img_batch = next(iter(train_loader))

# Statistics used to undo T.Normalize so the images are displayed in their original colours
display_mean = torch.tensor(t_mean).view(-1, 1, 1)
display_std = torch.tensor(t_std).view(-1, 1, 1)

plt.figure(figsize = [16,16])
# img_batch is (images, masks): i == 0 iterates the images, i == 1 the masks
for i, batch in enumerate(img_batch):
    for j, img in enumerate(batch):
        # grid has one row per sample and two columns: image on the left, mask on the right
        plt.subplot(batch.shape[0], 2, 2*j + i + 1)
        if i == 0:
            img = (img * display_std + display_mean).clamp(0, 1)
        out_np = img.permute(1,2,0)
        plt.imshow(out_np.cpu().numpy())
        plt.axis('off')
plt.show()

# shape of the last tensor drawn (the last mask of the batch)
print(img.shape)

# Evaluation metric

In [None]:
# Dice loss from kornia, kept under the name `eval_metric`.
# NOTE(review): this is a loss object, so presumably lower values are better — confirm before reporting it as a score.
eval_metric = K.losses.DiceLoss()

# Model

In [None]:
# # define frequent used block
# class DecoderBlock(nn.Module):
#     def __init__(self, channels_in, channels_out):
#         super().__init__()

#         self.decoder = nn.Sequential(
#             nn.Conv2d(channels_in, channels_in // 2, kernel_size = 1, bias = False),
#             nn.BatchNorm2d(channels_in // 2),
#             nn.ReLU(),
#             # Deconvolution
#             nn.ConvTranspose2d(
#                 channels_in // 2,
#                 channels_in // 2,
#                 kernel_size=2,
#                 stride=2,
#                 padding=0,
#                 output_padding=0,
#                 groups=channels_in // 2,
#                 bias=False
#             ),
#             nn.BatchNorm2d(channels_in // 2),
#             nn.ReLU(),
#             nn.Conv2d(channels_in // 2, channels_out, kernel_size = 1, bias = False),
#             nn.BatchNorm2d(channels_out),
#             nn.ReLU()
#         )
    
#     def forward(self, x):
#         return self.decoder(x)

In [None]:
# # create LinkNet using VGG16 for encoder blocks 
# class LinkNet(nn.Module):
#     def __init__(self, num_classes):
#         super().__init__()
#         vgg16 = getattr(models, 'vgg16')(pretrained=True)
#         self.dn0 = vgg16.features[0:5]
#         self.dn1 = vgg16.features[5:10]
#         self.dn2 = vgg16.features[10:17]
#         self.dn3 = vgg16.features[17:24]
#         self.dn4 = vgg16.features[24:31]
#         self.up1 = DecoderBlock(512,512)
#         self.up2 = DecoderBlock(512,256)
#         self.up3 = DecoderBlock(256,128)
#         self.up4 = DecoderBlock(128,64)
#         self.lastblock = nn.Sequential(
#             nn.ConvTranspose2d(64, 32, kernel_size = 3, stride = 2, bias = False),
#             nn.BatchNorm2d(32),
#             nn.ReLU(),
#             nn.Conv2d(32, 32, kernel_size = 3, padding = 0, bias=False),
#             nn.BatchNorm2d(32),
#             nn.ReLU(),
#             nn.ConvTranspose2d(32, num_classes, kernel_size = 2, padding = 0, bias = False)
#         )
        
#     def forward(self, x):
        
#         # encoders
#         en = self.dn0(x)
#         en1 = self.dn1(en)
#         en2 = self.dn2(en1)
#         en3 = self.dn3(en2)
#         en4 = self.dn4(en3)
        
#         # decoders with skip connections (additions)
#         de1 = self.up1(en4) + en3
#         de2 = self.up2(de1) + en2
#         de3 = self.up3(de2) + en1
#         de4 = self.up4(de3)
        
#         # in the paper there is additional block
#         out = self.lastblock(de4)
#         return out

# # # check whether the network output correct dim
# # test_tensor = torch.zeros(4, 3, 320, 320)
# # model = LinkNet(12)
# # pred = model(test_tensor)
# # print(pred.size())

# Model: DeepLabV3

In [None]:
from torchvision.models.segmentation import deeplabv3_resnet101

# Dummy batch used only to check that the modified network runs end to end.
# It lives on the configured device so the cell also works without a GPU.
test_tensor = torch.zeros(4, 3, 1024, 1024).to(TrainConfig.device)
model = deeplabv3_resnet101(pretrained = True)

# Replace the final 1x1 convolutions so both heads predict the 12 dataset classes
model.aux_classifier[4] = nn.Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
model.classifier[4] = nn.Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
model.to(TrainConfig.device).eval()

# Freeze the ResNet-101 backbone: only the classifier heads keep requires_grad=True
for param in model.backbone.parameters():
    param.requires_grad = False

# Sanity forward pass; no_grad avoids building an autograd graph for a throw-away result
with torch.no_grad():
    pred = model(test_tensor)['out']
model.train()



In [None]:
# Parameter counts. `pytorch_total_params` keeps its historical name but only
# counts parameters with requires_grad=True, i.e. the trainable ones.
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
all_params = sum(p.numel() for p in model.parameters())
print(f'trainable params: {pytorch_total_params} / total params: {all_params}')

# Training

In [None]:
from torchmetrics import JaccardIndex
from kornia.losses import FocalLoss, DiceLoss

TrainConfig = TrainingConfiguration()


def _mean_nonzero_iou(per_batch_iou):
    """Average per-class IoU over batches, ignoring entries that are exactly 0."""
    stacked = np.array(per_batch_iou)
    return np.ma.masked_equal(stacked, 0).mean(axis = 0)


# Optimizer
optimizer = optim.Adam(model.parameters(), lr = TrainConfig.lr ,betas = (0.5,0.999))

# Scheduler: learning-rate factor 1 / (1 + decay_rate * epoch)
lmbda = lambda epoch: 1/(1+TrainConfig.decay_rate * epoch)
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lmbda)

# Losses: Dice (criterion) and un-reduced focal loss (criterion2)
#criterion = nn.CrossEntropyLoss(reduction = 'none')
kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'none'}
criterion2 = FocalLoss(**kwargs)
criterion = DiceLoss()

# Weights of the Dice and focal terms in the training loss
w1 = 0.5
w2 = 1-w1

# Per-class IoU (no averaging across classes)
IoU = JaccardIndex(num_classes = 12, average = 'none').to(TrainConfig.device)

# Per-epoch, per-class training IoU
storemetric = []

for epoch in range(TrainConfig.epochs_count):
    epoch_loss = []
    batch_iou = []
    for x, y in train_loader:
        x = x.to(TrainConfig.device)
        y = y.to(TrainConfig.device)
        # squeeze(1) removes only the channel axis; a bare squeeze() would also
        # drop the batch axis whenever a batch holds a single sample
        target = y.squeeze(1).long()

        # reset parameters gradient to zero
        optimizer.zero_grad()

        y_pred = model(x)["out"]

        # weighted sum of Dice loss and mean focal loss
        loss = w1*criterion(y_pred, target) + w2*criterion2(y_pred, target).mean()

        # calculate gradients
        loss.backward()

        # update parameters
        optimizer.step()
        epoch_loss.append(loss.item())

        # per-class IoU on this training batch; argmax over the class axis
        # (softmax is monotonic, so it does not change the argmax)
        pred_labels = y_pred.detach().argmax(dim = 1)
        batch_iou.append(IoU(pred_labels, target).cpu().numpy())

    # IoU for each class, averaged over the batches of this epoch
    masked_batch_iou = _mean_nonzero_iou(batch_iou)

    if ((epoch+1) % TrainConfig.plot_interval) == 0:
        # validation pass
        val_metric = []
        val_loss_arr = []
        model.eval()
        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val = x_val.to(TrainConfig.device)
                y_val = y_val.to(TrainConfig.device)
                val_target = y_val.squeeze(1).long()
                y_pred_val = model(x_val)["out"]
                # validation loss uses the Dice term only
                val_loss_arr.append(criterion(y_pred_val, val_target).item())
                # per-class IoU on this validation batch
                val_labels = y_pred_val.argmax(dim = 1)
                val_metric.append(IoU(val_labels, val_target).cpu().numpy())
        model.train()

        masked_val_iou = _mean_nonzero_iou(val_metric)

        # show sample inference for the last validation batch:
        # top row ground truth, bottom row prediction, one column per sample
        n_show = y_val.shape[0]
        plt.figure(figsize = [2*n_show, 4])
        for j in range(n_show):
            plt.subplot(2, n_show, j+1)
            plt.imshow(val_target[j].cpu().numpy(), vmin = 0, vmax = 12)
            plt.axis('off')
            plt.subplot(2, n_show, n_show+j+1)
            plt.imshow(val_labels[j].cpu().numpy(), vmin = 0, vmax = 12)
            plt.axis('off')
        plt.show()
        print(f'val_mIOU {masked_val_iou.mean()}, val_loss {np.mean(val_loss_arr)}')

    storemetric.append(masked_batch_iou)

    # scheduler step/ update learning rate
    if scheduler is not None:
        scheduler.step()

    print(f'epoch {epoch}, loss {np.mean(epoch_loss)}, train_mIoU {storemetric[-1].mean()}')

# Create function for converting mask to encoded pixels for submission

In [None]:
# Run Length Encoding (RLE)
def mask2RLE(img, num_class, filename):
    """Run-length encode every class of a label mask.

    The mask is flattened in row-major order and, for each class id in
    ``range(num_class)``, encoded as ``"start length start length ..."`` where
    ``start`` is the 0-based flat index of the first pixel of a run.
    NOTE(review): 0-based, row-major encoding is kept from the previous
    implementation — confirm it matches the submission format.

    Runs that touch the first or the last pixel of the mask are encoded like
    any other run (the mask is padded with background on both sides before
    run boundaries are detected).

    Arguments:
        img: array-like label mask; singleton dimensions are squeezed away.
        num_class (int): number of classes; one row is produced per class.
        filename (str): image id used as prefix of the ``ImageID`` column.

    Returns:
        pandas.DataFrame with columns ``ImageID`` (``"<filename>_<class>"``)
        and ``EncodedPixels`` (the RLE string, or NaN when the class is absent).
    """
    flat_img = np.asarray(img).squeeze().flatten()
    rle_arr = []
    for ic in range(num_class):
        mask = flat_img == ic
        # Pad with False so every run has both a rising and a falling edge,
        # including runs at the very start or end of the mask.
        padded = np.concatenate(([False], mask, [False]))
        edges = np.flatnonzero(padded[1:] != padded[:-1])
        starts = edges[::2]
        lengths = edges[1::2] - starts

        if starts.size == 0:
            # class not present in this mask: NaN marks an empty encoding
            rle_arr.append(float("NaN"))
        else:
            pairs = np.column_stack((starts, lengths)).ravel().tolist()
            rle_arr.append(' '.join(str(value) for value in pairs))

    name_id = [f'{filename}_{i}' for i in range(num_class)]
    DF_encoded = pd.DataFrame(data = {'ImageID' : name_id , 'EncodedPixels' : rle_arr})
    return DF_encoded


# Prepare submission CSV

In [None]:
# Predict every test image and collect one RLE frame per image.
# test_loader iterates in the order of test_list (shuffle=False), so a running
# counter maps each prediction back to its image id regardless of batch size.
encoded_frames = []
n_encoded = 0
model.eval()
with torch.no_grad():
    for test_data in test_loader:
        x_test = test_data.to(TrainConfig.device)
        # argmax over the class axis; the batch axis is kept even for a batch of one
        y_pred_test = model(x_test)["out"].argmax(dim = 1)
        for y_pred in y_pred_test:
            encoded_frames.append(
                mask2RLE(y_pred.cpu().numpy(), num_class = 12, filename = test_list[n_encoded]))
            n_encoded += 1
model.train()

# concatenate once instead of growing the frame inside the loop
DF = pd.concat(encoded_frames) if encoded_frames else pd.DataFrame()

DF.reset_index(drop=True).to_csv('submission.csv', index=False)

Kaggle profile link: https://www.kaggle.com/chayakorn