# Semantic Segmentation using U-NET model

## Import libraries

In [32]:
# visualization library
import cv2
from matplotlib import pyplot as plt
# data storing library
import numpy as np
import pandas as pd
# torch libraries
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from torchvision import transforms
# architecture and data split library
from sklearn.model_selection import train_test_split
# augmentation library
from albumentations import (HorizontalFlip, ShiftScaleRotate, Normalize, Resize, Compose, GaussNoise)
from albumentations.pytorch.transforms import ToTensorV2
import os
import pdb
import time
import warnings
import random
from tqdm import tqdm_notebook as tqdm
import concurrent.futures

# suppress warning output
warnings.filterwarnings("ignore")
import PIL

## Preprocessing

### Fixing seed value

In [2]:
def setSeed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and switches cuDNN into deterministic mode so repeated runs are
    comparable.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # processes spawned after this point, not the already-running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run;
    # disabling it is required for reproducible results.
    torch.backends.cudnn.benchmark = False


# Seed all RNGs once, before any data is split or any model weights are created.
setSeed(42)

In [3]:
# !mkdir -p '.pytorch/carvanasemseg/'
# !mkdir -p '.pytorch/carvanasemseg/train_masks_png/'
# !mkdir -p '.pytorch/carvanasemseg/train-128/'
# !mkdir -p '.pytorch/carvanasemseg/train_masks-128/'

def convert_img(fn):
    """Re-save one mask file from ``train_masks/`` as a PNG in ``train_masks_png/``.

    Args:
        fn: File name (not a path) of a mask inside
            ``.pytorch/carvanasemseg/train_masks``.
    """
    # `import PIL` alone does not load the `Image` submodule; import it
    # explicitly instead of relying on another library having done so.
    from PIL import Image

    # splitext handles any extension length, unlike slicing off 4 characters.
    stem, _ = os.path.splitext(fn)
    # The context manager closes the source file even if saving fails.
    with Image.open(f'.pytorch/carvanasemseg/train_masks/{fn}') as im:
        im.save(f'.pytorch/carvanasemseg/train_masks_png/{stem}.png')


# The output folder must exist before the workers start saving into it.
os.makedirs('.pytorch/carvanasemseg/train_masks_png/', exist_ok=True)
fns = os.listdir('.pytorch/carvanasemseg/train_masks')
with concurrent.futures.ThreadPoolExecutor(8) as e:
    # submit() keeps one future per file so failures can be reported; a
    # discarded e.map() iterator would swallow every worker exception.
    jobs = {e.submit(convert_img, fn): fn for fn in fns}
# All jobs have finished once the executor context exits.
for job, fn in jobs.items():
    if job.exception() is not None:
        # print rather than warnings.warn: warnings are globally ignored in this notebook
        print(f'convert_img failed for {fn}: {job.exception()!r}')


def resize_mask(fn):
    """Downscale one PNG mask to 128x128 and save it under ``train_masks-128/``.

    Args:
        fn: File name (not a path) of a mask inside
            ``.pytorch/carvanasemseg/train_masks_png/``.
    """
    # `import PIL` alone does not load the `Image` submodule; import it
    # explicitly instead of relying on another library having done so.
    from PIL import Image

    with Image.open('.pytorch/carvanasemseg/train_masks_png/' + fn) as im:
        # Masks are label maps: nearest-neighbour keeps the original pixel
        # values instead of blending foreground and background at the edges.
        small = im.resize((128, 128), resample=Image.NEAREST)
        small.save(f'.pytorch/carvanasemseg/train_masks-128/{fn}')


# The output folder must exist before the workers start saving into it.
os.makedirs('.pytorch/carvanasemseg/train_masks-128/', exist_ok=True)
fns = os.listdir('.pytorch/carvanasemseg/train_masks_png/')
with concurrent.futures.ThreadPoolExecutor(8) as e:
    # submit() keeps one future per file so failures can be reported; a
    # discarded e.map() iterator would swallow every worker exception.
    jobs = {e.submit(resize_mask, fn): fn for fn in fns}
# All jobs have finished once the executor context exits.
for job, fn in jobs.items():
    if job.exception() is not None:
        # print rather than warnings.warn: warnings are globally ignored in this notebook
        print(f'resize_mask failed for {fn}: {job.exception()!r}')


def resize_img(fn):
    """Downscale one high-resolution input image to 128x128.

    Reads from ``train/`` and writes a file of the same name to ``train-128/``.

    Args:
        fn: File name (not a path) of an image inside
            ``.pytorch/carvanasemseg/train/``.
    """
    # `import PIL` alone does not load the `Image` submodule; import it
    # explicitly instead of relying on another library having done so.
    from PIL import Image

    # The context manager closes the source file even if resizing or saving fails.
    with Image.open('.pytorch/carvanasemseg/train/' + fn) as im:
        im.resize((128, 128)).save(f'.pytorch/carvanasemseg/train-128/{fn}')


# The output folder must exist before the workers start saving into it.
os.makedirs('.pytorch/carvanasemseg/train-128/', exist_ok=True)
fns = os.listdir('.pytorch/carvanasemseg/train/')
with concurrent.futures.ThreadPoolExecutor(8) as e:
    # submit() keeps one future per file so failures can be reported; a
    # discarded e.map() iterator would swallow every worker exception.
    jobs = {e.submit(resize_img, fn): fn for fn in fns}
# All jobs have finished once the executor context exits.
for job, fn in jobs.items():
    if job.exception() is not None:
        # print rather than warnings.warn: warnings are globally ignored in this notebook
        print(f'resize_img failed for {fn}: {job.exception()!r}')

In [4]:
# Index of the training set: one row per image, with the image file name (`img`)
# and its run-length-encoded mask (`rle_mask`).
df = pd.read_csv('.pytorch/carvanasemseg/train_masks.csv')
# Folders holding the 128x128 input images and their 128x128 masks
img_fol = '.pytorch/carvanasemseg/train-128/'
mask_fol = '.pytorch/carvanasemseg/train_masks-128/'
# ImageNet per-channel mean/std (RGB order), passed to Normalize in get_transform.
# NOTE(review): the UNET defined in this notebook has no ResNet backbone, so these
# stats are a convention here rather than a requirement of pretrained weights.
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

In [5]:
# Preview the first rows to confirm the `img` / `rle_mask` columns loaded as expected.
df.head()

Unnamed: 0,img,rle_mask
0,00087a6bd4dc_01.jpg,879386 40 881253 141 883140 205 885009 17 8850...
1,00087a6bd4dc_02.jpg,873779 4 875695 7 877612 9 879528 12 881267 15...
2,00087a6bd4dc_03.jpg,864300 9 866217 13 868134 15 870051 16 871969 ...
3,00087a6bd4dc_04.jpg,879735 20 881650 26 883315 92 883564 30 885208...
4,00087a6bd4dc_05.jpg,883365 74 883638 28 885262 119 885550 34 88716...


### Transformation

In [55]:
def get_transform(phase, mean, std):
    """Build the albumentations pipeline for one dataset phase.

    Args:
        phase: ``'train'`` adds a random horizontal flip; any other value
            yields the plain pipeline.
        mean: Per-channel mean handed to ``Normalize``.
        std: Per-channel standard deviation handed to ``Normalize``.

    Returns:
        A ``Compose`` object that normalises the image and converts the
        sample to tensors (preceded by the flip when training).
    """
    # Augmentation is applied only while training.
    steps = [HorizontalFlip(p=0.5)] if phase == 'train' else []
    # Normalise first, then convert to tensors.
    steps += [Normalize(mean=mean, std=std, p=1), ToTensorV2()]
    return Compose(steps)

### Dataset and Dataloader

In [56]:

class CarDataset(Dataset):
    """Dataset yielding ``(image, mask)`` tensor pairs read from disk.

    Args:
        df: DataFrame with an ``img`` column of image file names.
        img_fol: Folder containing the input images.
        mask_fol: Folder containing the masks; the mask for ``X.jpg`` is
            looked up as ``X_mask.png``.
        mean: Per-channel mean forwarded to ``get_transform``.
        std: Per-channel standard deviation forwarded to ``get_transform``.
        phase: ``'train'`` enables augmentation; other values disable it.
    """

    def __init__(self, df, img_fol, mask_fol, mean, std, phase):
        self.fname = df['img'].values.tolist()
        self.img_fol = img_fol
        self.mask_fol = mask_fol
        self.mean = mean
        self.std = std
        self.phase = phase
        self.transform = get_transform(phase, mean, std)
        # Original (misspelled) attribute name kept as an alias for existing users.
        self.trasnform = self.transform

    def __getitem__(self, idx):
        """Load, augment and return the sample at position ``idx``.

        Raises:
            FileNotFoundError: If the image or its mask cannot be read.
        """
        name = self.fname[idx]
        img_name_path = os.path.join(self.img_fol, name)
        mask_name_path = os.path.join(self.mask_fol, name.replace('.jpg', '_mask.png'))

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside the transform.
        img = cv2.imread(img_name_path)
        if img is None:
            raise FileNotFoundError(f'could not read image: {img_name_path}')
        mask = cv2.imread(mask_name_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f'could not read mask: {mask_name_path}')

        # OpenCV decodes colour images as BGR, while the ImageNet mean/std used
        # by this notebook are listed in RGB order; align the channels first.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        augmentation = self.transform(image=img, mask=mask)
        img_aug = augmentation['image']  # [3,128,128] type:Tensor
        # [128,128] type:Tensor (no channel axis; batches come out as [B,128,128]).
        # NOTE(review): values are the raw grayscale intensities, not binarised
        # here -- confirm this is what the loss expects.
        mask_aug = augmentation['mask']
        return img_aug, mask_aug

    def __len__(self):
        """Return the number of samples."""
        return len(self.fname)

In [57]:
def CarDataloader(df, img_fol, mask_fol, mean, std, batch_size, num_workers,
                  test_size=0.2, random_state=69):
    """Split ``df`` into train/validation parts and wrap each in a DataLoader.

    Args:
        df: DataFrame with an ``img`` column of image file names.
        img_fol: Folder containing the input images.
        mask_fol: Folder containing the masks.
        mean: Per-channel mean used for normalisation.
        std: Per-channel standard deviation used for normalisation.
        batch_size: Samples per batch for both loaders.
        num_workers: Worker processes per loader.
        test_size: Fraction of ``df`` held out for validation.
        random_state: Seed for the train/validation split.

    Returns:
        Tuple ``(traindataloader, valdataloader)``.
    """
    df_train, df_valid = train_test_split(df, test_size=test_size, random_state=random_state)
    # Pinned host memory only helps when batches are copied to a GPU.
    pin = torch.cuda.is_available()

    traindataset = CarDataset(df_train, img_fol, mask_fol, mean, std, 'train')
    # Reshuffle every epoch so batches are not presented in the same order each time.
    traindataloader = DataLoader(traindataset, batch_size=batch_size, shuffle=True,
                                 num_workers=num_workers, pin_memory=pin)

    # Any phase other than 'train' disables augmentation; validation keeps a fixed order.
    valdataset = CarDataset(df_valid, img_fol, mask_fol, mean, std, 'test')
    valdataloader = DataLoader(valdataset, batch_size=batch_size,
                               num_workers=num_workers, pin_memory=pin)

    return traindataloader, valdataloader

In [58]:
# 0 workers: batches are loaded in the main process.
NUM_WORKER = 0
# One image per batch.
BATCH_SIZE = 1
# Build the train/validation loaders over the 128x128 images and masks.
train_dl, val_dl = CarDataloader(df, img_fol, mask_fol, mean, std, batch_size=BATCH_SIZE, num_workers=NUM_WORKER)

In [59]:
# Pull a single batch to sanity-check the tensor shapes produced by the loader.
xb, yb = next(iter(train_dl))
xb.shape, yb.shape

(torch.Size([1, 3, 128, 128]), torch.Size([1, 128, 128]))

## UNET Model

In [69]:
class UNET(nn.Module):
    """Small three-level U-Net style encoder/decoder for segmentation.

    Each contracting block halves the spatial size and each expanding block
    doubles it; the outputs of the first two contracting blocks are
    concatenated into the decoder as skip connections.

    Args:
        in_channels: Number of channels of the input image.
        out_channels: Number of output channels (one per class).
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = self.contract_block(in_channels, 32, 7, 3)
        self.conv2 = self.contract_block(32, 64, 3, 1)
        self.conv3 = self.contract_block(64, 128, 3, 1)

        self.upconv3 = self.expand_block(128, 64, 3, 1)
        # Inputs are doubled because the skip connection is concatenated on the channel axis.
        self.upconv2 = self.expand_block(64 * 2, 32, 3, 1)
        self.upconv1 = self.expand_block(32 * 2, out_channels, 3, 1)

    def contract_block(self, in_channels, out_channels, kernel_size, padding):
        """Return two conv-BN-ReLU stages followed by a stride-2 max-pool."""
        contract = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        return contract

    def expand_block(self, in_channels, out_channels, kernel_size, padding):
        """Return two conv-BN-ReLU stages followed by a stride-2 transposed conv."""
        expand = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size, stride=1, padding=padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.ConvTranspose2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1, output_padding=1)
        )
        return expand

    @staticmethod
    def _crop_like(t, ref):
        """Crop the last two dims of ``t`` to the spatial size of ``ref``.

        The pooling layers round odd sizes up and the transposed convs exactly
        double, so an upsampled map can be one pixel larger than the map it has
        to line up with. When the sizes already match this is a no-op.
        """
        h, w = ref.shape[-2:]
        return t[..., :h, :w]

    def forward(self, x):
        """Compute per-pixel class scores.

        Args:
            x: Tensor of shape ``(N, in_channels, H, W)``.

        Returns:
            Tensor of shape ``(N, out_channels, H, W)``.
        """
        # down-sampling
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)

        # upsampling; crop so inputs whose sides are not multiples of 8 still
        # line up with their skip connections and with the input itself
        upconv3 = self._crop_like(self.upconv3(conv3), conv2)
        upconv2 = self._crop_like(self.upconv2(torch.cat([upconv3, conv2], 1)), conv1)
        upconv1 = self._crop_like(self.upconv1(torch.cat([upconv2, conv1], 1)), x)

        return upconv1

In [70]:
# 3 input channels (colour image), 2 output channels (one score map per class).
unet = UNET(3, 2)

In [72]:
# Forward one batch through the untrained model to check that the output keeps
# the input's spatial size and has one channel per class.
pred = unet(xb)
xb.shape, pred.shape

(torch.Size([1, 3, 128, 128]), torch.Size([1, 2, 128, 128]))