# Image Segmentation
An image is a collection or set of different pixels. We group together the pixels that have similar attributes using image segmentation. Thus, the task of image segmentation is to train a neural network to output a pixel-wise mask of the image. This helps in understanding the image at a much lower level, i.e., the pixel level. In image segmentation each pixel is given a label.

Before diving into the code, we first need to import all the required libraries.

# Import libraries

In [None]:
from PIL import Image
import numpy as np 
import pandas as pd 
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from zipfile import ZipFile 
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Dataset Preparation

In [None]:
pip install segmentation-models-pytorch

Let's unzip all the files

In [None]:
# Unpack the training-image archive into the Kaggle working directory.
train_zip = "/kaggle/input/carvana-image-masking-challenge/train.zip"
with ZipFile(train_zip, mode='r') as train_archive:
    train_archive.extractall(path='/kaggle/working')

In [None]:
# Unpack the mask archive next to the training images.
train_mask_zip = "/kaggle/input/carvana-image-masking-challenge/train_masks.zip"
with ZipFile(train_mask_zip, mode='r') as mask_archive:
    mask_archive.extractall(path='/kaggle/working')

In [None]:
print("Train set:  ", len(os.listdir("/kaggle/working/train")))
print("Train masks:", len(os.listdir("/kaggle/working/train_masks")))

In [None]:
# Gather the path of every file below the extracted training folder.
paths = []
for root, _, names in os.walk('/kaggle/working/train'):
    paths.extend(os.path.join(root, name) for name in names)

# The car id is the file name up to its first dot.
car_ids = [os.path.basename(p).split(".")[0] for p in paths]

# One row per image, indexed by car id.
df = pd.DataFrame(data={"id": car_ids, "car_path": paths}).set_index('id')
df

In [None]:
df.iloc[0, :]['car_path']

In [None]:
# Gather the path of every file below the extracted mask folder.
mask_path = []
for root, _, names in os.walk('/kaggle/working/train_masks'):
    mask_path.extend(os.path.join(root, name) for name in names)

# Keep the file name up to its first dot, then drop everything from "_mask"
# onwards so the id can be matched against the image table.
car_ids = [
    os.path.basename(p).split(".")[0].split("_mask")[0]
    for p in mask_path
]

# One row per mask, indexed by car id.
mask_df = pd.DataFrame(data={"id": car_ids, "mask_path": mask_path}).set_index('id')
mask_df

In [None]:
df["mask_path"] = mask_df["mask_path"]
df

In [None]:
# A fixed seed keeps the train/validation partition identical across notebook
# runs, so results from different runs can be compared.
train_df, val_df = train_test_split(df, test_size=0.25, shuffle=True, random_state=42)

dataset preparation

In [None]:
class CustomDataset(Dataset):
    """Dataset yielding ``(image, mask)`` tensor pairs from a table of file paths.

    Args:
        df: Table with a ``car_path`` and a ``mask_path`` column, one row per
            sample; rows are addressed by position.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries
            (the albumentations calling convention).
    """

    def __init__(self, df, transform=None):
        super().__init__()
        self.df = df
        self.transform = transform

    def __len__(self) -> int:
        """Return the number of rows in the path table."""
        return len(self.df)

    def __getitem__(self, idx: int):
        """Load, optionally transform, and tensorise the sample at position ``idx``.

        Returns:
            A ``(car, mask)`` tuple of tensors produced by torchvision's
            ``to_tensor`` conversion.
        """
        row = self.df.iloc[idx]

        # Context managers release the file handles as soon as the pixel data
        # has been copied into NumPy arrays.
        with Image.open(row['car_path']) as car_img:
            car = np.array(car_img.convert('RGB'))
        with Image.open(row['mask_path']) as mask_img:
            mask = np.array(mask_img.convert('L'))

        if self.transform:
            transformed = self.transform(image=car, mask=mask)
            car = transformed['image']
            mask = transformed['mask']

        # Resolve to_tensor through the torchvision package itself rather than a
        # module-level alias, so rebinding a short global name elsewhere in the
        # notebook cannot break sample loading.
        car = torchvision.transforms.functional.to_tensor(car)
        mask = torchvision.transforms.functional.to_tensor(mask)
        return car, mask

In [None]:
transform = A.Compose([
    A.Resize(512, 512, p=1)
])

In [None]:
train_dataset = CustomDataset(train_df, transform)
val_dataset = CustomDataset(val_df, transform)

In [None]:
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4, pin_memory=True )
val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4, pin_memory=True )

In [None]:
dataiter = iter(val_dataloader)
cars, masks = next(dataiter)

In [None]:
cars.shape

In [None]:
masks.shape

In [None]:
torch.unique(masks)

In [None]:
cars

In [None]:
masks

In [None]:
car, mask = train_dataset[0]

In [None]:
car.shape

In [None]:
torch.unique(mask)

In [None]:
mask.dtype
car.dtype

The dataset was split into training and validation sets above; let's check the size of each.

In [None]:
len(train_df)

In [None]:
len(val_df)

Let's look at the image and its corresponding mask.

# Model

We are going to use a U-Net model. A U-Net consists of an encoder (downsampler) and a decoder (upsampler). In order to learn robust features and reduce the number of trainable parameters, a pretrained model can be used as the encoder. Here the encoder is a ResNet-34 pretrained on ImageNet, provided by the `segmentation_models_pytorch` library.

In [None]:
import segmentation_models_pytorch as smp

In [None]:
model = smp.Unet(encoder_name="resnet34", encoder_weights="imagenet", in_channels=3, classes = 1)

In [None]:
model

In [None]:
model.segmentation_head

In [None]:
pip install torchsummary

In [None]:
from torchsummary import summary

In [None]:
!pip install hiddenlayer

In [None]:
batch = torch.rand(1, 3, 512, 512)

In [None]:
import hiddenlayer as hl

# Named hl_transforms (not `transforms`) so the alias created by
# `import torchvision.transforms as transforms` is not overwritten for the
# rest of the notebook.
hl_transforms = [hl.transforms.Prune('Constant')]
graph = hl.build_graph(model, batch, transforms=hl_transforms)
graph.theme = hl.graph.THEMES['blue'].copy()
graph.save('unet_hiddenlayer', format='png')

In [None]:
summary(model, (3, 512, 512), 1,'cpu')

In [None]:
d = torch.rand(1, 3, 512, 512)
model(d).shape

# Train the Model

Now let's compile the model and see the model architecture

In [None]:
def dice_coef(y_true, y_pred, smooth=1):
    """Return the smoothed Dice coefficient averaged over the batch.

    The sums run over dimensions 1, 2 and 3, so both inputs must be
    4-dimensional tensors whose first dimension indexes the samples.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor, multiplied element-wise with ``y_true``.
        smooth: Constant added to numerator and denominator; keeps the ratio
            defined when both tensors sum to zero.

    Returns:
        Scalar tensor: the mean over dimension 0 of the per-sample coefficients.
    """
    # Written with torch ops: the rest of the notebook works on torch tensors
    # and no Keras backend (`K`) is imported anywhere.
    reduce_dims = (1, 2, 3)
    intersection = torch.sum(y_true * y_pred, dim=reduce_dims)
    union = torch.sum(y_true, dim=reduce_dims) + torch.sum(y_pred, dim=reduce_dims)
    return torch.mean((2. * intersection + smooth) / (union + smooth), dim=0)


def dice_loss(in_gt, in_pred):
    """Return ``1 - dice_coef(in_gt, in_pred)``."""
    return 1 - dice_coef(in_gt, in_pred)

Let's try out the model to see what it predicts before training.

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
model.to(device)

In [None]:
criterion= nn.BCEWithLogitsLoss()
optimizer= torch.optim.Adam(model.parameters(),lr=1e-3)

In [None]:
from tqdm import tqdm

In [None]:
def meanIOU(target, predicted):
    """Average per-sample IoU of the non-zero argmax labels of two 4-D tensors.

    Each sample is reduced with ``argmax`` over its first axis; the resulting
    label maps are treated as boolean masks (non-zero means foreground). A
    sample whose union is empty contributes 0. When a sample's first axis has
    length 1 the argmax is zero everywhere, so that sample always scores 0.

    Returns:
        The mean IoU over the batch, or ``None`` (after printing a message)
        when the shapes differ or the inputs are not 4-dimensional.
    """
    if target.shape != predicted.shape:
        print("target has dimension", target.shape, ", predicted values have shape", predicted.shape)
        return

    if target.dim() != 4:
        print("target has dim", target.dim(), ", Must be 4.")
        return

    def _label_map(sample):
        # Detached CPU copy reduced to one label per spatial position.
        return sample.clone().detach().cpu().numpy().argmax(0)

    scores = []
    for tgt, pred in zip(target, predicted):
        tgt_map = _label_map(tgt)
        pred_map = _label_map(pred)

        overlap = np.logical_and(tgt_map, pred_map).sum()
        combined = np.logical_or(tgt_map, pred_map).sum()
        scores.append(overlap / combined if combined != 0 else 0)

    return sum(scores) / target.shape[0]

In [None]:
def pixelAcc(target, predicted):
    """Average per-sample fraction of positions whose argmax labels agree.

    Each sample of the two 4-D tensors is reduced with ``argmax`` over its
    first axis, and the resulting label maps are compared element-wise.

    Returns:
        The mean agreement over the batch, or ``None`` (after printing a
        message) when the shapes differ or the inputs are not 4-dimensional.
    """
    if target.shape != predicted.shape:
        print("target has dimension", target.shape, ", predicted values have shape", predicted.shape)
        return

    if target.dim() != 4:
        print("target has dim", target.dim(), ", Must be 4.")
        return

    def _label_map(sample):
        # Detached CPU copy reduced to one label per spatial position.
        return sample.clone().detach().cpu().numpy().argmax(0)

    accumulated = 0
    for tgt, pred in zip(target, predicted):
        tgt_map = _label_map(tgt)
        pred_map = _label_map(pred)
        matching = (tgt_map == pred_map).sum()
        accumulated += matching / tgt_map.size

    return accumulated / target.shape[0]

In [None]:
epochs = 10
losses = []
pixel_acc = []

In [None]:
import time

In [None]:
def diceloss(pred, gt, eps=1e-5):
    r"""Soft Dice loss, computed per sample and averaged over the batch.

    Computational formula:
        dice = (2 * tp) / (2 * tp + fp + fn)
        loss = 1 - dice

    Args:
        pred: Raw model outputs; ``torch.sigmoid`` is applied inside this
            function, so do not pass already-activated values.
        gt: Ground-truth tensor; its first dimension is taken as the batch
            size and each sample must hold as many elements as the matching
            sample of ``pred``.
        eps: Small constant added to numerator and denominator so the ratio
            stays defined when a sample has no positive elements.

    Returns:
        Scalar tensor: the per-sample losses summed and divided by the batch
        size.
    """
    pred = torch.sigmoid(pred)

    N = gt.size(0)
    # reshape (unlike view) also accepts non-contiguous inputs such as
    # transposed or sliced tensors.
    pred_flat = pred.reshape(N, -1)
    gt_flat = gt.reshape(N, -1)

    tp = torch.sum(gt_flat * pred_flat, dim=1)
    fp = torch.sum(pred_flat, dim=1) - tp
    fn = torch.sum(gt_flat, dim=1) - tp

    loss = 1 - ((2 * tp + eps) / (2 * tp + fp + fn + eps))
    return loss.sum() / N

*Demo*

In [None]:
gt = torch.rand(2, 1, 2, 2)
print(gt)
gt = (gt>0.5).float()
gt

In [None]:
pd = torch.rand(2, 1, 2, 2)
print(pd)
pd = (pd>0.5).float()
pd

In [None]:
diceloss(pd, gt)

In [None]:
2*torch.tensor([1.2311, 2.6932])

In [None]:
2*torch.tensor([1.2311, 2.6932])+torch.tensor([1.2311, 0.0000])+torch.tensor([0.7689, 1.3068])

In [None]:
1-torch.tensor([2.4622, 5.3864])/torch.tensor([4.4622, 6.6932])

In [None]:
torch.tensor([0.4482, 0.1952]).sum()/2

In [None]:
# Train for five epochs, optimising the per-batch Dice loss and recording the
# mean loss of every epoch in `losses`.
for n in range(5):
    # Sum of per-batch losses. Deliberately not called `dice_loss`, which is
    # the name of a loss function defined earlier in the notebook.
    running_loss = 0.0
    num_batches = 0
    model.train()
    batch_loop = tqdm(train_dataloader, desc=f"Epoch {n}")
    for cars, masks in batch_loop:
        cars = cars.to(device)
        masks = masks.to(device)

        preds = model(cars)
        loss = diceloss(preds, masks)  # mean Dice loss of this batch

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        num_batches += 1
        batch_loop.set_postfix(loss=batch_loss)

    # max(..., 1) keeps the division defined if the loader yields no batches.
    epoch_loss = running_loss / max(num_batches, 1)
    losses.append(epoch_loss)
    print(f"loss: {running_loss}/{num_batches} is {epoch_loss}")
    # 1 - mean Dice loss is the mean soft Dice score, not a pixel accuracy.
    print(f"dice score: {1-epoch_loss}")

# Evaluation

In [None]:
# Pixel accuracy on the validation set: the fraction of mask elements whose
# thresholded prediction equals the ground truth.
num_correct = 0
num_pixel = 0
model.eval()
with torch.no_grad():
    batch_loop = tqdm(val_dataloader)
    for cars, masks in batch_loop:
        cars = cars.to(device)
        masks = masks.to(device)
        preds = model(cars)

        # Logits -> probabilities -> hard 0/1 decision at 0.5.
        p = (torch.sigmoid(preds) > 0.5).float()
        num_correct += (masks == p).float().sum().item()
        num_pixel += torch.numel(preds)

        # Show the running validation accuracy; no loss is computed in this
        # loop, so there is no current loss value to display.
        batch_loop.set_postfix(accuracy=num_correct / num_pixel)

    print(f"accuracy: {num_correct/num_pixel}")

In [None]:
len(train_dataloader.dataset)*512*512

In [None]:
# NOTE(review): pixel_acc is created as an empty list and nothing in this
# notebook appends to it, so this index raises IndexError as written.
# Presumably it was meant to hold per-epoch correct-pixel counts -- confirm.
pixel_acc[1]/(512*512)

In [None]:
masks

In [None]:
y = torch.sigmoid(preds.data)

In [None]:
y = (y>0.5).float()

In [None]:
y

In [None]:
correct = (y==masks).sum()
correct

In [None]:
torch.numel(y)