# Segmentation task

Hi! It's a segmentation task baseline notebook.
It include a data reader, baseline model and submission generator.

You should use GPU to train your model, so we recommend using [Kaggle Notebooks](https://www.kaggle.com/docs/notebooks).
To get maximum score of the task, your model should have IoU greater than `0.8`.

You can use everything, that suits into the rules in `README.md`.

In [1]:
from datetime import datetime
import numpy as np
from pathlib import Path
import pandas as pd

import torch
import torch.nn as nn
from torch.utils import data

import catalyst
from catalyst import dl
from catalyst.utils import metrics, imread, set_global_seed

In [2]:
set_global_seed(42)

## Dataset

Load train data. Don't forget to add test data. Use test data, to compare methods/models/etc.

In [3]:
local_path = Path().cwd().parent

In [4]:
train_image_path = local_path / 'data3'/ Path("train") / "images"
train_mask_path = local_path / 'data3'/ Path("train") / "masks"
ALL_IMAGES = sorted(train_image_path.glob("*.png"))
ALL_MASKS = sorted(train_mask_path.glob("*.png"))

In [5]:
from torch.utils.data import Dataset


class SegmentationDataset(Dataset):
    def __init__(self, images=None, masks=None, transforms=None) -> None:
        self.images = images
        self.masks = masks
        self.transforms = transforms

    def __len__(self) -> int:
        return len(self.images)

    def __getitem__(self, idx: int) -> dict:
        image_path = self.images[idx]
        image = imread(image_path)

        result = {"image": image}

        if self.masks is not None:
            result["mask"] = imread(self.masks[idx]).mean(2) // 255

        if self.transforms is not None:
            result = self.transforms(**result)
            if result.get("mask", None) is not None:
                result["mask"] = result["mask"].unsqueeze(0)

        result["filename"] = image_path.name
        result["image size"] = image.shape[:2]

        return result

## Augmentations

To train an accurate model for a segmentation task, you need a lot of data.
Use data augmentations to simulate a bigger dataset.

In [6]:
import albumentations as albu
from albumentations.pytorch import ToTensorV2 as ToTensor
import cv2

IMAGE_SIZE = 256
train_transform = albu.Compose([
    albu.HorizontalFlip(p=0.5),
    albu.VerticalFlip(p=0.5),
    albu.RandomRotate90(p=0.5),
    albu.Resize(IMAGE_SIZE, IMAGE_SIZE),
    albu.RandomResizedCrop(IMAGE_SIZE, IMAGE_SIZE, p=0.5),
    albu.Normalize(),
    ToTensor()
])

valid_transform = albu.Compose([
    albu.Resize(IMAGE_SIZE, IMAGE_SIZE),
    albu.RandomResizedCrop(IMAGE_SIZE, IMAGE_SIZE, p=0.25),
    albu.Normalize(),
    ToTensor()
])

In [7]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

batch_size = 8
num_workers = 4

indices = np.arange(len(ALL_IMAGES))

train_indices, valid_indices = train_test_split(
    indices, test_size=0.2, random_state=42, shuffle=True
)

np_images = np.array(ALL_IMAGES)
np_masks = np.array(ALL_MASKS)

train_dataset = SegmentationDataset(
    images = np_images[train_indices].tolist(),
    masks = np_masks[train_indices].tolist(),
    transforms = train_transform
)

valid_dataset = SegmentationDataset(
    images = np_images[valid_indices].tolist(),
    masks = np_masks[valid_indices].tolist(),
    transforms = valid_transform
)

loaders = {
    "train": DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
    ),
    "valid": DataLoader(
        valid_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        drop_last=True,
    )
}


Our current baseline model is `U-Net`.
You can do anything with it: add pretrained backbone, make model wider or deeper or change a model architecture.
You can use `torchvision` module to create a backbone, but not a whole model.

In [8]:
from GaborNet import GaborConv2d

In [9]:
class Baseline(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.gl = nn.Sequential(
            GaborConv2d(3, 32, kernel_size=15, padding=7),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),
        )
        
        self.down_1 = self.make_down_layer_(32, 64)
        self.down_2 = self.make_down_layer_(64, 128)
        self.down_3 = self.make_down_layer_(128, 256)
        '''self.down_4 = self.make_down_layer_(128, 256)
        
        self.up_1 = self.make_up_layer_(256, 128)'''
        self.up_2 = self.make_up_layer_(256, 128)
        self.up_3 = self.make_up_layer_(128, 64)
        self.up_4 = self.make_up_layer_(64, 32)
        
        self.out = nn.Sequential(
            nn.Conv2d(32, 1, kernel_size=1)
        )

    def make_down_layer_(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

    def make_up_layer_(self, in_channels, out_channels):
        return nn.ModuleList(
            [
                nn.ConvTranspose2d(
                    in_channels,
                    out_channels,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    output_padding=1,
                ),
                nn.Conv2d(
                    2 * out_channels,
                    out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                ),
                nn.BatchNorm2d(out_channels),
                nn.LeakyReLU(),
                nn.Conv2d(
                    out_channels,
                    out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                ),
                nn.BatchNorm2d(out_channels),
                nn.LeakyReLU(),
            ]
        )

    def forward(self, image):
        x_0 = self.gl(image)
        
        x_1 = self.down_1(x_0)
        x_2 = self.down_2(x_1)
        x_3 = self.down_3(x_2)
        '''x_4 = self.down_4(x_3)

        u_1 = self.up_1[0](x_4)
        u_1 = torch.cat([x_3, u_1], axis=1)
        for m in self.up_1[1:]:
            u_1 = m(u_1)'''
        
        u_2 = self.up_2[0](x_3)
        u_2 = torch.cat([x_2, u_2], axis=1)
        for m in self.up_2[1:]:
            u_2 = m(u_2)

        u_3 = self.up_3[0](u_2)
        u_3 = torch.cat([x_1, u_3], axis=1)
        for m in self.up_3[1:]:
            u_3 = m(u_3)
            
        u_4 = self.up_4[0](u_3)
        u_4 = torch.cat([x_0, u_4], axis=1)
        for m in self.up_4[1:]:
            u_4 = m(u_4)
        
        return self.out(u_4)

In [10]:
'''inp = iter(loaders['train']).next()['image']
model = Baseline()
out = model(inp)
out.shape'''

"inp = iter(loaders['train']).next()['image']\nmodel = Baseline()\nout = model(inp)\nout.shape"

In [11]:
from catalyst.contrib.nn import DiceLoss, IoULoss
from catalyst.dl.runner import SupervisedRunner
from torch.nn.functional import interpolate
from catalyst.contrib.nn import RAdam, Lookahead


class SegmentationRunner(SupervisedRunner):
    def predict_batch(self, batch):
        prediction = {"filename": batch["filename"]}
        masks = self.model(batch[self.input_key].to(self.device))
        image_size = list(zip(*batch["image size"]))
        prediction["mask"] = [
            interpolate(mask.unsqueeze(0), image_size).squeeze(0)
            for mask, image_size in zip(masks, image_size)
        ]
        return prediction

# we have multiple criterions
model = Baseline()
criterion = {
    "dice": DiceLoss(),
    "iou": IoULoss(),
    "bce": nn.BCEWithLogitsLoss()
}

base_optimizer = RAdam(model.parameters(), lr=0.001, weight_decay=0.0003)
optimizer = Lookahead(base_optimizer)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=2, min_lr=1e-8, mode='max')

runner = SegmentationRunner(input_key="image", input_target_key="mask")

In [12]:
callbacks = [
    dl.CriterionCallback(
        input_key="mask", prefix="loss_dice", criterion_key="dice"
    ),
    dl.CriterionCallback(
        input_key="mask", prefix="loss_iou", criterion_key="iou"
    ),
    dl.CriterionCallback(
        input_key="mask", prefix="loss_bce", criterion_key="bce"
    ),
    dl.MetricAggregationCallback(
        prefix="loss",
        mode="weighted_sum",
        metrics={"loss_dice": 1.0, "loss_iou": 1.0, "loss_bce": 0.8},
    ),
    dl.DiceCallback(input_key="mask"),
    dl.IouCallback(input_key="mask"),
    dl.OptimizerCallback(
            metric_key="loss", accumulation_steps=1, grad_clip_params=None,
        )
]

In [13]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir=Path("logs") / datetime.now().strftime("%Y%m%d-%H%M%S"),
    num_epochs=100,
    main_metric="iou", # kaggle competition metric
    minimize_metric=False,
    verbose=True,
)

1/100 * Epoch (train):   0% 0/41 [00:00<?, ?it/s]


This overload of add is deprecated:
	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)



1/100 * Epoch (train): 100% 41/41 [00:21<00:00,  1.92it/s, dice=0.634, iou=0.464, loss=1.277, loss_bce=0.469, loss_dice=0.366, loss_iou=0.536]
1/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.41it/s, dice=0.402, iou=0.252, loss=1.845, loss_bce=0.623, loss_dice=0.598, loss_iou=0.748]
[2020-10-18 04:46:04,254] 
1/100 * Epoch 1 (_base): lr=0.0010 | momentum=0.9000
1/100 * Epoch 1 (train): dice=0.4809 | iou=0.3193 | loss=1.7108 | loss_bce=0.6387 | loss_dice=0.5191 | loss_iou=0.6807
1/100 * Epoch 1 (valid): dice=0.5075 | iou=0.3444 | loss=1.6019 | loss_bce=0.5673 | loss_dice=0.4925 | loss_iou=0.6556
2/100 * Epoch (train): 100% 41/41 [00:21<00:00,  1.93it/s, dice=0.523, iou=0.354, loss=1.598, loss_bce=0.594, loss_dice=0.477, loss_iou=0.646]
2/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.56it/s, dice=0.548, iou=0.378, loss=1.544, loss_bce=0.588, loss_dice=0.452, loss_iou=0.622]
[2020-10-18 04:46:27,893] 
2/100 * Epoch 2 (_base): lr=0.0010 | momentum=0.9000
2/100 * Epoch 2 (train): dice=

15/100 * Epoch (train): 100% 41/41 [00:22<00:00,  1.81it/s, dice=0.854, iou=0.745, loss=0.565, loss_bce=0.206, loss_dice=0.146, loss_iou=0.255]
15/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.50it/s, dice=0.732, iou=0.577, loss=0.912, loss_bce=0.275, loss_dice=0.268, loss_iou=0.423]
[2020-10-18 04:51:42,612] 
15/100 * Epoch 15 (_base): lr=0.0010 | momentum=0.9000
15/100 * Epoch 15 (train): dice=0.8320 | iou=0.7152 | loss=0.6320 | loss_bce=0.2241 | loss_dice=0.1680 | loss_iou=0.2848
15/100 * Epoch 15 (valid): dice=0.7934 | iou=0.6590 | loss=0.7409 | loss_bce=0.2415 | loss_dice=0.2066 | loss_iou=0.3410
16/100 * Epoch (train): 100% 41/41 [00:21<00:00,  1.90it/s, dice=0.838, iou=0.722, loss=0.605, loss_bce=0.207, loss_dice=0.162, loss_iou=0.278]
16/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.43it/s, dice=0.740, iou=0.587, loss=0.905, loss_bce=0.290, loss_dice=0.260, loss_iou=0.413]
[2020-10-18 04:52:06,660] 
16/100 * Epoch 16 (_base): lr=0.0010 | momentum=0.9000
16/100 * Epoch 16 

28/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.37it/s, dice=0.827, iou=0.705, loss=0.593, loss_bce=0.156, loss_dice=0.173, loss_iou=0.295]
[2020-10-18 04:57:01,576] 
28/100 * Epoch 28 (_base): lr=0.0010 | momentum=0.9000
28/100 * Epoch 28 (train): dice=0.8787 | iou=0.7881 | loss=0.4902 | loss_bce=0.1961 | loss_dice=0.1213 | loss_iou=0.2119
28/100 * Epoch 28 (valid): dice=0.8603 | iou=0.7569 | loss=0.5453 | loss_bce=0.2032 | loss_dice=0.1397 | loss_iou=0.2431
29/100 * Epoch (train): 100% 41/41 [00:21<00:00,  1.86it/s, dice=0.899, iou=0.817, loss=0.402, loss_bce=0.147, loss_dice=0.101, loss_iou=0.183]
29/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.27it/s, dice=0.804, iou=0.673, loss=0.727, loss_bce=0.255, loss_dice=0.196, loss_iou=0.327]
[2020-10-18 04:57:26,135] 
29/100 * Epoch 29 (_base): lr=0.0010 | momentum=0.9000
29/100 * Epoch 29 (train): dice=0.8837 | iou=0.7940 | loss=0.4787 | loss_bce=0.1956 | loss_dice=0.1163 | loss_iou=0.2060
29/100 * Epoch 29 (valid): dice=0.8778 | 

42/100 * Epoch (train): 100% 41/41 [00:22<00:00,  1.84it/s, dice=0.893, iou=0.806, loss=0.507, loss_bce=0.257, loss_dice=0.107, loss_iou=0.194]
42/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.35it/s, dice=0.849, iou=0.738, loss=0.578, loss_bce=0.206, loss_dice=0.151, loss_iou=0.262]
[2020-10-18 05:02:50,570] 
42/100 * Epoch 42 (_base): lr=0.0001 | momentum=0.9000
42/100 * Epoch 42 (train): dice=0.9082 | iou=0.8332 | loss=0.3837 | loss_bce=0.1565 | loss_dice=0.0918 | loss_iou=0.1668
42/100 * Epoch 42 (valid): dice=0.9049 | iou=0.8289 | loss=0.3820 | loss_bce=0.1447 | loss_dice=0.0951 | loss_iou=0.1711
43/100 * Epoch (train): 100% 41/41 [00:22<00:00,  1.85it/s, dice=0.930, iou=0.870, loss=0.321, loss_bce=0.151, loss_dice=0.070, loss_iou=0.130]
43/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.05it/s, dice=0.831, iou=0.710, loss=0.612, loss_bce=0.191, loss_dice=0.169, loss_iou=0.290]
[2020-10-18 05:03:15,386] 
43/100 * Epoch 43 (_base): lr=0.0001 | momentum=0.9000
43/100 * Epoch 43 

55/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.26it/s, dice=0.869, iou=0.768, loss=0.533, loss_bce=0.212, loss_dice=0.131, loss_iou=0.232]
[2020-10-18 05:08:18,478] 
55/100 * Epoch 55 (_base): lr=1.563e-05 | momentum=0.9000
55/100 * Epoch 55 (train): dice=0.9100 | iou=0.8362 | loss=0.3731 | loss_bce=0.1491 | loss_dice=0.0900 | loss_iou=0.1638
55/100 * Epoch 55 (valid): dice=0.9048 | iou=0.8283 | loss=0.4043 | loss_bce=0.1718 | loss_dice=0.0952 | loss_iou=0.1717
56/100 * Epoch (train): 100% 41/41 [00:23<00:00,  1.75it/s, dice=0.950, iou=0.904, loss=0.226, loss_bce=0.100, loss_dice=0.050, loss_iou=0.096]
56/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.36it/s, dice=0.853, iou=0.744, loss=0.551, loss_bce=0.185, loss_dice=0.147, loss_iou=0.256]
[2020-10-18 05:08:44,526] 
56/100 * Epoch 56 (_base): lr=7.813e-06 | momentum=0.9000
56/100 * Epoch 56 (train): dice=0.9090 | iou=0.8346 | loss=0.3767 | loss_bce=0.1504 | loss_dice=0.0910 | loss_iou=0.1654
56/100 * Epoch 56 (valid): dice=0.9

69/100 * Epoch (train): 100% 41/41 [00:21<00:00,  1.87it/s, dice=0.846, iou=0.734, loss=0.623, loss_bce=0.253, loss_dice=0.154, loss_iou=0.266]
69/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.23it/s, dice=0.851, iou=0.741, loss=0.570, loss_bce=0.203, loss_dice=0.149, loss_iou=0.259]
[2020-10-18 05:14:13,765] 
69/100 * Epoch 69 (_base): lr=9.766e-07 | momentum=0.9000
69/100 * Epoch 69 (train): dice=0.9140 | iou=0.8432 | loss=0.3571 | loss_bce=0.1429 | loss_dice=0.0860 | loss_iou=0.1568
69/100 * Epoch 69 (valid): dice=0.9037 | iou=0.8266 | loss=0.3999 | loss_bce=0.1628 | loss_dice=0.0963 | loss_iou=0.1734
70/100 * Epoch (train): 100% 41/41 [00:22<00:00,  1.84it/s, dice=0.943, iou=0.891, loss=0.227, loss_bce=0.077, loss_dice=0.057, loss_iou=0.109]
70/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.29it/s, dice=0.870, iou=0.770, loss=0.506, loss_bce=0.182, loss_dice=0.130, loss_iou=0.230]
[2020-10-18 05:14:38,559] 
70/100 * Epoch 70 (_base): lr=9.766e-07 | momentum=0.9000
70/100 * Epo

82/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.32it/s, dice=0.866, iou=0.764, loss=0.525, loss_bce=0.194, loss_dice=0.134, loss_iou=0.236]
[2020-10-18 05:19:35,646] 
82/100 * Epoch 82 (_base): lr=6.104e-08 | momentum=0.9000
82/100 * Epoch 82 (train): dice=0.9130 | iou=0.8417 | loss=0.3660 | loss_bce=0.1509 | loss_dice=0.0870 | loss_iou=0.1583
82/100 * Epoch 82 (valid): dice=0.9035 | iou=0.8261 | loss=0.4071 | loss_bce=0.1710 | loss_dice=0.0965 | loss_iou=0.1739
83/100 * Epoch (train): 100% 41/41 [00:21<00:00,  1.90it/s, dice=0.882, iou=0.789, loss=0.536, loss_bce=0.258, loss_dice=0.118, loss_iou=0.211]
83/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.36it/s, dice=0.857, iou=0.750, loss=0.555, loss_bce=0.203, loss_dice=0.143, loss_iou=0.250]
[2020-10-18 05:19:59,720] 
83/100 * Epoch 83 (_base): lr=3.052e-08 | momentum=0.9000
83/100 * Epoch 83 (train): dice=0.9137 | iou=0.8429 | loss=0.3570 | loss_bce=0.1420 | loss_dice=0.0863 | loss_iou=0.1571
83/100 * Epoch 83 (valid): dice=0.9

96/100 * Epoch (train): 100% 41/41 [00:21<00:00,  1.88it/s, dice=0.907, iou=0.829, loss=0.447, loss_bce=0.229, loss_dice=0.093, loss_iou=0.171]
96/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.31it/s, dice=0.849, iou=0.737, loss=0.595, loss_bce=0.226, loss_dice=0.151, loss_iou=0.263]
[2020-10-18 05:25:25,384] 
96/100 * Epoch 96 (_base): lr=1.526e-08 | momentum=0.9000
96/100 * Epoch 96 (train): dice=0.9037 | iou=0.8272 | loss=0.4036 | loss_bce=0.1681 | loss_dice=0.0963 | loss_iou=0.1728
96/100 * Epoch 96 (valid): dice=0.9126 | iou=0.8416 | loss=0.3590 | loss_bce=0.1414 | loss_dice=0.0874 | loss_iou=0.1584
97/100 * Epoch (train): 100% 41/41 [00:21<00:00,  1.87it/s, dice=0.949, iou=0.903, loss=0.209, loss_bce=0.076, loss_dice=0.051, loss_iou=0.097]
97/100 * Epoch (valid): 100% 10/10 [00:02<00:00,  4.43it/s, dice=0.855, iou=0.746, loss=0.539, loss_bce=0.175, loss_dice=0.145, loss_iou=0.254]
[2020-10-18 05:25:49,851] 
97/100 * Epoch 97 (_base): lr=1.526e-08 | momentum=0.9000
97/100 * Epo

## Submission

To generate submission, you'll have to write masks for images.
Usually, in `Kaggle` segmentation competitions masks are encoded in the run length format.
For more information, check `Evaluation` page in `Overview`.

In [14]:
def rle_encoding(x):
    """
    x: numpy array of shape (height, width), 1 - mask, 0 - background
    Returns run length as list
    """
    dots = np.where(x.T.flatten() == 1)[
        0
    ]  # .T sets Fortran order down-then-right
    run_lengths = []
    prev = -2
    for b in dots:
        if b > prev + 1:
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return " ".join([str(i) for i in run_lengths])

This code below will generate a submission.
It reads images from `test` folder and gathers prediction from the trained model.
Check your submission before uploading it into `Kaggle`.

In [40]:
from PIL import Image
import pandas as pd

submission = {"ImageId": [], "EncodedPixels": []}
threshold = 0.5

test_transform = albu.Compose([
    albu.Resize(IMAGE_SIZE, IMAGE_SIZE),
    albu.Normalize(),
    ToTensor()
])


test_image_path = local_path / 'data3' / Path("test")
TEST_IMAGES = sorted(test_image_path.glob("*.png"))
test_dataset = SegmentationDataset(
    images=TEST_IMAGES,
    transforms=test_transform
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

for prediction in runner.predict_loader(loader=test_loader):
    submission["ImageId"].extend(s[:-4] for s in prediction["filename"])
    submission["EncodedPixels"].extend(
        rle_encoding(torch.sigmoid(mask.cpu()).numpy().squeeze(0) > threshold) for mask in prediction["mask"]
    )