# Segmentation task

Hi! It's a segmentation task baseline notebook.
It include a data reader, baseline model and submission generator.

You should use GPU to train your model, so we recommend using [Kaggle Notebooks](https://www.kaggle.com/docs/notebooks).
To get maximum score of the task, your model should have IoU greater than `0.8`.

You can use everything, that suits into the rules in `README.md`.

In [None]:
from datetime import datetime
import numpy as np
from pathlib import Path
import pandas as pd

import torch
import torch.nn as nn
from torch.utils import data

import catalyst
from catalyst import dl
from catalyst.utils import metrics, imread, set_global_seed

from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

import albumentations as albu
from albumentations.pytorch import ToTensorV2 as ToTensor
import cv2

In [2]:
set_global_seed(42)

## Dataset

Load train data. Don't forget to add test data. Use test data, to compare methods/models/etc.

In [3]:
local_path = Path().cwd().parent

In [4]:
train_image_path = local_path / 'data3'/ Path("train") / "images"
train_mask_path = local_path / 'data3'/ Path("train") / "masks"
ALL_IMAGES = sorted(train_image_path.glob("*.png"))
ALL_MASKS = sorted(train_mask_path.glob("*.png"))

In [5]:
class SegmentationDataset(Dataset):
    def __init__(self, images=None, masks=None, transforms=None) -> None:
        self.images = images
        self.masks = masks
        self.transforms = transforms

    def __len__(self) -> int:
        return len(self.images)

    def __getitem__(self, idx: int) -> dict:
        image_path = self.images[idx]
        image = imread(image_path)

        result = {"image": image}

        if self.masks is not None:
            result["mask"] = imread(self.masks[idx]).mean(2) // 255

        if self.transforms is not None:
            result = self.transforms(**result)
            if result.get("mask", None) is not None:
                result["mask"] = result["mask"].unsqueeze(0)

        result["filename"] = image_path.name
        result["image size"] = image.shape[:2]

        return result

## Augmentations

To train an accurate model for a segmentation task, you need a lot of data.
Use data augmentations to simulate a bigger dataset.

In [6]:
IMAGE_SIZE = 256
train_transform = albu.Compose([
    albu.HorizontalFlip(p=0.3),
    albu.VerticalFlip(p=0.3),
    albu.RandomRotate90(p=0.3),
    albu.Resize(IMAGE_SIZE, IMAGE_SIZE),
    albu.RandomResizedCrop(IMAGE_SIZE, IMAGE_SIZE, p=0.3),
    albu.Normalize(),
    ToTensor()
])

valid_transform = albu.Compose([
    albu.Resize(IMAGE_SIZE, IMAGE_SIZE),
    albu.RandomResizedCrop(IMAGE_SIZE, IMAGE_SIZE, p=0.25),
    albu.Normalize(),
    ToTensor()
])

In [7]:
batch_size = 8
num_workers = 4

indices = np.arange(len(ALL_IMAGES))

train_indices, valid_indices = train_test_split(
    indices, test_size=0.1, random_state=42, shuffle=True
)

np_images = np.array(ALL_IMAGES)
np_masks = np.array(ALL_MASKS)

train_dataset = SegmentationDataset(
    images = np_images.tolist(),
    masks = np_masks.tolist(),
    transforms = train_transform
)
loaders = {
    "train": DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
    ),
}

Our current baseline model is `U-Net`.
You can do anything with it: add pretrained backbone, make model wider or deeper or change a model architecture.
You can use `torchvision` module to create a backbone, but not a whole model.

In [8]:
class Baseline(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.down_1 = self.make_down_layer_(3, 64)
        self.down_2 = self.make_down_layer_(64, 128)
        self.down_3 = self.make_down_layer_(128, 256)
        self.down_4 = self.make_down_layer_(256, 512)

        self.up_1 = self.make_up_layer_(512, 256)
        self.up_2 = self.make_up_layer_(256, 128)
        self.up_3 = self.make_up_layer_(128, 64)
        self.up_4 = nn.Sequential(
            nn.ConvTranspose2d(64, 1, kernel_size=3, padding=1, stride=2, output_padding=1)
        )

    def make_down_layer_(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

    def make_up_layer_(self, in_channels, out_channels):
        return nn.ModuleList(
            [
                nn.ConvTranspose2d(
                    in_channels,
                    out_channels,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    output_padding=1,
                ),
                nn.BatchNorm2d(2 * out_channels),
                nn.LeakyReLU(),
                nn.ConvTranspose2d(
                    2 * out_channels,
                    out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                ),
                nn.BatchNorm2d(out_channels),
                nn.LeakyReLU(),
            ]
        )

    def forward(self, image):
        x_1 = self.down_1(image)
        x_2 = self.down_2(x_1)
        x_3 = self.down_3(x_2)
        x_4 = self.down_4(x_3)

        u_1 = self.up_1[0](x_4)
        u_1 = torch.cat([x_3, u_1], axis=1)
        for m in self.up_1[1:]:
            u_1 = m(u_1)
        
        u_2 = self.up_2[0](u_1)
        u_2 = torch.cat([x_2, u_2], axis=1)
        for m in self.up_2[1:]:
            u_2 = m(u_2)

        u_3 = self.up_3[0](u_2)
        u_3 = torch.cat([x_1, u_3], axis=1)
        for m in self.up_3[1:]:
            u_3 = m(u_3)

        return self.up_4(u_3)

In [9]:
from catalyst.contrib.nn import DiceLoss, IoULoss
from catalyst.dl.runner import SupervisedRunner
from torch.nn.functional import interpolate


class SegmentationRunner(SupervisedRunner):
    def predict_batch(self, batch):
        prediction = {"filename": batch["filename"]}
        masks = self.model(batch[self.input_key].to(self.device))
        image_size = list(zip(*batch["image size"]))
        prediction["mask"] = [
            interpolate(mask.unsqueeze(0), image_size).squeeze(0)
            for mask, image_size in zip(masks, image_size)
        ]
        return prediction

# we have multiple criterions
model = Baseline()
criterion = {
    "dice": DiceLoss(),
    "iou": IoULoss(),
    "bce": nn.BCEWithLogitsLoss()
}

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.0003)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.25, patience=3, min_lr=1e-8, mode='max')

runner = SegmentationRunner(input_key="image", input_target_key="mask")

In [10]:
callbacks = [
    dl.CriterionCallback(
        input_key="mask", prefix="loss_dice", criterion_key="dice"
    ),
    dl.CriterionCallback(
        input_key="mask", prefix="loss_iou", criterion_key="iou"
    ),
    dl.CriterionCallback(
        input_key="mask", prefix="loss_bce", criterion_key="bce"
    ),
    dl.MetricAggregationCallback(
        prefix="loss",
        mode="weighted_sum",
        metrics={"loss_dice": 1.0, "loss_iou": 1.0, "loss_bce": 0.8},
    ),
    dl.DiceCallback(input_key="mask"),
    dl.IouCallback(input_key="mask"),
    dl.EarlyStoppingCallback(
            patience=7,
            metric="iou",
            minimize=False,
            min_delta=1e-4,
        )
]

In [11]:
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir=Path("logs") / datetime.now().strftime("%Y%m%d-%H%M%S"),
    num_epochs=100,
    main_metric="iou", # kaggle competition metric
    minimize_metric=False,
    verbose=True,
)


Attention, there is only one dataloader - train



1/100 * Epoch (train):   0% 0/51 [00:00<?, ?it/s]


This overload of add is deprecated:
	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)



1/100 * Epoch (train): 100% 51/51 [00:17<00:00,  2.90it/s, dice=0.832, iou=0.713, loss=0.654, loss_bce=0.249, loss_dice=0.168, loss_iou=0.287]
[2020-10-18 12:38:00,249] 
1/100 * Epoch 1 (_base): lr=0.0003 | momentum=0.9000
1/100 * Epoch 1 (train): dice=0.7344 | iou=0.5894 | loss=0.9718 | loss_bce=0.3695 | loss_dice=0.2656 | loss_iou=0.4106
2/100 * Epoch (train): 100% 51/51 [00:17<00:00,  2.93it/s, dice=0.779, iou=0.638, loss=0.871, loss_bce=0.361, loss_dice=0.221, loss_iou=0.362]
[2020-10-18 12:38:18,000] 
2/100 * Epoch 2 (_base): lr=0.0003 | momentum=0.9000
2/100 * Epoch 2 (train): dice=0.7981 | iou=0.6686 | loss=0.7725 | loss_bce=0.2989 | loss_dice=0.2019 | loss_iou=0.3314
3/100 * Epoch (train): 100% 51/51 [00:17<00:00,  2.88it/s, dice=0.876, iou=0.779, loss=0.492, loss_bce=0.184, loss_dice=0.124, loss_iou=0.221]
[2020-10-18 12:38:42,057] 
3/100 * Epoch 3 (_base): lr=0.0003 | momentum=0.9000
3/100 * Epoch 3 (train): dice=0.8224 | iou=0.7014 | loss=0.6821 | loss_bce=0.2575 | loss_dice

25/100 * Epoch (train): 100% 51/51 [00:18<00:00,  2.83it/s, dice=0.810, iou=0.681, loss=0.797, loss_bce=0.360, loss_dice=0.190, loss_iou=0.319]
[2020-10-18 12:45:51,163] 
25/100 * Epoch 25 (_base): lr=7.500e-05 | momentum=0.9000
25/100 * Epoch 25 (train): dice=0.9176 | iou=0.8495 | loss=0.3456 | loss_bce=0.1408 | loss_dice=0.0824 | loss_iou=0.1505
26/100 * Epoch (train): 100% 51/51 [00:18<00:00,  2.83it/s, dice=0.915, iou=0.842, loss=0.372, loss_bce=0.161, loss_dice=0.085, loss_iou=0.158]
[2020-10-18 12:46:10,160] 
26/100 * Epoch 26 (_base): lr=7.500e-05 | momentum=0.9000
26/100 * Epoch 26 (train): dice=0.9224 | iou=0.8575 | loss=0.3251 | loss_bce=0.1311 | loss_dice=0.0776 | loss_iou=0.1425
27/100 * Epoch (train): 100% 51/51 [00:17<00:00,  2.84it/s, dice=0.897, iou=0.814, loss=0.466, loss_bce=0.222, loss_dice=0.103, loss_iou=0.186]
[2020-10-18 12:46:29,201] 
27/100 * Epoch 27 (_base): lr=7.500e-05 | momentum=0.9000
27/100 * Epoch 27 (train): dice=0.9193 | iou=0.8522 | loss=0.3448 | los

## Submission

To generate submission, you'll have to write masks for images.
Usually, in `Kaggle` segmentation competitions masks are encoded in the run length format.
For more information, check `Evaluation` page in `Overview`.

In [12]:
def rle_encoding(x):
    """
    x: numpy array of shape (height, width), 1 - mask, 0 - background
    Returns run length as list
    """
    dots = np.where(x.T.flatten() == 1)[
        0
    ]  # .T sets Fortran order down-then-right
    run_lengths = []
    prev = -2
    for b in dots:
        if b > prev + 1:
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return " ".join([str(i) for i in run_lengths])

This code below will generate a submission.
It reads images from `test` folder and gathers prediction from the trained model.
Check your submission before uploading it into `Kaggle`.

In [13]:
from PIL import Image
import pandas as pd

submission = {"ImageId": [], "EncodedPixels": []}
threshold = 0.5

test_transform = albu.Compose([
    albu.Resize(IMAGE_SIZE, IMAGE_SIZE),
    albu.Normalize(),
    ToTensor()
])


test_image_path = local_path / 'data3' / Path("test")
TEST_IMAGES = sorted(test_image_path.glob("*.png"))
test_dataset = SegmentationDataset(
    images=TEST_IMAGES,
    transforms=test_transform
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

for prediction in runner.predict_loader(loader=test_loader):
    submission["ImageId"].extend(s[:-4] for s in prediction["filename"])
    submission["EncodedPixels"].extend(
        rle_encoding(torch.sigmoid(mask.cpu()).numpy().squeeze(0) > threshold) for mask in prediction["mask"]
    )

In [None]:
pd.DataFrame(submission).to_csv(f"sub_seg_{str(datetime.now().strftime('%Y%m%d-%H%M%S'))}.csv", index=False)