# Segmentation task

Hi! This is a baseline notebook for the segmentation task.
It includes a data reader, a baseline model, and a submission generator.

You should use a GPU to train your model, so we recommend using [Kaggle Notebooks](https://www.kaggle.com/docs/notebooks).
To get the maximum score for the task, your model should achieve an IoU greater than `0.8`.

You can use anything that complies with the rules in `README.md`.

In [None]:
# !pip install -U catalyst albumentations

In [None]:
# For Colab users: download the dataset zip files and unpack them.
# If you use Kaggle Notebooks, the dataset is already available on disk.

# !gdown https://drive.google.com/uc?id=19fBCItau0MP1ABKlBNkpj1pMxOzIZLML&export=download
# !gdown https://drive.google.com/uc?id=1X7TLVCvi2a57SyjAdExPppzakxfenRD0&export=download
# !unzip train.zip -d train
# !unzip test.zip -d test

In [None]:
from datetime import datetime
import numpy as np
from pathlib import Path
import pandas as pd

import torch
import torch.nn as nn
from torch.utils import data

import catalyst
from catalyst import dl
from catalyst.utils import metrics, imread, set_global_seed

In [None]:
# Seed the random number generators with a fixed value through catalyst's
# helper; presumably this is what makes repeated runs comparable.
set_global_seed(42)

## Dataset

Load the training data. Don't forget to add test data, and use it to compare methods, models, etc.

In [None]:
# Training images and their masks live in sibling folders under train/.
# Both listings are sorted so that an image and its mask share the same
# position; this presumably relies on matching file names — TODO confirm.
train_image_path = Path("train", "images")
train_mask_path = Path("train", "masks")
ALL_IMAGES, ALL_MASKS = (
    sorted(folder.glob("*.png"))
    for folder in (train_image_path, train_mask_path)
)

In [None]:
from torch.utils.data import Dataset


class SegmentationDataset(Dataset):
    """Dataset that yields an image and, when available, its mask.

    Each item is a dict with the keys ``"image"``, optionally ``"mask"``,
    plus ``"filename"`` (name of the image file) and ``"image size"`` (the
    first two dimensions of the image as read from disk, before transforms).
    """

    def __init__(self, images=None, masks=None, transforms=None) -> None:
        """Store the sample lists and the optional transform pipeline.

        Args:
            images: sequence of image paths (objects exposing ``.name``).
            masks: sequence of mask paths aligned with ``images`` by index,
                or ``None`` when no masks are available.
            transforms: callable invoked as ``transforms(image=..., mask=...)``
                that returns a dict, or ``None`` to skip transformation.
        """
        self.images = images
        self.masks = masks
        self.transforms = transforms

    def __len__(self) -> int:
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, idx: int) -> dict:
        """Load sample ``idx`` and run it through the transforms, if any."""
        path = self.images[idx]
        raw_image = imread(path)

        sample = {"image": raw_image}

        if self.masks is not None:
            raw_mask = imread(self.masks[idx])
            # Average over axis 2, then floor-divide by 255; for masks stored
            # with 0/255 pixels this presumably gives a 0/1 map.
            sample["mask"] = raw_mask.mean(2) // 255

        if self.transforms is not None:
            sample = self.transforms(**sample)
            transformed_mask = sample.get("mask", None)
            if transformed_mask is not None:
                # Add a leading dimension to the transformed mask.
                sample["mask"] = transformed_mask.unsqueeze(0)

        # Metadata comes from the untransformed image.
        sample["filename"] = path.name
        sample["image size"] = raw_image.shape[:2]

        return sample

## Augmentations

To train an accurate model for a segmentation task, you need a lot of data.
Use data augmentations to simulate a bigger dataset.

In [None]:
import albumentations as albu
from albumentations.pytorch import ToTensorV2 as ToTensor
import cv2

# Side length passed to every Resize / RandomResizedCrop below.
IMAGE_SIZE = 256
# Training pipeline: random horizontal flip (p=0.5), resize, a random resized
# crop applied with p=0.3, normalization with the library defaults, and
# conversion to a tensor.
# NOTE(review): the two positional IMAGE_SIZE arguments are presumably
# (height, width) — confirm against the installed albumentations version.
train_transform = albu.Compose([
    albu.HorizontalFlip(p=0.5),
    albu.Resize(IMAGE_SIZE, IMAGE_SIZE),
    albu.RandomResizedCrop(IMAGE_SIZE, IMAGE_SIZE, p=0.3),
    albu.Normalize(),
    ToTensor()
])

# Validation pipeline: the same resize, normalization and tensor conversion,
# without the flip and crop steps.
valid_transform = albu.Compose([
    albu.Resize(IMAGE_SIZE, IMAGE_SIZE),
    albu.Normalize(),
    ToTensor()
])

In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

batch_size = 16
num_workers = 4

# Images and masks are paired purely by position, so a length mismatch would
# silently attach the wrong mask to an image (or fail later with IndexError).
if len(ALL_IMAGES) != len(ALL_MASKS):
    raise ValueError(
        f"Found {len(ALL_IMAGES)} images but {len(ALL_MASKS)} masks; "
        "every image needs exactly one mask."
    )

indices = np.arange(len(ALL_IMAGES))

# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split identical between runs.
train_indices, valid_indices = train_test_split(
    indices, test_size=0.2, random_state=42, shuffle=True
)

# Arrays allow fancy indexing with the index arrays produced above.
np_images = np.array(ALL_IMAGES)
np_masks = np.array(ALL_MASKS)

train_dataset = SegmentationDataset(
    images=np_images[train_indices].tolist(),
    masks=np_masks[train_indices].tolist(),
    transforms=train_transform,
)

valid_dataset = SegmentationDataset(
    images=np_images[valid_indices].tolist(),
    masks=np_masks[valid_indices].tolist(),
    transforms=valid_transform,
)

loaders = {
    "train": DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        drop_last=True,
    ),
    # Keep the last, possibly smaller, batch: dropping it would leave some
    # validation samples out of the metrics (or yield no batches at all when
    # the validation set is smaller than batch_size).
    "valid": DataLoader(
        valid_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        drop_last=False,
    ),
}


Our current baseline model is `U-Net`.
You can do anything with it: add a pretrained backbone, make the model wider or deeper, or change the model architecture.
You can use the `torchvision` module to create a backbone, but not a whole model.

In [None]:
class Baseline(nn.Module):
    """U-Net-style network mapping a 3-channel image to a 1-channel map.

    Four encoder stages each end in a stride-2 max-pool. Three decoder stages
    up-sample with a stride-2 transposed convolution and concatenate the
    matching encoder output along the channel axis; a final stride-2
    transposed convolution produces the single output channel. Inputs whose
    height and width are multiples of 16 keep the concatenated tensors
    shape-compatible.
    """

    def __init__(self):
        super().__init__()

        # Encoder stages (channels: 3 -> 64 -> 128 -> 256 -> 512).
        self.down_1 = self.make_down_layer_(3, 64)
        self.down_2 = self.make_down_layer_(64, 128)
        self.down_3 = self.make_down_layer_(128, 256)
        self.down_4 = self.make_down_layer_(256, 512)

        # Decoder stages (channels: 512 -> 256 -> 128 -> 64 -> 1).
        self.up_1 = self.make_up_layer_(512, 256)
        self.up_2 = self.make_up_layer_(256, 128)
        self.up_3 = self.make_up_layer_(128, 64)
        self.up_4 = nn.Sequential(
            nn.ConvTranspose2d(
                64, 1, kernel_size=3, padding=1, stride=2, output_padding=1
            )
        )

    def make_down_layer_(self, in_channels, out_channels):
        """Build one encoder stage: two conv/BN/ReLU groups, then a max-pool."""
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        return nn.Sequential(*layers)

    def make_up_layer_(self, in_channels, out_channels):
        """Build one decoder stage as a ModuleList.

        Element 0 is the up-sampling transposed convolution. The remaining
        elements run after the skip tensor has been concatenated, which is
        why they start from ``2 * out_channels`` channels.
        """
        merged_channels = 2 * out_channels
        upsample = nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
        )
        refine = [
            nn.BatchNorm2d(merged_channels),
            nn.ReLU(),
            nn.ConvTranspose2d(
                merged_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        return nn.ModuleList([upsample, *refine])

    @staticmethod
    def _merge_skip(stage, features, skip):
        """Up-sample ``features``, prepend ``skip`` on the channel axis, refine."""
        merged = torch.cat([skip, stage[0](features)], dim=1)
        for layer in stage[1:]:
            merged = layer(merged)
        return merged

    def forward(self, image):
        """Run the encoder, then the decoder with skip connections."""
        encoded = []
        current = image
        for stage in (self.down_1, self.down_2, self.down_3, self.down_4):
            current = stage(current)
            encoded.append(current)

        # The deepest encoder output feeds the decoder; the shallower ones are
        # consumed as skip tensors, deepest first.
        *skips, current = encoded
        decoder_stages = (self.up_1, self.up_2, self.up_3)
        for stage, skip in zip(decoder_stages, reversed(skips)):
            current = self._merge_skip(stage, current, skip)

        return self.up_4(current)

In [None]:
from catalyst.contrib.nn import DiceLoss, IoULoss
from catalyst.dl.runner import SupervisedRunner
from torch.nn.functional import interpolate


class SegmentationRunner(SupervisedRunner):
    """SupervisedRunner whose predictions are resized per image."""

    def predict_batch(self, batch):
        """Run the model on a batch and resize every output individually.

        Args:
            batch: dict holding the model input under ``self.input_key`` plus
                the ``"filename"`` and ``"image size"`` entries.

        Returns:
            dict with ``"filename"`` (taken from the batch unchanged) and
            ``"mask"``, a list with one interpolated model output per sample.
        """
        inputs = batch[self.input_key].to(self.device)
        outputs = self.model(inputs)
        # Regroup "image size" so that there is one size entry per sample.
        target_sizes = list(zip(*batch["image size"]))

        resized = []
        for output, target_size in zip(outputs, target_sizes):
            # interpolate() needs a batch dimension; add it, then remove it.
            scaled = interpolate(output.unsqueeze(0), target_size)
            resized.append(scaled.squeeze(0))

        return {"filename": batch["filename"], "mask": resized}

# Several loss terms are used at once; the dict keys are the names that the
# CriterionCallback instances refer to through `criterion_key`.
model = Baseline()
criterion = dict(
    dice=DiceLoss(),
    iou=IoULoss(),
    bce=nn.BCEWithLogitsLoss(),
)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=0.0003,
)

runner = SegmentationRunner(input_key="image", input_target_key="mask")

In [None]:
# One CriterionCallback per loss term, each tied to its criterion key.
callbacks = [
    dl.CriterionCallback(input_key="mask", prefix=prefix, criterion_key=key)
    for prefix, key in (
        ("loss_dice", "dice"),
        ("loss_iou", "iou"),
        ("loss_bce", "bce"),
    )
]
# Combine the three terms into "loss" as a weighted sum.
callbacks.append(
    dl.MetricAggregationCallback(
        prefix="loss",
        mode="weighted_sum",
        metrics={"loss_dice": 1.0, "loss_iou": 1.0, "loss_bce": 0.8},
    )
)
# Dice and IoU metric callbacks computed against the "mask" target.
callbacks.extend(
    [
        dl.DiceCallback(input_key="mask"),
        dl.IouCallback(input_key="mask"),
    ]
)

In [None]:
# Train for 50 epochs with the model, losses, optimizer, loaders and callbacks
# defined above. Logs go to a fresh timestamped sub-directory of logs/.
# "iou" is the main metric and minimize_metric=False marks it as one to
# maximize (presumably used for best-checkpoint selection — confirm in the
# catalyst docs).
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=callbacks,
    logdir=Path("logs") / datetime.now().strftime("%Y%m%d-%H%M%S"),
    num_epochs=50,
    main_metric="iou", # kaggle competition metric
    minimize_metric=False,
    verbose=True,
)

## Submission

To generate a submission, you'll have to write masks for the images.
In `Kaggle` segmentation competitions, masks are usually encoded in run-length format.
For more information, check the `Evaluation` page in `Overview`.

In [None]:
def rle_encoding(x):
    """Encode a binary mask as a run-length string.

    Pixels are numbered from 1 in column-major order (down each column, then
    on to the next column to the right). Every run of consecutive mask pixels
    is written as a ``start length`` pair.

    Args:
        x: array-like of shape (height, width); entries equal to 1 (or
            ``True``) are mask, everything else is background.

    Returns:
        A space-separated string ``"start_1 length_1 start_2 length_2 ..."``,
        or the empty string when the mask has no foreground pixel.
    """
    # .T followed by flatten() walks the mask down each column first.
    dots = np.flatnonzero(np.asarray(x).T.flatten() == 1)
    if dots.size == 0:
        return ""

    # A new run begins wherever two consecutive foreground indices are not
    # adjacent; `breaks` holds the position of the last pixel of each run
    # except the final one.
    breaks = np.flatnonzero(np.diff(dots) > 1)
    run_starts = dots[np.concatenate(([0], breaks + 1))]
    run_ends = dots[np.concatenate((breaks, [dots.size - 1]))]
    run_lengths = run_ends - run_starts + 1

    # Interleave 1-based starts with lengths: s1 l1 s2 l2 ...
    pairs = np.column_stack((run_starts + 1, run_lengths)).ravel()
    return " ".join(str(value) for value in pairs.tolist())

The code below generates a submission.
It reads images from the `test` folder and gathers predictions from the trained model.
Check your submission before uploading it to `Kaggle`.

In [None]:
from PIL import Image
import pandas as pd

# Columns of the submission file: image id and run-length-encoded mask.
submission = {"ImageId": [], "EncodedPixels": []}
# Probability above which a pixel is counted as mask.
threshold = 0.5

# Predictions must come from the test images, so glob the test folder.
test_image_path = Path("test") / "images"
TEST_IMAGES = sorted(test_image_path.glob("*.png"))
# No masks are passed: the dataset then yields images and metadata only.
test_dataset = SegmentationDataset(
    images=TEST_IMAGES,
    transforms=valid_transform,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

for prediction in runner.predict_loader(loader=test_loader):
    # Drop the 4-character ".png" suffix to obtain the image id.
    submission["ImageId"].extend(s[:-4] for s in prediction["filename"])
    # The model is trained with BCEWithLogitsLoss, so its outputs are logits;
    # apply a sigmoid so that `threshold` is compared against probabilities.
    submission["EncodedPixels"].extend(
        rle_encoding(
            (torch.sigmoid(mask) > threshold).cpu().numpy().squeeze(0)
        )
        for mask in prediction["mask"]
    )

In [None]:
# Write the collected predictions to submission.csv without the index column.
pd.DataFrame(submission).to_csv("submission.csv", index=False)

In [None]:
# Display the submission table as the cell output for a quick visual check.
pd.DataFrame(submission)