# Training Notebook (Catalyst)

### Experiments

* [x] Efficientnet b5 **PL:0.652**
* [x] FP16, refer to this [catalyst tutorial](https://github.com/catalyst-team/catalyst/blob/master/examples/notebooks/segmentation-tutorial.ipynb)
    * The model will have gradient overflow after 5th epoch, everything else is okay
* [x] Saving & Loading from JIT **PL:0.655**
* [x] Ensemble
* [x] 384x576
* [x] polygon convex
* [ ] Test the funnel network again
* [ ] Ranger optimizer 
    * [ ] RADAM
    * [ ] Look Ahead
* [ ] Train all

### Installing Apex for FP16

```shell
git clone https://github.com/NVIDIA/apex
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex
is_fp16_used = True
```

### Other Installations

```
pip install catalyst
pip install pretrainedmodels
pip install git+https://github.com/qubvel/segmentation_models.pytorch
pip install pytorch-toolbelt
pip install torchvision==0.4
```

Our starter kernel is from [this open kernel](https://www.kaggle.com/artgor/segmentation-in-pytorch-using-convenient-tools)

## Importing libraries

In [1]:
import os
import cv2
import collections
import time 
import tqdm
from PIL import Image
from functools import partial
train_on_gpu = True

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data import TensorDataset, DataLoader,Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

import albumentations as albu
from albumentations import pytorch as AT

from catalyst.data import Augmentor
from catalyst.dl import utils
from catalyst.data.reader import ImageReader, ScalarReader, ReaderCompose, LambdaReader
from catalyst.dl.runner import SupervisedRunner
from catalyst.contrib.models.segmentation import Unet
from catalyst.dl.callbacks import DiceCallback, EarlyStoppingCallback, InferCallback, CheckpointCallback

import segmentation_models_pytorch as smp
from onecyclelr import OneCycleLR

In [2]:
from catalyst.contrib.schedulers.onecycle import OneCycleLRWithWarmup

## Helper functions and classes

In [3]:
# --- Training switches ---
TRAIN = True  # gate for the runner.train(...) cell
RANGER = True  # RAdam wrapped in Lookahead instead of plain Adam
FUNNEL = False  # prepend the stride-2 funnel conv block and feed double-sized inputs
FP16 = False # Do we use half precision?
fp16_params = dict(opt_level = "O2") if FP16 else None  # passed to runner.train(fp16=...)

bs = 20 if FP16 else 12  # DataLoader batch size for the train/valid loaders
EPOCHES = 60  # number of training epochs
TEST_PROPORTION = 0.01  # fraction of images held out for validation

DECODE_LR = 1e-2  # learning rate of the decoder parameter group
ENCODE_LR = 1e-3  # learning rate of the pretrained encoder parameter group


ONECYCLELR = False  # OneCycleLRWithWarmup instead of ReduceLROnPlateau
LOGDIR = "./logs/segmentation_b5_ranger_one_cycle"



# --- Fine-tuning: optionally start from a saved state dict ---
LOAD = False
LOAD_PATH = "cata-eff-b5.pth"

# --- Model structure ---

ENCODER = 'efficientnet-b5' # Encoder model name
ENCODER_WEIGHTS = 'imagenet' # Encoder pretrained weights
DEVICE = 'cuda' 
ACTIVATION = None  # handed to smp.FPN(activation=...)


MIN_SIZE_RANGE = 3
# Keeps only the last MIN_SIZE_RANGE entries of the candidate list.
MIN_SIZE = [0, 100, 1200, 5000,8000, 10000,12000][-MIN_SIZE_RANGE:]

SIZE = (384,576)
# The funnel block downsamples with stride 2, so its input is twice SIZE.
INPUT_SIZE = tuple(d*2 for d in SIZE) if FUNNEL else SIZE
OUTPUT_SIZE = SIZE

# --- JIT inference ---

JIT_PRED = False  # predict with the traced models in JIT_PATHS instead of the best checkpoint
# Ensemble Model Path List
JIT_PATHS = [
    "jit/jit_fpn_se_resnext.pth", # 0.659
]


In [4]:
# Sanity check of the resolved network input size and mask output size.
print(INPUT_SIZE,OUTPUT_SIZE)

(384, 576) (384, 576)


In [5]:
def get_img(x, folder: str='train_images'):
    """
    Return image based on image name and folder.

    :param x: image file name inside the folder
    :param folder: sub-folder of the module-level ``path`` data root
    :raises FileNotFoundError: if OpenCV cannot read the file
    Returns the image with channels reordered from BGR to RGB
    """
    image_path = os.path.join(f"{path}/{folder}", x)
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising,
    # which would otherwise surface as an opaque cvtColor error.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


def rle_decode(mask_rle: str = '', shape: tuple = (1400, 2100)):
    '''
    Turn a run-length encoded string into a binary mask.

    :param mask_rle: whitespace separated "start length" pairs, starts are 1-based
    :param shape: (height, width) of array to return
    Returns uint8 numpy array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even positions hold the 1-based starts, odd positions the run lengths.
    run_starts = np.array(tokens[0::2], dtype=int) - 1
    run_lengths = np.array(tokens[1::2], dtype=int)

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, length in zip(run_starts, run_lengths):
        flat[begin:begin + length] = 1

    # Runs are laid out column by column, hence the Fortran-order reshape.
    return flat.reshape(shape, order='F')


def make_mask(df: pd.DataFrame, image_name: str='img.jpg', shape: tuple = (1400, 2100)):
    """
    Create mask based on df, image name and shape.

    :param df: frame with an 'im_id' column and an 'EncodedPixels' column
    :param image_name: value of 'im_id' whose rows are decoded
    :param shape: (height, width) of each decoded mask
    Returns float32 array of shape (height, width, 4); the i-th matching row
    fills channel i, rows without an encoding leave their channel at zero
    """
    encoded_masks = df.loc[df['im_id'] == image_name, 'EncodedPixels']
    masks = np.zeros((shape[0], shape[1], 4), dtype=np.float32)

    for idx, label in enumerate(encoded_masks.values):
        # pd.notnull covers None and float64 NaN as well; an identity check
        # against np.nan only matches that one singleton object.
        if pd.notnull(label):
            # Forward the requested shape so non-default sizes decode correctly.
            masks[:, :, idx] = rle_decode(label, shape)

    return masks


def to_tensor(x, **kwargs):
    """
    Move the channel axis of an image or mask first and cast to float32.

    Extra keyword arguments are accepted and ignored.
    """
    channels_first = np.transpose(x, (2, 0, 1))
    return channels_first.astype(np.float32)


def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns "start length" pairs joined by spaces, starts are 1-based and
    counted column by column
    '''
    flat = img.flatten(order='F')
    # Zero sentinels on both ends make every run produce a start and an end edge.
    padded = np.concatenate([[0], flat, [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each end position into a run length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))


def visualize(image, mask, original_image=None, original_mask=None):
    """
    Plot an image next to its four class masks.

    When an original image or mask is supplied as well, the originals are
    drawn in a first row and the (transformed) image and mask in a second row.
    """
    fontsize = 14
    class_names = ('Fish', 'Flower', 'Gravel', 'Sugar')

    if original_image is None and original_mask is None:
        _, axes = plt.subplots(1, 5, figsize=(24, 24))
        axes[0].imshow(image)
        for col, name in enumerate(class_names, start=1):
            axes[col].imshow(mask[:, :, col - 1])
            axes[col].set_title(f'Mask {name}', fontsize=fontsize)
        return

    _, axes = plt.subplots(2, 5, figsize=(24, 12))
    rows = (
        (original_image, original_mask, 'Original image', 'Original mask'),
        (image, mask, 'Transformed image', 'Transformed mask'),
    )
    for row, (row_image, row_mask, image_title, mask_title) in enumerate(rows):
        axes[row, 0].imshow(row_image)
        axes[row, 0].set_title(image_title, fontsize=fontsize)
        for col, name in enumerate(class_names, start=1):
            axes[row, col].imshow(row_mask[:, :, col - 1])
            axes[row, col].set_title(f'{mask_title} {name}', fontsize=fontsize)
            
            
def visualize_with_raw(image, mask, original_image=None, original_mask=None, raw_image=None, raw_mask=None):
    """
    Plot three rows of an image followed by its four class masks.

    Row 1 shows the original image and mask, row 2 the raw image and raw
    predicted mask, row 3 the image with the post-processed predicted mask.
    """
    fontsize = 14
    class_names = ('Fish', 'Flower', 'Gravel', 'Sugar')

    _, axes = plt.subplots(3, 5, figsize=(24, 12))

    # NOTE(review): the second row reuses the 'Original image' title - confirm intended.
    rows = (
        (original_image, original_mask, 'Original image', 'Original mask'),
        (raw_image, raw_mask, 'Original image', 'Raw predicted mask'),
        (image, mask, 'Transformed image', 'Predicted mask with processing'),
    )
    for row, (row_image, row_mask, image_title, mask_title) in enumerate(rows):
        axes[row, 0].imshow(row_image)
        axes[row, 0].set_title(image_title, fontsize=fontsize)
        for col, name in enumerate(class_names, start=1):
            axes[row, col].imshow(row_mask[:, :, col - 1])
            axes[row, col].set_title(f'{mask_title} {name}', fontsize=fontsize)
            
            
def plot_with_augmentation(image, mask, augment):
    """
    Apply `augment` to the image/mask pair and plot the result under the originals.

    `augment` is called with `image=` and `mask=` keywords and must return a
    mapping with 'image' and 'mask' entries.
    """
    result = augment(image=image, mask=mask)
    visualize(result['image'], result['mask'], original_image=image, original_mask=mask)
    
    
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise for arrays."""
    return 1 / (1 + np.exp(-x))


def post_process(probability, threshold, min_size):
    """
    Post processing of each predicted mask, components with lesser number of pixels
    than `min_size` are ignored

    :param probability: single-channel array of per-pixel probabilities
    :param threshold: pixels strictly above this value count as foreground
    :param min_size: components with at most this many pixels are discarded
    Returns (mask, count): float32 mask shaped like the input
    (1 - kept component, 0 - background) and the number of kept components
    """
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    # Sized from the label map instead of a fixed 350x525, so inputs of any
    # resolution work; 350x525 inputs behave exactly as before.
    predictions = np.zeros(component.shape, np.float32)
    num = 0
    # Label 0 is the background, so real components start at 1.
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num


def get_training_augmentation():
    """Build the training pipeline: random augmentations, then a resize to INPUT_SIZE."""
    return albu.Compose([
        albu.Blur(p=0.5),
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
        albu.RandomBrightness(p=0.5),
        albu.RandomContrast(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=0.5, border_mode=0),
        albu.GridDistortion(p=0.5),
        albu.OpticalDistortion(p=0.5, distort_limit=2, shift_limit=0.5),
        # Resize last so every sample leaves the pipeline at the network input size.
        albu.Resize(*INPUT_SIZE),
    ])


def get_validation_augmentation():
    """Build the validation/test pipeline: only a resize to INPUT_SIZE."""
    return albu.Compose([albu.Resize(*INPUT_SIZE)])


def get_preprocessing(preprocessing_fn):
    """Construct preprocessing transform

    Args:
        preprocessing_fn (callable): data normalization function applied to
            the image (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose that normalizes the image and then
            runs `to_tensor` on both image and mask

    """
    normalize = albu.Lambda(image=preprocessing_fn)
    channels_first = albu.Lambda(image=to_tensor, mask=to_tensor)
    return albu.Compose([normalize, channels_first])


def dice(img1, img2):
    """
    Return the Dice coefficient of two binary masks.

    Both inputs are converted to boolean arrays (non-zero means foreground).
    Two empty masks score 1.0, the same convention the threshold search in
    this notebook applies, instead of the NaN a 0/0 division would give.
    """
    # The builtin ``bool`` replaces the ``np.bool`` alias removed in NumPy 1.24.
    img1 = np.asarray(img1).astype(bool)
    img2 = np.asarray(img2).astype(bool)

    denominator = img1.sum() + img2.sum()
    if denominator == 0:
        return 1.0

    intersection = np.logical_and(img1, img2)

    return 2. * intersection.sum() / denominator

## Data overview

Let's have a look at the data first.

In [6]:
from pathlib import Path
# Path.home() resolves the user's home directory even when the HOME
# environment variable is not set (a plain os.environ lookup raises KeyError).
HOME = Path.home()

In [7]:
# Root folder of the competition data (machine-specific absolute path).
path = '/data2/nelson_projs/ucsi'
os.listdir(path)

['sample_submission.csv',
 'train.csv',
 'test_images',
 'train_images',
 'models',
 'segmentation_effb5',
 'jit',
 'logs']

We have folders with train and test images, a file with train image ids and masks, and a sample submission.

In [8]:
# Training labels (one row per image/class pair) and the submission template.
train = pd.read_csv(f'{path}/train.csv')
sub = pd.read_csv(f'{path}/sample_submission.csv')

In [9]:
# Preview the first rows of the training labels.
train.head()

Unnamed: 0,Image_Label,EncodedPixels
0,0011165.jpg_Fish,264918 937 266318 937 267718 937 269118 937 27...
1,0011165.jpg_Flower,1355565 1002 1356965 1002 1358365 1002 1359765...
2,0011165.jpg_Gravel,
3,0011165.jpg_Sugar,
4,002be4f.jpg_Fish,233813 878 235213 878 236613 878 238010 881 23...


In [10]:
# Count the files in the train and test image folders.
n_train = len(os.listdir(f'{path}/train_images'))
n_test = len(os.listdir(f'{path}/test_images'))
print(f'There are {n_train} images in train dataset')
print(f'There are {n_test} images in test dataset')

There are 5546 images in train dataset
There are 3698 images in test dataset


In [11]:
# Rows per class label (the part of Image_Label after the underscore).
train['Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()

Sugar     5546
Flower    5546
Fish      5546
Gravel    5546
Name: Image_Label, dtype: int64

So we have ~5.5k images in train dataset and they can have up to 4 masks: Fish, Flower, Gravel and Sugar.

In [12]:
# Per-class count of rows that actually carry a mask encoding.
(
    train.loc[train['EncodedPixels'].notnull(), 'Image_Label']
    .apply(lambda image_label: image_label.split('_')[1])
    .value_counts()
)

Sugar     3751
Gravel    2939
Fish      2781
Flower    2365
Name: Image_Label, dtype: int64

In [13]:
# Distribution of the number of non-empty masks per image.
(
    train.loc[train['EncodedPixels'].notnull(), 'Image_Label']
    .apply(lambda image_label: image_label.split('_')[0])
    .value_counts()
    .value_counts()
)

2    2372
3    1560
1    1348
4     266
Name: Image_Label, dtype: int64

But there are a lot of empty masks. In fact only 266 images have all four masks. It is important to remember this.

In [14]:
# Image_Label has the form "<file name>_<class>"; split it into two columns
# on both the training labels and the submission template.
train['label'] = train['Image_Label'].apply(lambda x: x.split('_')[1])
train['im_id'] = train['Image_Label'].apply(lambda x: x.split('_')[0])


sub['label'] = sub['Image_Label'].apply(lambda x: x.split('_')[1])
sub['im_id'] = sub['Image_Label'].apply(lambda x: x.split('_')[0])

Let's have a look at the images and the masks.

In [15]:
#fig = plt.figure(figsize=(25, 16))
#for j, im_id in enumerate(np.random.choice(train['im_id'].unique(), 4)):
#    for i, (idx, row) in enumerate(train.loc[train['im_id'] == im_id].iterrows()):
#        ax = fig.add_subplot(5, 4, j * 4 + i + 1, xticks=[], yticks=[])
#        im = Image.open(f"{path}/train_images/{row['Image_Label'].split('_')[0]}")
#        plt.imshow(im)
#       mask_rle = row['EncodedPixels']
#        try: # label might not be there!
#            mask = rle_decode(mask_rle)
#        except:
#            mask = np.zeros((1400, 2100))
#        plt.imshow(mask, alpha=0.5, cmap='gray')
#        ax.set_title(f"Image: {row['Image_Label'].split('_')[0]}. Label: {row['label']}")

We can see that masks can overlap. Also we can see that clouds are really similar to fish, flower and so on. Another important point: masks are often quite big and can have seemingly empty areas.

## Preparing data for modelling

At first, let's create a list of unique image ids and the count of masks for images. This will allow us to make a stratified split based on this count.

In [16]:
# Number of non-empty masks per image, used to stratify the split.
# rename_axis + reset_index(name=...) names the columns explicitly, so the
# result is ['img_id', 'count'] regardless of how the installed pandas
# version labels the output of value_counts().
id_mask_count = (
    train.loc[train['EncodedPixels'].notnull(), 'Image_Label']
    .apply(lambda x: x.split('_')[0])
    .value_counts()
    .rename_axis('img_id')
    .reset_index(name='count')
)
train_ids, valid_ids = train_test_split(id_mask_count['img_id'].values, random_state=42, stratify=id_mask_count['count'], test_size=TEST_PROPORTION)
# Unique image ids of the submission template, in file order.
test_ids = sub['Image_Label'].apply(lambda x: x.split('_')[0]).drop_duplicates().values

## Exploring augmentations with albumentations

One of the important things when working with images is choosing good augmentations. There are a lot of them, so let's have a look at augmentations from albumentations!

In [17]:
#image_name = '8242ba0.jpg'
#image = get_img(image_name)
#mask = make_mask(train, image_name)

In [18]:
#visualize(image, mask)

This is what the original image and its masks look like. Let's try adding some augmentations.

In [19]:
#plot_with_augmentation(image, mask, albu.HorizontalFlip(p=1))

In [20]:
#plot_with_augmentation(image, mask, albu.VerticalFlip(p=1))

In [21]:
#plot_with_augmentation(image, mask, albu.RandomRotate90(p=1))

In [22]:
#plot_with_augmentation(image, mask, albu.ElasticTransform(p=1, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03))

In [23]:
#plot_with_augmentation(image, mask, albu.GridDistortion(p=1))

In [24]:
#plot_with_augmentation(image, mask, albu.OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5))

## Setting up data for training in Catalyst

In [25]:
class CloudDataset(Dataset):
    """
    Dataset yielding ``(image, mask)`` pairs for the given image ids.

    :param df: frame passed to `make_mask` to build the 4-channel mask
    :param datatype: 'test' reads from ``{path}/test_images``, any other
        value reads from ``{path}/train_images``
    :param img_ids: sequence of image file names served by this dataset
    :param transforms: albumentations transform called with ``image=`` and
        ``mask=``; defaults to a horizontal flip followed by ``AT.ToTensor``
    :param preprocessing: optional second transform applied after `transforms`
    """

    def __init__(self, df: pd.DataFrame = None, datatype: str = 'train', img_ids: np.array = None,
                 transforms=None,
                 preprocessing=None):
        self.df = df
        subfolder = 'test_images' if datatype == 'test' else 'train_images'
        self.data_folder = f"{path}/{subfolder}"
        self.img_ids = img_ids
        # Build the default lazily: a default constructed in the signature is
        # created once at class-definition time and shared by every instance.
        if transforms is None:
            transforms = albu.Compose([albu.HorizontalFlip(), AT.ToTensor()])
        self.transforms = transforms
        # Always define the attribute so __getitem__ can test it directly.
        self.funnel = albu.Resize(*OUTPUT_SIZE) if FUNNEL else None
        self.preprocessing = preprocessing

    def __getitem__(self, idx):
        image_name = self.img_ids[idx]
        mask = make_mask(self.df, image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        # cv2.imread returns None for an unreadable file instead of raising.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']

        if self.funnel is not None:
            # Funnel mode: the image stays at INPUT_SIZE, only the mask is
            # brought down to OUTPUT_SIZE.
            mask = self.funnel(image=img, mask=mask)['mask']

        if self.preprocessing:
            preprocessed = self.preprocessing(image=img, mask=mask)
            img = preprocessed['image']
            mask = preprocessed['mask']

        return img, mask

    def __len__(self):
        return len(self.img_ids)

Now we define model and training parameters

In [26]:
# Build the segmentation network. With FUNNEL enabled, a small learnable conv
# block with a stride-2 convolution sits in front of the FPN.
if FUNNEL:
    print("Using funnel model")
    class funnelBlock(nn.Module):
        """Conv -> BatchNorm -> stride-2 conv, mapping 3 channels back to 3 channels."""
        def __init__(self, middle = 12):
            super().__init__()
            self.layers = nn.Sequential(*[
                nn.Conv2d(3,middle, kernel_size=3, padding = 3//2),
                nn.BatchNorm2d(middle),
                nn.Conv2d(middle,3,kernel_size=3, padding = 3//2, stride=(2,2)),
            ])
        def forward(self,x):
            return self.layers(x)
            
    class funnelModel(nn.Module):
        """Funnel block followed by the FPN backbone."""
        def __init__(self):
            super().__init__()
            self.funnel = funnelBlock()
            self.backbone = smp.FPN(encoder_name=ENCODER, encoder_weights=ENCODER_WEIGHTS, 
                        classes=4, activation=ACTIVATION,)
            
        def forward(self, x):
            return self.backbone(self.funnel(x))
        
    model = funnelModel()
    
else:
    print("Not using funnel model")
    model = smp.FPN(
        encoder_name=ENCODER, 
        encoder_weights=ENCODER_WEIGHTS, 
        classes=4, 
        activation=ACTIVATION,
    )
# Input normalization function matching the chosen encoder and its weights.
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

Not using funnel model


In [27]:
num_workers = 8

# Both datasets read from the training labels; they differ in image ids and augmentation.
train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms = get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms = get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers)

# Loader dict handed to runner.train(...).
loaders = {
    "train": train_loader,
    "valid": valid_loader
}


Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().



In [28]:
# x,y = next(iter(train_loader))

# x.size(),y.size()

In [29]:
from catalyst.contrib.optimizers import RAdam, Lookahead

num_epochs = EPOCHES
logdir = LOGDIR

# model, criterion, optimizer
opt_class = RAdam if RANGER else torch.optim.Adam

# Separate parameter groups: the pretrained encoder gets the smaller rate.
# With FUNNEL the FPN lives under ``model.backbone``.
op_list = [
    {'params': model.backbone.decoder.parameters() if FUNNEL else model.decoder.parameters(), 'lr': DECODE_LR},
    {'params': model.backbone.encoder.parameters() if FUNNEL else model.encoder.parameters(), 'lr': ENCODE_LR},  # Pretrained section of the model using smaller lr
]
if FUNNEL:
    op_list.append({'params': model.funnel.parameters(), 'lr': DECODE_LR})

optimizer_ = opt_class(op_list, weight_decay=3e-4)
optimizer = Lookahead(optimizer_) if RANGER else optimizer_

if ONECYCLELR:
    # This branch used to reference an undefined name ``lr`` and raised
    # NameError; the decoder rate is used as the peak of the cycle.
    # NOTE(review): confirm how the scheduler treats the encoder group's lower rate.
    scheduler = OneCycleLRWithWarmup(
        optimizer,
        num_steps=EPOCHES,
        lr_range=(DECODE_LR, DECODE_LR / 10),
        init_lr=DECODE_LR / 10,
        decay_steps=5,
    )
    print("using oneCycleLR")
else:
    scheduler = ReduceLROnPlateau(optimizer, factor=0.22, patience=2)
    print("using ReduceLROnPlateau")

criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
runner = SupervisedRunner()

using ReduceLROnPlateau


In [30]:
# Move the model to the GPU.
model = model.cuda()

In [31]:
# Optional fine-tuning start: restore weights from a saved state dict.
if LOAD:
    model.load_state_dict(torch.load(LOAD_PATH))

## Model training

In [32]:
# Run training; the call is skipped entirely when TRAIN is False.
if TRAIN:
    runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    #callbacks=[DiceCallback()],
    # Dice metric plus early stopping with patience 10 and min_delta 0.001.
    callbacks=[DiceCallback(), EarlyStoppingCallback(patience=10, min_delta=0.001)],
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=fp16_params,
    verbose=True
)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

1/60 * Epoch (train): 100% 458/458 [07:22<00:00,  1.04it/s, dice=0.356, loss=0.968]
1/60 * Epoch (valid): 100% 5/5 [00:02<00:00,  1.71it/s, dice=0.372, loss=1.274]
[2019-11-09 15:24:10,651] 
1/60 * Epoch 1 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=41.4037 | _timers/batch_time=0.3304 | _timers/data_time=0.1770 | _timers/model_time=0.1532 | dice=0.4245 | loss=0.9613
1/60 * Epoch 1 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=153.8622 | _timers/batch_time=0.4238 | _timers/data_time=0.3974 | _timers/model_time=0.0263 | dice=0.3773 | loss=1.2826
2/60 * Epoch (train): 100% 458/458 [07:22<00:00,  1.04it/s, dice=0.533, loss=0.713]
2/60 * Epoch (valid): 100% 5/5 [00:03<00:00,  1.64it/s, dice=0.484, loss=0.941]
[2019-11-09 15:31:39,294] 
2/60 * Epoch 2 (train): _base/lr=0.0100 | _base/momentum=0.9000 | _timers/_fps=41.8619 | _timers/batch_time=0.3305 | _timers/data_time=0.1812 | _timers/model_time=0.1491 | dice=0.4597 | loss=0.8716
2/60 * Epoch 2 (valid):

In [33]:
#torch.save(runner.model.state_dict(),"cata-eff-b5-v2.pth")

In [34]:
#utils.plot_metrics(
#    logdir=logdir, 
#    # specify which metrics we want to plot
#    metrics=["loss", "dice", 'lr', '_base/lr']
#)

In [35]:
#runner.model = runner.model.eval()

In [36]:
#traced = torch.jit.trace(runner.model, torch.rand(2,3,384,576).cuda())

In [37]:
#traced.save("jit/jit-cata-eff-b5-v2.pth")

## Exploring predictions
Let's make predictions on validation dataset.

At first we need to optimize thresholds 

In [38]:
#JIT_PATHS = [
#    "jit/jit-cata-eff-b5-v2.pth"
#]
#
# Re-set here so inference mode can be switched without re-running the config cell.
JIT_PRED = False
# INFER_BS = 28

In [39]:
# Callbacks for runner.infer(...); filled in by the next cells.
infer_cb = []

In [40]:
class ensModel(nn.Module):
    """
    Average the outputs of several models.

    :param models: iterable of ``nn.Module`` instances (e.g. loaded
        TorchScript modules) that accept the same input and return tensors
        of identical shape
    """

    def __init__(self, models):
        super().__init__()
        # ModuleList registers the members, so .cuda()/.eval()/.parameters()
        # on the ensemble reach them; a plain list is invisible to nn.Module.
        self.models = nn.ModuleList(models)

    def forward(self, x):
        """Return the element-wise mean of all member outputs, computed without gradients."""
        # Follow the device the members live on; fall back to the GPU, as
        # before, when the members expose no parameters.
        first_param = next(self.parameters(), None)
        if first_param is not None:
            x = x.to(first_param.device)
        elif torch.cuda.is_available():
            x = x.cuda()

        with torch.no_grad():
            outputs = [member(x) for member in self.models]
        return torch.stack(outputs).mean(dim=0)

In [41]:
# Either restore the best training checkpoint, or swap the model for an
# ensemble of traced models; predictions are collected by InferCallback.
if not JIT_PRED:
    infer_cb.append(CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"))
else:
    models = [torch.jit.load(p) for p in JIT_PATHS]
    model = ensModel(models)
infer_cb.append(InferCallback())

In [None]:
encoded_pixels = []
loaders = {"infer": valid_loader}
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=infer_cb,
)
valid_masks = []
# One 350x525 map per (image, class) pair. Sized from the validation set
# rather than a fixed 2220 rows, which only fits 555 images and otherwise
# leaves unused all-zero rows behind.
probabilities = np.zeros((len(valid_dataset) * 4, 350, 525))
for i, (batch, output) in enumerate(tqdm.tqdm(zip(
        valid_dataset, runner.callbacks[0].predictions["logits"]))):
    image, mask = batch
    # Ground-truth masks, one per class, brought to the 350x525 evaluation size.
    for m in mask:
        if m.shape != (350, 525):
            m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        valid_masks.append(m)

    # Predicted logits, stored image-major: row i * 4 + j is class j of image i.
    for j, probability in enumerate(output):
        if probability.shape != (350, 525):
            probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        probabilities[i * 4 + j, :, :] = probability

=> loading checkpoint ./logs/segmentation_b5_ranger_one_cycle/checkpoints/best.pth
loaded checkpoint ./logs/segmentation_b5_ranger_one_cycle/checkpoints/best.pth (epoch 2)


## Find optimal values

First of all, my thanks to @samusram for finding a mistake in my validation
https://www.kaggle.com/c/understanding_cloud_organization/discussion/107711#622412

And now I find optimal values separately for each class.

In [None]:
# Grid-search, separately for each class, the probability threshold and the
# minimum component size that maximize the mean Dice on the validation set.
class_params = {}
for class_id in range(4):
    print(class_id)
    # Rows are stored image-major, so every 4th row belongs to this class.
    class_truth = valid_masks[class_id::4]
    # The sigmoid does not depend on threshold or size: compute it once per
    # class, and only for rows that have a ground-truth mask to compare with.
    class_probs = [sigmoid(p) for p in probabilities[class_id::4][:len(class_truth)]]

    attempts = []
    for t in range(0, 100, 5):
        t /= 100
        for ms in [0, 100, 1200, 5000, 10000]:
            masks = [post_process(p, t, ms)[0] for p in class_probs]

            d = []
            for i, j in zip(masks, class_truth):
                # Two empty masks count as a perfect match.
                if i.sum() == 0 and j.sum() == 0:
                    d.append(1)
                else:
                    d.append(dice(i, j))

            attempts.append((t, ms, np.mean(d)))

    attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])

    attempts_df = attempts_df.sort_values('dice', ascending=False)
    print(attempts_df.head())
    best_threshold = attempts_df['threshold'].values[0]
    best_size = attempts_df['size'].values[0]

    class_params[class_id] = (best_threshold, best_size)

In [None]:
# Best (threshold, min size) pair found for each class id.
print(class_params)

In [None]:
#sns.lineplot(x='threshold', y='dice', hue='size', data=attempts_df);
#plt.title('Threshold and min size vs dice for one of the classes');

Now let's have a look at our masks.

In [None]:
#for i, (input, output) in enumerate(zip(
#        valid_dataset, runner.callbacks[0].predictions["logits"])):
#    image, mask = input
#        
#    image_vis = image.transpose(1, 2, 0)
#    mask = mask.astype('uint8').transpose(1, 2, 0)
#    pr_mask = np.zeros((350, 525, 4))
#    for j in range(4):
#        probability = cv2.resize(output.transpose(1, 2, 0)[:, :, j], dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
#        pr_mask[:, :, j], _ = post_process(sigmoid(probability), class_params[j][0], class_params[j][1])
    #pr_mask = (sigmoid(output) > best_threshold).astype('uint8').transpose(1, 2, 0)
    
        
#    visualize_with_raw(image=image_vis, mask=pr_mask, original_image=image_vis, original_mask=mask, raw_image=image_vis, raw_mask=output.transpose(1, 2, 0))
    
#    if i >= 2:
#        break

## Predicting

In [None]:
import gc
# Release cached GPU memory and collect garbage before predicting on the test set.
torch.cuda.empty_cache()
gc.collect()

In [None]:
# Test images use the validation pipeline (resize only) and the submission template as df.
test_dataset = CloudDataset(df=sub, datatype='test', img_ids=test_ids, transforms = get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
# Small batches, loaded in the main process (num_workers=0).
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=0)

loaders = {"test": test_loader}

In [None]:
# Predict every test batch and run-length encode each post-processed class mask.
encoded_pixels = []
image_id = 0  # running counter over (image, class) masks; image_id % 4 is the class id
for test_batch in tqdm.tqdm(loaders['test']):
    runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits']
    for sample_logits in runner_out:
        for probability in sample_logits:
            probability = probability.cpu().detach().numpy()
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)

            threshold, min_size = class_params[image_id % 4][0], class_params[image_id % 4][1]
            predict, num_predict = post_process(sigmoid(probability), threshold, min_size)

            # No surviving component means an empty submission entry.
            encoded_pixels.append(mask2rle(predict) if num_predict != 0 else '')
            image_id += 1

In [None]:
from datetime import datetime

In [None]:
# Attach the predictions and write a submission file named after the current Unix timestamp.
sub['EncodedPixels'] = encoded_pixels
sub.to_csv(
    f"{int(datetime.now().timestamp())}_submission.csv",
    columns=['Image_Label', 'EncodedPixels'],
    index=False,
)