# Training Notebook (Catalyst)

### Experiments

* [x] Efficientnet b5 **PL:0.652**
* [x] FP16, see this [catalyst tutorial](https://github.com/catalyst-team/catalyst/blob/master/examples/notebooks/segmentation-tutorial.ipynb)
    * The model hits gradient overflow after the 5th epoch; everything else is okay
* [x] Saving & Loading from JIT **PL:0.655**
* [x] Ensemble
* [x] 384x576
* [x] polygon convex
* [ ] Test the funnel network again
* [ ] Ranger optimizer 
    * [ ] RADAM
    * [ ] Look Ahead
* [ ] Train all

### Installing Apex for FP16

```shell
git clone https://github.com/NVIDIA/apex
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex
is_fp16_used = True
```

### Other Installations

```
pip install catalyst
pip install pretrainedmodels
pip install git+https://github.com/qubvel/segmentation_models.pytorch
pip install pytorch-toolbelt
pip install torchvision==0.4
```

Our starter kernel is from [this open kernel](https://www.kaggle.com/artgor/segmentation-in-pytorch-using-convenient-tools)

## Importing libraries

In [1]:
import os
import cv2
import collections
import time 
import tqdm
from PIL import Image
from functools import partial
train_on_gpu = True

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data import TensorDataset, DataLoader,Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

import albumentations as albu
from albumentations import pytorch as AT

from catalyst.data import Augmentor
from catalyst.dl import utils
from catalyst.data.reader import ImageReader, ScalarReader, ReaderCompose, LambdaReader
from catalyst.dl.runner import SupervisedRunner
from catalyst.contrib.models.segmentation import Unet
from catalyst.dl.callbacks import DiceCallback, EarlyStoppingCallback, InferCallback, CheckpointCallback

import segmentation_models_pytorch as smp

## Helper functions and classes

In [2]:
# ---- Run switches ----------------------------------------------------------
TRAIN = True    # run the training cell
FUNNEL = True   # feed double-resolution input (see INPUT_SIZE below)
RANGER = False  # RAdam + Lookahead instead of plain Adam

# ---- Checkpoint loading ----------------------------------------------------
LOAD = False                   # load trained weights before training?
LOAD_PATH = "cata-eff-b5.pth"  # weight file used when LOAD is True

# ---- Precision / batch size ------------------------------------------------
FP16 = False  # use half precision?
if FP16:
    fp16_params = {"opt_level": "O2"}
    bs = 4
else:
    fp16_params = None
    bs = 2

# ---- Model -----------------------------------------------------------------
ENCODER = 'efficientnet-b5'   # encoder model name
ENCODER_WEIGHTS = 'imagenet'  # encoder pretrained weights
DEVICE = 'cuda'

ACTIVATION = None

# ---- Post-processing candidates --------------------------------------------
MIN_SIZE_RANGE = 3
# Keep only the last MIN_SIZE_RANGE candidate sizes.
MIN_SIZE = [0, 100, 1200, 5000, 8000, 10000, 12000][-MIN_SIZE_RANGE:]

# ---- Image sizes (height, width) -------------------------------------------
SIZE = (384, 576)
if FUNNEL:
    INPUT_SIZE = (SIZE[0] * 2, SIZE[1] * 2)
else:
    INPUT_SIZE = SIZE
OUTPUT_SIZE = SIZE

# ---- Threshold search ------------------------------------------------------
FIND_TRAIN = True   # search thresholds on (a sample of) the train set?
SAMPLE_RATIO = 0.4  # fraction of the train set used for that search

# ---- JIT ensemble ----------------------------------------------------------
JIT_PRED = False
# Paths of the traced models to ensemble
JIT_PATHS = [
    "jit/jit_fpn_se_resnext.pth",  # 0.659
]


In [3]:
print(INPUT_SIZE,OUTPUT_SIZE)

((768, 1152), (384, 576))

In [4]:
from utils_ucsi import *

## Data overview

Let's have a look at the data first.

In [5]:
from pathlib import Path
HOME = Path(os.environ["HOME"])

In [6]:
path = HOME/'ucsi'
os.listdir(path)

['convex_1106_043937_submission.csv',
 'jit_try_submission.csv',
 'train_fastai_nofunnel.ipynb',
 'logs',
 'convex_1107_063453_submission.csv',
 'jit',
 'None',
 'unet_fpn_train_fp16_fastai_b5v2.ipynb',
 'polygon_cpu.ipynb',
 '1107_063453_submission.csv',
 '.gitignore',
 'LICENSE',
 'test_images',
 'convex_sub_resnext.csv',
 'train_images',
 'polygon_cpu.py',
 'ref_b5_fp16.ipynb',
 'catalyst_train.ipynb',
 'sub_resnext.csv',
 'train_prototype.ipynb',
 'unet_fpn_train_fp16_fastai_err.ipynb',
 'cata-eff-b5.pth',
 '1106_043937_submission.csv',
 'README.md',
 'doc',
 '1106_034552_submission.csv',
 '.ipynb_checkpoints',
 'train.csv',
 'ranger.py',
 '__pycache__',
 'segmentation-in-pytorch-using-convenient-tools.ipynb',
 'fastai',
 'unet_fpn_train_fp16_fastai.ipynb',
 'pred_basic.ipynb',
 'le_jit-Copy1.ipynb',
 'catalyst_ensemble.ipynb',
 'ref_b5.ipynb',
 'requirements.txt',
 'cata-eff-b5-v2.pth',
 '.git',
 'convex_1106_034552_submission.csv',
 'sample_submission.csv',
 'le_jit.ipynb']

We have folders with train and test images, a file with train image ids and masks, and a sample submission.

In [7]:
train = pd.read_csv(f'{path}/train.csv')
sub = pd.read_csv(f'{path}/sample_submission.csv')

In [8]:
train.head()

Unnamed: 0,Image_Label,EncodedPixels
0,0011165.jpg_Fish,264918 937 266318 937 267718 937 269118 937 27...
1,0011165.jpg_Flower,1355565 1002 1356965 1002 1358365 1002 1359765...
2,0011165.jpg_Gravel,
3,0011165.jpg_Sugar,
4,002be4f.jpg_Fish,233813 878 235213 878 236613 878 238010 881 23...


In [9]:
n_train = len(os.listdir(f'{path}/train_images'))
n_test = len(os.listdir(f'{path}/test_images'))
print(f'There are {n_train} images in train dataset')
print(f'There are {n_test} images in test dataset')

There are 5546 images in train dataset
There are 3698 images in test dataset


In [10]:
train['Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()

Gravel    5546
Sugar     5546
Fish      5546
Flower    5546
Name: Image_Label, dtype: int64

So we have ~5.5k images in train dataset and they can have up to 4 masks: Fish, Flower, Gravel and Sugar.

In [11]:
train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()

Sugar     3751
Gravel    2939
Fish      2781
Flower    2365
Name: Image_Label, dtype: int64

In [12]:
train.loc[train['EncodedPixels'].isnull() == False, 'Image_Label'].apply(lambda x: x.split('_')[0]).value_counts().value_counts()

2    2372
3    1560
1    1348
4     266
Name: Image_Label, dtype: int64

But there are a lot of empty masks. In fact only 266 images have all four masks. It is important to remember this.

In [13]:
# `Image_Label` looks like "<image file>_<class name>"; split it once per frame
# and store the two halves as separate columns.
for _frame in (train, sub):
    _pieces = _frame['Image_Label'].map(lambda value: value.split('_'))
    _frame['label'] = _pieces.map(lambda parts: parts[1])
    _frame['im_id'] = _pieces.map(lambda parts: parts[0])
del _frame, _pieces

Let's have a look at the images and the masks.

In [14]:
#fig = plt.figure(figsize=(25, 16))
#for j, im_id in enumerate(np.random.choice(train['im_id'].unique(), 4)):
#    for i, (idx, row) in enumerate(train.loc[train['im_id'] == im_id].iterrows()):
#        ax = fig.add_subplot(5, 4, j * 4 + i + 1, xticks=[], yticks=[])
#        im = Image.open(f"{path}/train_images/{row['Image_Label'].split('_')[0]}")
#        plt.imshow(im)
#       mask_rle = row['EncodedPixels']
#        try: # label might not be there!
#            mask = rle_decode(mask_rle)
#        except:
#            mask = np.zeros((1400, 2100))
#        plt.imshow(mask, alpha=0.5, cmap='gray')
#        ax.set_title(f"Image: {row['Image_Label'].split('_')[0]}. Label: {row['label']}")

We can see that masks can overlap. Also we can see that clouds are really similar to fish, flower and so on. Another important point: masks are often quite big and can have seemingly empty areas.

## Preparing data for modelling

At first, let's create a list of unique image ids and the count of masks for images. This will allow us to make a stratified split based on this count.

In [15]:
# Count the non-empty masks per image; the count is the stratification key.
# `rename_axis(...).reset_index(name=...)` names both columns explicitly, so the
# result does not depend on the default column names that
# `value_counts().reset_index()` produces (they differ between pandas versions).
id_mask_count = (
    train.loc[train['EncodedPixels'].notnull(), 'Image_Label']
    .apply(lambda x: x.split('_')[0])
    .value_counts()
    .rename_axis('img_id')
    .reset_index(name='count')
)
# NOTE: images without any mask never appear in `id_mask_count`, so they end up
# in neither the train nor the validation split.
train_ids, valid_ids = train_test_split(
    id_mask_count['img_id'].values,
    random_state=42,
    stratify=id_mask_count['count'],
    test_size=0.1,
)
# Unique test image names, in submission order.
test_ids = sub['Image_Label'].apply(lambda x: x.split('_')[0]).drop_duplicates().values

## Setting up data for training in Catalyst

In [24]:
class CloudDataset(Dataset):
    """Dataset yielding ``(image, mask)`` pairs.

    Args:
        df: frame handed to ``make_mask`` to build the mask of an image.
        datatype: ``'test'`` reads images from ``{path}/test_images``; any
            other value reads from ``{path}/train_images``.
        img_ids: image file names; ``__getitem__`` indexes into this.
        transforms: callable invoked as ``transforms(image=..., mask=...)``
            that returns a dict with ``'image'`` and ``'mask'``.
        preprocessing: optional callable with the same calling convention,
            applied after ``transforms`` (and after the funnel resize).
    """

    def __init__(self, df: pd.DataFrame = None, datatype: str = 'train', img_ids: np.ndarray = None,
                 transforms = albu.Compose([albu.HorizontalFlip(),AT.ToTensor()]),
                 preprocessing=None):
        self.df = df
        if datatype != 'test':
            self.data_folder = f"{path}/train_images"
        else:
            self.data_folder = f"{path}/test_images"
        self.img_ids = img_ids
        self.transforms = transforms
        if FUNNEL:
            # In funnel mode the mask is resized to OUTPUT_SIZE after augmentation.
            self.funnel = albu.Resize(*OUTPUT_SIZE)
        self.preprocessing = preprocessing

    def __getitem__(self, idx):
        """Load, augment and preprocess the sample at position ``idx``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_name = self.img_ids[idx]
        mask = make_mask(self.df, image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of an opaque error inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']
        if FUNNEL:
            # Only the resized mask is kept; the image keeps its augmented size.
            mask = self.funnel(image=img, mask=mask)['mask']

        if self.preprocessing:
            preprocessed = self.preprocessing(image=img, mask=mask)
            img = preprocessed['image']
            mask = preprocessed['mask']

        return img, mask

    def __len__(self):
        """Number of image ids in the dataset."""
        return len(self.img_ids)

Now we define model and training parameters

In [25]:
# Encoder choice (same values as in the configuration cell, repeated here).
ENCODER, ENCODER_WEIGHTS = 'efficientnet-b5', 'imagenet'
DEVICE = 'cuda'

# No activation is passed to the model.
ACTIVATION = None

# Input normalisation matching the chosen encoder / pretrained weights.
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

# FPN segmentation model with one output channel per class.
model = smp.FPN(
    encoder_name=ENCODER,
    encoder_weights=ENCODER_WEIGHTS,
    classes=4,
    activation=ACTIVATION,
)

Using funnel model


In [26]:
# Data is loaded in the main process.
num_workers = 0

train_dataset = CloudDataset(
    df=train,
    datatype='train',
    img_ids=train_ids,
    transforms=get_training_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)
valid_dataset = CloudDataset(
    df=train,
    datatype='valid',
    img_ids=valid_ids,
    transforms=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)

# Only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=bs, num_workers=num_workers, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=bs, num_workers=num_workers, shuffle=False)

loaders = dict(train=train_loader, valid=valid_loader)

# Number of training images later used for the threshold search.
SAMPLE_NUMBER = int(SAMPLE_RATIO * len(train_dataset))
print("sampling %s for threshold finding"%(SAMPLE_NUMBER))


Using lambda is incompatible with multiprocessing. Consider using regular functions or partial().



In [27]:
# x,y = next(iter(train_loader))

# x.size(),y.size()

In [28]:
from catalyst.contrib.optimizers import RAdam, Lookahead

num_epochs = 40
logdir = "./logs/segmentation"

# model, criterion, optimizer
if RANGER:
    opt_class = RAdam
else:
    opt_class = torch.optim.Adam

# Per-group learning rates: the encoder group gets a smaller lr than the
# decoder (and funnel) groups.
# NOTE(review): the FUNNEL branch needs `model` to expose `.backbone` and
# `.funnel`; the `smp.FPN` built earlier in this notebook is not shown to have
# them — confirm the funnel wrapper is in place before running with FUNNEL=True.
op_list = []
if FUNNEL:
    op_list.append({'params': model.backbone.decoder.parameters(), 'lr': 1e-2})
    op_list.append({'params': model.backbone.encoder.parameters(), 'lr': 1e-3})
    op_list.append({'params': model.funnel.parameters(), 'lr': 1e-2})
else:
    op_list.append({'params': model.decoder.parameters(), 'lr': 1e-2})
    op_list.append({'params': model.encoder.parameters(), 'lr': 1e-3})

optimizer_ = opt_class(op_list, weight_decay=3e-4)
if RANGER:
    # Lookahead wrapped around RAdam
    optimizer = Lookahead(optimizer_)
else:
    optimizer = optimizer_

# Alternative tried earlier: ReduceLROnPlateau(optimizer, factor=0.25, patience=2)
scheduler = ReduceLROnPlateau(optimizer, patience=2, factor=0.18)
criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
runner = SupervisedRunner()

In [29]:
model = model.cuda()

In [30]:
if LOAD:
    model.load_state_dict(torch.load(LOAD_PATH))

## Model training

In [None]:
# Training is skipped entirely when TRAIN is False.
if TRAIN:
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        # Variant with early stopping:
        # callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
        callbacks=[DiceCallback()],
        logdir=logdir,
        num_epochs=num_epochs,
        fp16=fp16_params,  # None unless FP16 is enabled in the config cell
        verbose=True,
    )

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

1/40 * Epoch (train):  11% 280/2496 [06:05<47:15,  1.28s/it, dice=0.247, loss=1.039]    

In [None]:
#torch.save(runner.model.state_dict(),"cata-eff-b5-v2.pth")

In [None]:
#utils.plot_metrics(
#    logdir=logdir, 
#    # specify which metrics we want to plot
#    metrics=["loss", "dice", 'lr', '_base/lr']
#)

In [None]:
#runner.model = runner.model.eval()

In [None]:
#traced = torch.jit.trace(runner.model, torch.rand(2,3,384,576).cuda())

In [None]:
#traced.save("jit/jit-cata-eff-b5-v2.pth")

## Exploring predictions
Let's make predictions on validation dataset.

At first we need to optimize thresholds 

In [None]:
#JIT_PATHS = [
#    "jit/jit-cata-eff-b5-v2.pth"
#]
#
JIT_PRED = False
# INFER_BS = 28

In [None]:
infer_cb = []

In [None]:
class ensModel(nn.Module):
    """Ensemble that returns the element-wise mean of its members' outputs.

    Args:
        models: iterable of callables taking the input batch. When every
            member is an ``nn.Module`` the members are registered as
            sub-modules, so ``.to()``, ``.eval()`` etc. reach them too;
            otherwise they are kept in a plain list.
    """

    def __init__(self, models):
        super().__init__()
        models = list(models)
        if all(isinstance(m, nn.Module) for m in models):
            self.models = nn.ModuleList(models)
        else:
            # Plain callables cannot be registered; keep them as they are.
            self.models = models

    def forward(self, x):
        """Run every member on ``x`` without gradients and average the results."""
        # Inputs go to the GPU when there is one; on CPU-only hosts they stay put.
        if torch.cuda.is_available():
            x = x.cuda()
        with torch.no_grad():
            outputs = [m(x) for m in self.models]
        return torch.mean(torch.stack(outputs), dim=0)

In [None]:
from catalyst.dl.core import Callback, CallbackOrder, RunnerState
from collections import defaultdict

# A modified version that saves memory during inference
class InferCallback(Callback):
    """Collect model outputs during inference as resized float16 maps.

    Every output channel of every sample is resized to ``out_size`` and stored
    as one float16 plane, sample by sample and channel by channel. After a
    loader finishes, ``self.predictions["logits"]`` is a single array of shape
    ``(n_samples * n_channels, height, width)``.

    Args:
        out_dir: optional output directory (may be overridden from the state).
        out_prefix: optional output prefix (may be overridden from the state).
        out_size: ``(height, width)`` of the stored maps.
    """

    def __init__(self, out_dir=None, out_prefix=None, out_size=(350, 525)):
        super().__init__(CallbackOrder.Internal)
        self.out_dir = out_dir
        self.out_prefix = out_prefix
        self.out_size = tuple(out_size)
        self.predictions = defaultdict(list)
        self._keys_from_state = ["out_dir", "out_prefix"]

    def on_stage_start(self, state: RunnerState):
        """Pick up ``out_dir`` / ``out_prefix`` from the state and prepare the folder."""
        for key in self._keys_from_state:
            value = getattr(state, key, None)
            if value is not None:
                setattr(self, key, value)
        # Join only when both parts exist; otherwise the prefix would turn into
        # the literal string "<out_dir>/None".
        if self.out_dir is not None and self.out_prefix is not None:
            self.out_prefix = str(self.out_dir) + "/" + str(self.out_prefix)
        if self.out_prefix is not None:
            folder = os.path.dirname(self.out_prefix)
            if folder:  # os.makedirs("") raises, so skip prefixes without a folder
                os.makedirs(folder, exist_ok=True)

    def on_loader_start(self, state: RunnerState):
        """Start each loader with an empty list of prediction chunks."""
        self.predictions = {"logits": list()}

    def on_batch_end(self, state: RunnerState):
        """Resize the batch outputs and append them as one float16 chunk."""
        height, width = self.out_size
        # Every entry of state.output is appended under "logits".
        for value in state.output.values():
            value = value.detach().cpu().numpy()
            # Channel count comes from the output itself instead of a fixed 4.
            # Assumes outputs are (batch, channels, H, W) — cv2.resize needs 2-D planes.
            n_channels = value.shape[1]
            pred = np.zeros((len(value) * n_channels, height, width), dtype=np.float16)
            for i, output in enumerate(value):
                for j, probability in enumerate(output):
                    # Upcast first: cv2.resize is not guaranteed to accept
                    # float16 planes (e.g. when running in half precision).
                    probability = np.asarray(probability, dtype=np.float32)
                    pred[i * n_channels + j, :, :] = cv2.resize(
                        probability, dsize=(width, height), interpolation=cv2.INTER_LINEAR
                    )
            self.predictions["logits"].append(pred)
        print(">", end="")  # one progress tick per batch

    def on_loader_end(self, state: RunnerState):
        """Concatenate the per-batch chunks into one array per key."""
        self.predictions = {
            key: np.concatenate(value, axis=0)
            for key, value in self.predictions.items()
        }

# NOTE(review): `infer_cb` is created in an earlier cell, so re-running this
# cell on its own appends the callbacks a second time.
if not JIT_PRED:
    # Regular path: restore the best checkpoint written during training.
    infer_cb.append(CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"))
else:
    # JIT path: replace `model` with an ensemble of the traced models.
    models = [torch.jit.load(p) for p in JIT_PATHS]
    model = ensModel(models)
# The prediction collector is always the last callback added.
infer_cb.append(InferCallback())

In [None]:
encoded_pixels = []

# Rebuild the data loaders for the threshold search.
#
# The chosen dataset is traversed twice: once by `runner.infer` (predictions)
# and once by the loop below (ground-truth masks). Both passes must see the
# same view of each image, so the train subset uses the validation transform
# here instead of the training augmentation.
# NOTE(review): this assumes `get_training_augmentation()` (from utils_ucsi,
# not visible here) applies random transforms and
# `get_validation_augmentation()` does not — confirm.
train_dataset = CloudDataset(
    df=train,
    datatype='train',
    img_ids=train_ids[:SAMPLE_NUMBER],
    transforms=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)
infer_bs = bs * 8
train_loader = DataLoader(train_dataset, batch_size=infer_bs, shuffle=False, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=infer_bs, shuffle=False, num_workers=num_workers)

# Dataset / loader pair actually used for the search.
infer_dataset = train_dataset if FIND_TRAIN else valid_dataset
loaders = {"infer": train_loader if FIND_TRAIN else valid_loader}

# Run inference through the model
print("Running inference:")
# Ruler with one "=" per full batch; the callback prints one ">" per batch.
print("="*(len(infer_dataset)//infer_bs))
runner.infer(
    model=model,
    loaders=loaders,
    callbacks=infer_cb,
)

# Collect the ground-truth masks in the same order as the predictions:
# image by image, class by class.
valid_masks = []
print("Build valid mask on :\t%s"%("train data" if FIND_TRAIN else "valid data"))
for image, mask in tqdm.tqdm(infer_dataset):
    for m in mask:  # for each seg class
        if m.shape != (350, 525):
            m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        valid_masks.append(m)
probabilities = runner.callbacks[0].predictions["logits"]

## Find optimal values

First of all, my thanks to @samusram for finding a mistake in my validation
https://www.kaggle.com/c/understanding_cloud_organization/discussion/107711#622412

And now I find optimal values separately for each class.

In [None]:
# Grid-search, per class, the (threshold, minimum size) pair with the best
# mean dice. Predictions and masks are stored class-interleaved, hence the
# stride of 4 starting at `class_id`.
class_params = {}
for class_id in range(4):
    print(class_id)
    attempts = []
    class_truth = valid_masks[class_id::4]
    for percent in range(30, 75, 5):
        t = percent / 100
        for ms in [0, 100, 1200, 5000, 10000]:
            # Post-processed prediction for every sample of this class.
            masks = [
                post_process(sigmoid(probabilities[idx]), t, ms)[0]
                for idx in range(class_id, len(probabilities), 4)
            ]
            # An empty prediction for an empty mask counts as a perfect score.
            d = [
                1 if (pred.sum() == 0 and truth.sum() == 0) else dice(pred, truth)
                for pred, truth in zip(masks, class_truth)
            ]
            attempts.append((t, ms, np.mean(d)))

    attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
    attempts_df = attempts_df.sort_values('dice', ascending=False)
    print(attempts_df.head())

    # The first row after sorting is the best combination for this class.
    best_threshold = attempts_df['threshold'].values[0]
    best_size = attempts_df['size'].values[0]
    class_params[class_id] = (best_threshold, best_size)



In [None]:
print(class_params)

In [None]:
#sns.lineplot(x='threshold', y='dice', hue='size', data=attempts_df);
#plt.title('Threshold and min size vs dice for one of the classes');

Now let's have a look at our masks.

In [None]:
#for i, (input, output) in enumerate(zip(
#        valid_dataset, runner.callbacks[0].predictions["logits"])):
#    image, mask = input
#        
#    image_vis = image.transpose(1, 2, 0)
#    mask = mask.astype('uint8').transpose(1, 2, 0)
#    pr_mask = np.zeros((350, 525, 4))
#    for j in range(4):
#        probability = cv2.resize(output.transpose(1, 2, 0)[:, :, j], dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
#        pr_mask[:, :, j], _ = post_process(sigmoid(probability), class_params[j][0], class_params[j][1])
    #pr_mask = (sigmoid(output) > best_threshold).astype('uint8').transpose(1, 2, 0)
    
        
#    visualize_with_raw(image=image_vis, mask=pr_mask, original_image=image_vis, original_mask=mask, raw_image=image_vis, raw_mask=output.transpose(1, 2, 0))
    
#    if i >= 2:
#        break

## Predicting

In [None]:
import gc
torch.cuda.empty_cache()
gc.collect()

In [None]:
# Test set: validation-time transforms, images read from the test folder.
test_dataset = CloudDataset(
    df=sub,
    datatype='test',
    img_ids=test_ids,
    transforms=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)
# Order must be preserved so predictions line up with the submission rows.
test_loader = DataLoader(test_dataset, batch_size=2, num_workers=0, shuffle=False)

loaders = dict(test=test_loader)

In [None]:
# Predict the test set batch by batch and run-length encode each class mask.
encoded_pixels = []
image_id = 0  # running index over (image, class) pairs; `% 4` gives the class
for test_batch in tqdm.tqdm(loaders['test']):
    runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits']
    for sample_logits in runner_out:
        for probability in sample_logits:
            probability = probability.cpu().detach().numpy()
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            # Per-class threshold and minimum size found in the search above.
            params = class_params[image_id % 4]
            predict, num_predict = post_process(sigmoid(probability), params[0], params[1])
            # A prediction with no component left is submitted as an empty string.
            encoded_pixels.append('' if num_predict == 0 else mask2rle(predict))
            image_id += 1

In [None]:
from datetime import datetime


In [None]:
# Attach the encoded masks and write a submission named after the current
# Unix timestamp (whole seconds).
sub['EncodedPixels'] = encoded_pixels
sub.to_csv(
    '%s_submission.csv' % int(datetime.now().timestamp()),
    index=False,
    columns=['Image_Label', 'EncodedPixels'],
)