# Training Notebook (Catalyst)

## Without threshold finding

### Experiments

* [x] Efficientnet b5 **PL:0.652**
* [x] FP16, refer to this [catalyst tutorial](https://github.com/catalyst-team/catalyst/blob/master/examples/notebooks/segmentation-tutorial.ipynb)
    * The model will have gradient overflow after 5th epoch, everything else is okay
* [x] Saving & Loading from JIT **PL:0.655**
* [x] Ensemble
* [x] 384x576
* [x] polygon convex
* [x] Test the funnel network again - totally not working
* [x] Ranger optimizer 
    * [x] RADAM
    * [x] Look Ahead
* [x] Predict without threshold finding, saving training hours on valuable GPU resources
* [ ] K-fold Training

### Installing Apex for FP16

```shell
git clone https://github.com/NVIDIA/apex
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex
is_fp16_used = True
```

### Other Installations

```
pip install catalyst
pip install pretrainedmodels
pip install git+https://github.com/qubvel/segmentation_models.pytorch
pip install pytorch-toolbelt
pip install torchvision==0.4
```

Our starter kernel is from [this open kernel](https://www.kaggle.com/artgor/segmentation-in-pytorch-using-convenient-tools)

## Importing libraries

In [1]:
import os
import cv2
import collections
import time 
import tqdm
from PIL import Image
from functools import partial
train_on_gpu = True

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import torchvision
import torchvision.transforms as transforms
import torch
from torch.utils.data import TensorDataset, DataLoader,Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

import albumentations as albu
from albumentations import pytorch as AT

from catalyst.data import Augmentor
from catalyst.dl import utils
from catalyst.data.reader import ImageReader, ScalarReader, ReaderCompose, LambdaReader
from catalyst.dl.runner import SupervisedRunner
from catalyst.contrib.models.segmentation import Unet
from catalyst.dl.callbacks import DiceCallback, EarlyStoppingCallback, InferCallback, CheckpointCallback

import segmentation_models_pytorch as smp

from catalyst.dl.core import Callback, CallbackOrder, RunnerState
from collections import defaultdict
from catalyst.contrib.optimizers import RAdam, Lookahead

## Helper functions and classes

In [2]:
# ---- Run switches ----
TRAIN = True
RANGER = False  # RAdam wrapped in Lookahead instead of plain Adam

EPOCHS = 20
UPLOAD_WEIGHTS = True  # upload weights to Google Cloud Storage

# ---- Precision / batch size ----
FP16 = False  # Do we use half precision?
fp16_params = {"opt_level": "O2"} if FP16 else None

bs = 8 if FP16 else 4

# ---- Weights ----
LOAD = False  # Do we load trained weights at the beginning?
LOAD_PATH = "cata-eff-b5.pth"  # Weight file to load when LOAD is on

# ---- Model ----
ENCODER = 'se_resnext50_32x4d'  # Encoder model name
ENCODER_WEIGHTS = 'imagenet'  # Encoder pretrained weights
DEVICE = 'cuda'

ACTIVATION = None

# ---- Post-processing ----
# Activate the threshold finding
TH_FIND = False
# Per-class (threshold, minimum size) pairs used when threshold finding is off
class_params = {
    0: (0.55, 10000),
    1: (0.7, 10000),
    2: (0.65, 10000),
    3: (0.5, 10000),
}

# Keep only the last MIN_SIZE_RANGE candidate sizes
MIN_SIZE_RANGE = 3
MIN_SIZE = [0, 100, 1200, 5000, 8000, 10000, 12000][-MIN_SIZE_RANGE:]

# ---- Image geometry ----
SIZE = (384, 576)
INPUT_SIZE = OUTPUT_SIZE = SIZE

# ---- Threshold-finding data ----
# Are we using train dataset to find the threshold
FIND_TRAIN = True
# How much percentage of train dataset are we using?
SAMPLE_RATIO = .4

# Number of per-fold checkpoints gathered for the ensemble
K = 4

JIT_PRED = False

In [3]:
# Show the configured input and output sizes
print(INPUT_SIZE,OUTPUT_SIZE)

(384, 576) (384, 576)


In [4]:
from utils_ucsi import *

## Data overview

Let's have a look at the data first.

In [5]:
from pathlib import Path
# Home directory read from the HOME environment variable (raises KeyError when it is unset)
HOME = Path(os.environ["HOME"])

In [6]:
# Dataset root: the CSV files and the train_images/test_images folders are read from here
path = HOME/'ucsi'
# os.listdir(path)

We have folders with train and test images, a file with train image ids and masks, and a sample submission.

In [None]:
# Mask annotations for the training images, and the submission template
train = pd.read_csv(path/'train.csv')
sub = pd.read_csv(path/'sample_submission.csv')

In [None]:
# Preview the first rows of the training table
train.head()

In [None]:
# Count the files present in each image folder
train_image_files = os.listdir(path/'train_images')
test_image_files = os.listdir(path/'test_images')
n_train = len(train_image_files)
n_test = len(test_image_files)
print(f'There are {n_train} images in train dataset')
print(f'There are {n_test} images in test dataset')

In [None]:
# Rows per label, where the label is the part of Image_Label after the underscore
label_names = train['Image_Label'].apply(lambda x: x.split('_')[1])
label_names.value_counts()

So we have ~5.5k images in train dataset and they can have up to 4 masks: Fish, Flower, Gravel and Sugar.

In [None]:
# Same count, restricted to rows that actually carry an encoded mask
has_mask = train['EncodedPixels'].notnull()
train.loc[has_mask, 'Image_Label'].apply(lambda x: x.split('_')[1]).value_counts()

In [None]:
# Non-empty masks per image, then how many images have each mask count
masks_per_image = train.loc[train['EncodedPixels'].notnull(), 'Image_Label'].apply(lambda x: x.split('_')[0]).value_counts()
masks_per_image.value_counts()

But there are a lot of empty masks. In fact only 266 images have all four masks. It is important to remember this.

In [None]:
# Split "<image id>_<label>" into separate columns on both tables
for frame in (train, sub):
    frame['label'] = frame['Image_Label'].apply(lambda x: x.split('_')[1])
    frame['im_id'] = frame['Image_Label'].apply(lambda x: x.split('_')[0])

We can see that masks can overlap. Also we can see that clouds are really similar to fish, flower and so on. Another important point: masks are often quite big and can have seemingly empty areas.

## Preparing data for modelling

At first, let's create a list of unique image ids and the count of masks for images. This will allow us to make a stratified split based on this count.

In [None]:
from datetime import datetime
class KFoldManager:
    """Assign ids to ``k`` random groups and serve train/validation splits.

    The id-to-group mapping is cached as ``fold_<k>_n<number of ids>.csv`` in
    the current working directory. If that file already exists it is loaded
    instead of drawing a new mapping, so repeated runs reuse the same split.
    The file name depends only on ``k`` and the number of ids, not on the ids
    themselves.

    Indexing the manager with a fold number returns
    ``(train_ids, valid_ids)`` where the validation ids are those assigned to
    that fold. ``start``/``end`` print wall-clock timing for a fold.
    """

    def __init__(self,x,k):
        """Store the ids ``x`` and fold count ``k``, then load or create the mapping."""
        self.k = k
        self.x = x
        print("Initiate %s fold training"%(self.k))
        self.path = "fold_%s_n%s.csv"%(self.k,len(self.x))
        self.loadGroup()

    def __len__(self):
        """Return the number of folds."""
        return self.k

    def __getitem__(self, Nth):
        """Return ``(train_ids, valid_ids)`` for fold ``Nth`` as numpy arrays."""
        print("Processing on %sth fold"%(Nth))
        train_ids = self.groups[self.groups.group!=Nth]["ids"].values
        valid_ids = self.groups[self.groups.group==Nth]["ids"].values
        return train_ids , valid_ids

    def idGroupDF(self):
        """Draw a fresh random group for every id and return the mapping frame.

        Each id gets an independent uniform draw, so group sizes are only
        approximately equal.
        """
        print("Creating new group mapping")
        self.groups = pd.DataFrame({"ids":self.x,"group":np.random.choice(range(self.k),len(self.x))})
        print(self.groups.group.value_counts())
        return self.groups

    def saveGroup(self):
        """Write the current mapping to ``self.path`` as CSV."""
        self.groups.to_csv(self.path, index = False)
        print("Saving to file:%s"%(self.path))

    def loadGroup(self):
        """Load the cached mapping if present, otherwise create and save one."""
        if os.path.exists(self.path):
            self.groups = pd.read_csv(self.path)
            print(self.groups.group.value_counts())
        else:
            self.idGroupDF()
            self.saveGroup()

    def start(self,n):
        """Record and print the start time of fold ``n``."""
        print("="*70)
        self.time1 = datetime.now()
        print("fold",n,self.time1.strftime("starting at %H:%M:%S"))

    def end(self,n):
        """Record the end time of fold ``n`` and print the elapsed h/m/s.

        ``start`` must have been called first, since the elapsed time is
        measured from ``self.time1``.
        """
        self.time2 = datetime.now()
        print("fold",n,self.time2.strftime("ending at %H:%M:%S"))
        # total_seconds() keeps whole days, which `.seconds` would drop.
        elapsed = int((self.time2-self.time1).total_seconds())
        # Minutes are the remainder after whole hours, not the total minutes.
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)
        print("This fold:%s h\t%s m\t%s s"%(hours,minutes,seconds))
        print("="*70)

In [15]:
class CloudDataset(Dataset):
    """Dataset yielding ``(image, mask)`` pairs for the given image ids.

    Images are read from ``<path>/test_images`` when ``datatype`` is
    ``'test'`` and from ``<path>/train_images`` for any other value, where
    ``path`` is the module-level dataset root. Masks are built by
    ``make_mask(df, image_name)``.

    Args:
        df: table handed to ``make_mask`` for every sample.
        datatype: ``'test'`` selects the test image folder; anything else
            selects the train image folder.
        img_ids: image file names; indexing the dataset indexes this array.
        transforms: callable invoked as ``transforms(image=..., mask=...)``
            returning a dict with ``'image'`` and ``'mask'``. The default is
            built once at class definition and shared by all instances.
        preprocessing: optional callable with the same calling convention,
            applied after ``transforms``.
    """

    def __init__(self, df: pd.DataFrame = None, datatype: str = 'train', img_ids: np.ndarray = None,
                 transforms = albu.Compose([albu.HorizontalFlip(),AT.ToTensor()]),
                preprocessing=None):
        self.df = df
        if datatype != 'test':
            self.data_folder = f"{path}/train_images"
        else:
            self.data_folder = f"{path}/test_images"
        self.img_ids = img_ids
        self.transforms = transforms
        self.preprocessing = preprocessing

    def __getitem__(self, idx):
        """Return the transformed ``(image, mask)`` for sample ``idx``.

        Raises:
            OSError: if the image file is missing or cannot be decoded.
        """
        image_name = self.img_ids[idx]
        mask = make_mask(self.df, image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        if img is None:
            raise OSError("Image is missing or unreadable: %s" % image_path)
        # OpenCV loads BGR; convert to RGB before augmentation
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']
        if self.preprocessing:
            preprocessed = self.preprocessing(image=img, mask=mask)
            img = preprocessed['image']
            mask = preprocessed['mask']

        return img,mask

    def __len__(self):
        """Return the number of image ids."""
        return len(self.img_ids)

### K-Fold Training

In [16]:
# One row per image that has at least one non-empty mask: image id and mask count.
# The columns are named explicitly via rename_axis/reset_index(name=...) because the
# default names produced by value_counts().reset_index() differ between pandas
# versions; renaming 'index'/'Image_Label' only works on pandas < 2.0.
id_mask_count = (
    train.loc[train['EncodedPixels'].notnull(), 'Image_Label']
    .apply(lambda x: x.split('_')[0])
    .value_counts()
    .rename_axis('img_id')
    .reset_index(name='count')
)

# Test ids
test_ids = sub['Image_Label'].apply(lambda x: x.split('_')[0]).drop_duplicates().values
# Kfold manager
# NOTE(review): 5 folds are created here while the config sets K = 4 and only
# range(K) checkpoints are collected for the ensemble; confirm this is intended.
kfm = KFoldManager(id_mask_count['img_id'].values,5)

Initiate 5 fold training
3    1139
2    1129
0    1108
1    1088
4    1082
Name: group, dtype: int64


In [None]:
# Train one FPN segmentation model per fold. Every iteration builds a fresh model,
# datasets, loaders, optimizer and runner, and trains with logs under a per-fold
# logdir. The names assigned here (model, runner, logdir, train_ids, valid_dataset,
# preprocessing_fn, num_workers, ...) keep the values of the LAST fold after the loop
# and are reused by later cells.
for fold in range(len(kfm)):
    kfm.start(fold)
    train_ids,valid_ids = kfm[fold]
    print("Train:\t%s\tValid:\t%s"%(len(train_ids),len(valid_ids)))
    
    print("[Fold %s]\tStructuring Model]"%(fold))
    # New model per fold so no weights carry over between folds
    model = smp.FPN(
        encoder_name=ENCODER, 
        encoder_weights=ENCODER_WEIGHTS, 
        classes=4, 
        activation=ACTIVATION,
    )
    # Input preprocessing matching the chosen encoder and its pretrained weights
    preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
    
    # 0 workers: batches are loaded in the main process
    num_workers = 0
    
    print("[Fold %s]\tCreating datasets, dataloaders]"%(fold))
    # Both datasets read from the train image folder (any datatype other than 'test' does)
    train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms = get_training_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = CloudDataset(df=train, datatype='valid', img_ids=valid_ids, transforms = get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
    
    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
    valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers)
    
    loaders = {
        "train": train_loader,
        "valid": valid_loader
    }
    
    

    # Per-fold log directory; later cells read checkpoints/best.pth from directories with this name
    logdir = "./logs/seg_%s_f%s"%(ENCODER, fold)
    
    # model, criterion, optimizer
    opt_class = RAdam if RANGER else torch.optim.Adam
    
    # NOTE(review): only decoder and encoder parameters are handed to the optimizer;
    # confirm the model has no other trainable sub-modules.
    op_list = [
        {'params': model.decoder.parameters(), 'lr': 1e-2}, 
        {'params': model.encoder.parameters(), 'lr': 1e-3},  # Pretrained section of the model using smaller lr
    ]
        
    # optimizer, loss function and runner    
    optimizer_ = opt_class(op_list, weight_decay=3e-4)
    # With RANGER on, the RAdam optimizer is wrapped in Lookahead
    optimizer = Lookahead(optimizer_) if RANGER else optimizer_
    scheduler = ReduceLROnPlateau(optimizer, factor=0.18, patience=2)
    #scheduler = ReduceLROnPlateau(optimizer, factor=0.25, patience=2)
    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    
    print("[Fold %s]\tPutting model to cuda"%(fold))
    model = model.cuda()
    runner = SupervisedRunner()
    
    print("[Fold %s]\tRun training"%(fold))
    # Train for at most EPOCHS epochs with a Dice metric callback and early stopping
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=[DiceCallback(), EarlyStoppingCallback(patience=5, min_delta=0.001)],
        logdir=logdir,
        num_epochs=EPOCHS,
        fp16=fp16_params,
        verbose=True
    )
    
    print("[Fold %s]\tTaking model out of cuda"%(fold))
    # Move the finished model back to the CPU before the next fold builds a new one
    model = model.cpu()
    
    # end timer 
    kfm.end(fold)
    

fold 0 starting at 08:15:14
Processing on 0th fold
Train:	4438	Valid:	1108
[Fold 0]	Structuring Model]


Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth" to /home/b2ray2c/.cache/torch/checkpoints/se_resnext50_32x4d-a260b3a4.pth
 18%|█▊        | 19.1M/105M [00:56<04:13, 357kB/s] 

### Prediction

In [None]:
# Locate the best checkpoint of each of the first K folds, paired with its encoder name
log_dirs = [
    {
        "path": HOME/"ucsi"/"logs"/("seg_%s_f%s"%(ENCODER, fold))/"checkpoints"/"best.pth",
        "encoder": ENCODER,
    }
    for fold in range(K)
]
print(log_dirs)

Upload weights to google cloud storage

In [None]:

if UPLOAD_WEIGHTS:
    from utils_google import download_blob,upload_blob,list_blobs_with_prefix
    # Push every fold checkpoint to the "milkyway" bucket as pth/<encoder>_f<fold>.pth
    for fold_idx, entry in enumerate(log_dirs):
        checkpoint_file = str(entry["path"])
        upload_blob("milkyway", checkpoint_file, "pth/%s_f%s.pth"%(ENCODER, fold_idx))

In [None]:
def loadModel(path,encoder):
    """Build an FPN with the given encoder and load trained weights into it.

    Args:
        path: checkpoint file; it must hold the weights under the
            ``"model_state_dict"`` key.
        encoder: encoder name for ``smp.FPN``. It must match the encoder the
            checkpoint was trained with, otherwise loading the state dict
            fails.

    Returns:
        The model with loaded weights, moved to the GPU.
    """
    model = smp.FPN(
        # Use the caller's encoder rather than the global ENCODER, so
        # checkpoints trained with different encoders can be loaded.
        encoder_name=encoder,
        # No pretrained download needed: the checkpoint overwrites all weights
        encoder_weights=None,
        classes=4,
        activation=ACTIVATION,
    )
    model.load_state_dict(torch.load(path)["model_state_dict"])
    model = model.cuda()

    return model

In [None]:
# One loaded model per fold checkpoint
models = [loadModel(entry["path"], entry["encoder"]) for entry in log_dirs]

In [None]:
class ensModel(nn.Module):
    """Inference-only ensemble that averages the outputs of several models.

    The members are registered in an ``nn.ModuleList`` so that ``.eval()``,
    ``.to()``/``.cuda()`` and ``parameters()`` on the ensemble reach them.
    Because the forward pass always runs under ``torch.no_grad()``, the
    ensemble (and therefore every member) is switched to evaluation mode on
    construction. A freshly built model would otherwise run dropout and
    batch-norm in training mode during prediction.

    Args:
        models: iterable of ``nn.Module`` instances producing outputs of
            identical shape.
    """

    def __init__(self, models):
        super().__init__()
        self.models = nn.ModuleList(models)
        self.eval()

    def _target_device(self):
        """Return the device of the members' parameters (CUDA if there are none)."""
        first_param = next(self.parameters(), None)
        if first_param is None:
            return torch.device("cuda")
        return first_param.device

    def forward(self, x):
        """Return the element-wise mean of all members' outputs for ``x``.

        ``x`` is moved to the device the members live on before the forward
        passes; no gradients are recorded.
        """
        x = x.to(self._target_device())
        with torch.no_grad():
            res = [m(x) for m in self.models]
        return torch.mean(torch.stack(res), dim=0)

In [None]:
# Wrap the loaded fold models in one averaging ensemble; this rebinds `model`,
# which until now referred to the network left over from the training loop
model = ensModel(models)

In [None]:
# Callbacks handed to runner.infer during threshold finding; filled in below
infer_cb = []

In [None]:


# Memory-saving replacement for the stock inference callback: every batch output is
# resized to 350x525 and stored as float16 as soon as it is produced
class InferCallback(Callback):
    """Collect model outputs per loader as down-sized float16 arrays.

    After a loader finishes, ``self.predictions["logits"]`` holds a single
    array with one 350x525 plane per (sample, channel) pair, laid out with a
    stride of 4 planes per sample.
    """

    def __init__(self, out_dir=None, out_prefix=None):
        super().__init__(CallbackOrder.Internal)
        self._keys_from_state = ["out_dir", "out_prefix"]
        self.out_dir = out_dir
        self.out_prefix = out_prefix
        self.predictions = defaultdict(list)

    def on_stage_start(self, state: RunnerState):
        """Adopt output settings from the runner state and create the output folder."""
        for attr in self._keys_from_state:
            override = getattr(state, attr, None)
            if override is None:
                continue
            setattr(self, attr, override)
        # assert self.out_prefix is not None
        if self.out_dir is not None:
            self.out_prefix = f"{self.out_dir}/{self.out_prefix}"
        if self.out_prefix is not None:
            os.makedirs(os.path.dirname(self.out_prefix), exist_ok=True)

    def on_loader_start(self, state: RunnerState):
        """Start every loader with an empty list of batch results."""
        self.predictions = {"logits": []}

    def on_batch_end(self, state: RunnerState):
        """Resize each output plane of the batch and append it to the results."""
        batch_arrays = {
            name: tensor.detach().cpu().numpy()
            for name, tensor in state.output.items()
        }
        # Every output entry is stored under "logits", whatever its own key is
        for batch_array in batch_arrays.values():
            resized = np.zeros((len(batch_array) * 4, 350, 525), dtype=np.float16)
            for sample_idx, sample in enumerate(batch_array):
                for channel_idx, plane in enumerate(sample):
                    # cv2 takes dsize as (width, height)
                    resized[sample_idx * 4 + channel_idx, :, :] = cv2.resize(
                        plane, dsize=(525, 350), interpolation=cv2.INTER_LINEAR
                    )
            self.predictions["logits"].append(resized)
        # Progress tick, one per batch
        print(">", end="")

    def on_loader_end(self, state: RunnerState):
        """Merge the per-batch arrays into one array per key."""
        merged = {}
        for key, chunks in self.predictions.items():
            merged[key] = np.concatenate(chunks, axis=0)
        self.predictions = merged

if not JIT_PRED:
    # Resume from the best checkpoint of the last trained fold (logdir is left over
    # from the training loop)
    infer_cb.append(CheckpointCallback(resume=f"{logdir}/checkpoints/best.pth"))
else:
    # NOTE(review): JIT_PATHS is not defined in this notebook; presumably it comes
    # from the `utils_ucsi` star import - confirm before enabling JIT_PRED.
    models = [torch.jit.load(p) for p in JIT_PATHS]
    model = ensModel(models)
infer_cb.append(InferCallback())

In [None]:
encoded_pixels = []

# Rebuild data loader
# train_ids, valid_dataset, preprocessing_fn and num_workers are the values left over
# from the last fold of the training loop.
# The train dataset is read twice below (once for inference, once for the reference
# masks), and sample i of both passes must show the same picture. It is therefore
# built with the validation augmentation; get_training_augmentation() is presumably
# stochastic, which would pair each prediction with a differently transformed mask.
train_dataset = CloudDataset(df=train, datatype='train', img_ids=train_ids, transforms = get_validation_augmentation(), preprocessing=get_preprocessing(preprocessing_fn))
train_loader = DataLoader(train_dataset, batch_size=bs*8, shuffle=False, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=bs*8, shuffle=False, num_workers=num_workers)

if TH_FIND:
    # Data used for the threshold search: train split or validation split
    search_dataset = train_dataset if FIND_TRAIN else valid_dataset
    loaders = {"infer": train_loader if FIND_TRAIN else valid_loader}
    # Run inference through model
    print("Running inference:")
    # Ruler as wide as the number of full batches; the callback prints one ">" per batch
    print("="*(len(search_dataset)//(bs*8)))
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=infer_cb,
    )
    valid_masks = []
    print("Build valid mask on :\t%s"%("train data" if FIND_TRAIN else "valid data"))
    for _, mask in tqdm.tqdm(search_dataset):
        for m in mask: # for each seg class
            # Bring reference masks to the same 350x525 size as the stored predictions
            if m.shape != (350, 525):
                m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)
    # NOTE(review): relies on the inference callback being first in runner.callbacks
    probabilities  = runner.callbacks[0].predictions["logits"]

## Find optimal values

First of all, my thanks to @samusram for finding a mistake in my validation
https://www.kaggle.com/c/understanding_cloud_organization/discussion/107711#622412

And now I find optimal values separately for each class.

In [None]:
if not TH_FIND:
    print("Not running threshold finding, using default threshold config")
else:
    # Grid-search, per class, the (threshold, minimum size) pair with the best mean dice
    class_params = {}
    for class_id in range(4):
        print(class_id)
        # Predictions and masks are interleaved by class, hence the stride of 4
        class_truths = valid_masks[class_id::4]
        attempts = []
        for percent in range(30, 75, 5):
            threshold = percent / 100
            for min_size in [0, 100, 1200, 5000, 10000]:
                predicted = [
                    post_process(sigmoid(probabilities[idx]), threshold, min_size)[0]
                    for idx in range(class_id, len(probabilities), 4)
                ]

                # Two empty masks count as a perfect match
                scores = [
                    1 if (guess.sum() == 0) & (truth.sum() == 0) else dice(guess, truth)
                    for guess, truth in zip(predicted, class_truths)
                ]

                attempts.append((threshold, min_size, np.mean(scores)))

        attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])
        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())

        # The first row after sorting is the best-scoring combination
        class_params[class_id] = (
            attempts_df['threshold'].values[0],
            attempts_df['size'].values[0],
        )
print(class_params)

## Predicting

In [None]:
import gc
# Collect unreachable Python objects first: tensors they still hold only become
# free cached blocks once collected, and empty_cache() releases free blocks only.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Test images go through the validation augmentation and the encoder preprocessing
test_transforms = get_validation_augmentation()
test_preprocessing = get_preprocessing(preprocessing_fn)
test_dataset = CloudDataset(
    df=sub,
    datatype='test',
    img_ids=test_ids,
    transforms=test_transforms,
    preprocessing=test_preprocessing,
)
# Fixed order so predictions line up with the submission rows
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=0)

loaders = {"test": test_loader}

In [None]:
# Make the runner predict with the current `model` instead of the one it was trained with
runner.model = model

In [None]:
# Run-length encode the post-processed prediction of every (image, class) pair
encoded_pixels = []
image_id = 0  # running index over all predicted planes; modulo 4 gives the class
for test_batch in tqdm.tqdm(loaders['test']):
    runner_out = runner.predict_batch({"features": test_batch[0].cuda()})['logits']
    for sample_logits in runner_out:
        for probability in sample_logits:
            probability = probability.cpu().detach().numpy()
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            params = class_params[image_id % 4]
            predict, num_predict = post_process(sigmoid(probability), params[0], params[1])
            # An empty string marks "no mask" in the submission
            encoded_pixels.append('' if num_predict == 0 else mask2rle(predict))
            image_id += 1

In [None]:
# Attach the encoded masks and write a submission file named after the current Unix time
sub['EncodedPixels'] = encoded_pixels
submission_stamp = int(datetime.now().timestamp())
submission_file = '%s_submission.csv'%(submission_stamp)
sub.to_csv(submission_file, columns=['Image_Label', 'EncodedPixels'], index=False)