In [None]:
%load_ext autoreload
%autoreload
from factory import *
import torch
import os 
import sys
from catalyst.dl.callbacks import CriterionCallback, EarlyStoppingCallback, OptimizerCallback, CriterionAggregatorCallback, F1ScoreCallback, AUCCallback
from catalyst.dl.runner import SupervisedRunner
from pytorch_toolbelt import losses as L
from pytorch_toolbelt.inference import tta
import collections
from pytorch_toolbelt.utils.catalyst import * 
from metrics import *
import matplotlib.pyplot as plt
from viz_utils import *
from tqdm import tqdm
import seaborn as sns
import cv2
import segmentation_models_pytorch as smp
from catalyst.contrib.optimizers import RAdam, Lookahead
import itertools
%matplotlib inline

In [None]:
# One entry per model whose predictions are combined below.
# The three lists are index-aligned: position i describes the same experiment.
experiment_names = [
    'se_resnext50_32x4d_simple',
    'se_resnext101_32x4d_simple',
    'efficientnet-b5_simple',
    'resnext50_32x4d_with_mask_and_boundaries',
    'resnext101_32x8d_with_mask_and_boundaries',
]

# Log directory paths follow the pattern logs/<experiment name>.
log_dirs = ['logs/' + name for name in experiment_names]

# Encoder name of each experiment, in the same order as experiment_names.
encoder_names = [
    'se_resnext50_32x4d',
    'se_resnext101_32x4d',
    'efficientnet-b5',
    'resnext50_32x4d',
    'resnext101_32x8d',
]

In [None]:
# All inputs and outputs are addressed relative to the local data/ directory.
# CSV files: training annotations and the sample submission.
train_df_path, sample_submission_path = 'data/train.csv', 'data/sample_submission.csv'
# Image folders (note the trailing slash, kept as in the original paths).
data_folder, test_data_folder = "data/train_images/", "data/test_images/"
# Folder holding the saved per-experiment validation predictions.
val_output_folder = "data/validation_predictions"

In [None]:
# Read the annotations at train_df_path and unpack the result into a training
# and a validation table; later cells index val_df by image name.
# NOTE(review): return_masks is not defined in this notebook — presumably it
# comes from one of the star imports; confirm how the train/val split is made.
train_df, val_df = return_masks(train_df_path)

In [None]:
# Load the saved validation outputs of every experiment.
# all_data is a list with one dict per experiment (same order as
# experiment_names); each dict maps an image id to that image's prediction array.
# NOTE(review): rows of each saved array are assumed to be in val_df index
# order — zip() pairs them positionally and cannot verify this.
images_id = val_df.index.values
all_data = []
for exp in experiment_names:
    exp_preds = np.load(os.path.join(val_output_folder, exp + '.npz.npy'))
    exp_preds = exp_preds[:, :4, ...]  # keep only the first four channels
    preds_by_image = dict(zip(images_id, exp_preds))
    print('loaded {}'.format(exp))
    all_data.append(preds_by_image)
    # Drop the loop-local names; the arrays stay alive through all_data.
    del exp_preds, preds_by_image

Visualize optimal threshold and min_size for each class

In [None]:
def _rle_to_mask(rle, height=256, width=1600):
    """Decode one run-length string into a (height, width) uint8 mask.

    `rle` is a whitespace-separated sequence "start length start length ...".
    Each run marks `length` consecutive entries of the flattened image,
    beginning at offset `start`; the flat vector is then reshaped in
    column-major order (order='F').

    NOTE(review): start values are used as-is as 0-based offsets. If the
    annotation format is 1-based the masks are shifted by one pixel — confirm
    against the code that built the training masks before changing this.
    """
    tokens = rle.split()
    flat = np.zeros(height * width, dtype=np.uint8)
    for start, run in zip(map(int, tokens[0::2]), map(int, tokens[1::2])):
        flat[start:start + run] = 1
    return flat.reshape(height, width, order='F')


# Build one (256, 1600, 4) ground-truth mask stack per validation image.
# gt_masks[i] belongs to images_id[i].
gt_masks = []
images_id = []
for image_name in tqdm(val_df.index.values):
    # The first four columns of the row hold the per-channel encodings.
    labels = val_df.loc[image_name, :][:4]
    masks = np.zeros((256, 1600, 4), dtype=np.float32)  # float32 is very important (original author's note)
    for idx, label in enumerate(labels.values):
        # Only non-blank strings carry an encoding. The former
        # `label is not np.nan` identity test let any other NaN object (or
        # None) through, which then failed on .split() with AttributeError.
        if isinstance(label, str) and label.strip():
            masks[:, :, idx] = _rle_to_mask(label)
    gt_masks.append(masks)
    images_id.append(image_name)

In [None]:
# Look-up table: image id -> ground-truth mask stack.
# images_id and gt_masks are paired positionally.
dict_of_gt_masks = {image_id: gt for image_id, gt in zip(images_id, gt_masks)}

In [None]:
# Grid search over (threshold, min_size) for defect channel `defect_idx`.
# Produces all_dices / all_thr / all_minsizes with one entry per combination,
# in itertools.product(thrs, min_sizes) order.
defect_idx = 0
thrs = [0.4, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8]
min_sizes = [400, 450, 500, 550, 600, 650, 700, 750, 800]

# Ensemble probabilities do not depend on thr/min_size, so compute them once
# per image instead of once per combination (costs one extra 2-D array per
# image while this cell runs).
# all_data is a *list* with one {image id: array} dict per experiment, so it is
# iterated directly; indexing it with an experiment name raises TypeError.
# The raw outputs are averaged first and the sigmoid is applied to the mean.
ensemble_probs = {}
for image in images_id:
    mean_output = sum(model_preds[image][defect_idx, ...] for model_preds in all_data) / len(all_data)
    ensemble_probs[image] = torch.sigmoid(torch.from_numpy(mean_output)).numpy()

all_dices = []
all_thr = []
all_minsizes = []
# Materialise the combinations so tqdm knows the total and can show progress.
comb_thr_minsize = list(itertools.product(thrs, min_sizes))
for thr, min_size in tqdm(comb_thr_minsize):
    dices_mean = []
    for image in images_id:
        gt_mask = dict_of_gt_masks[image][..., defect_idx]
        predictions_bin = ensemble_probs[image] > thr
        # Fewer positive pixels than min_size in the whole image: predict nothing.
        if predictions_bin.sum() < min_size:
            predictions_bin = np.zeros(predictions_bin.shape)
        dices_mean.append(dice(gt_mask, predictions_bin, empty_score=1.0))
    mean_dice = np.mean(dices_mean)
    print('thr {}, minsize {}, DICE : {}'.format(thr, min_size, mean_dice))
    all_dices.append(mean_dice)
    all_thr.append(thr)
    all_minsizes.append(min_size)

# The cached probabilities are only needed for this search; release them.
del ensemble_probs

In [None]:
# Tabulate the grid-search results (one row per thr/min_size combination)
# and display the five rows with the highest mean Dice.
scores = pd.DataFrame({
    'dices': all_dices,
    'thr': all_thr,
    'min_size': all_minsizes,
})
scores.sort_values(by='dices', ascending=False).head(5)

In [None]:
# Mean Dice versus threshold, one line per min_size value.
# The same plot is drawn once per defect channel in this notebook, so the
# title records which channel (defect_idx) these curves belong to.
ax = sns.lineplot(x='thr', y='dices', hue='min_size', data=scores, linewidth=2.5)
ax.set_title('Mean Dice vs. threshold (defect_idx = {})'.format(defect_idx));

In [None]:
# Grid search over (threshold, min_size) for defect channel `defect_idx`.
# Produces all_dices / all_thr / all_minsizes with one entry per combination,
# in itertools.product(thrs, min_sizes) order.
defect_idx = 1
thrs = [0.4, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8]
min_sizes = [400, 450, 500, 550, 600, 650, 700, 750, 800]

# Ensemble probabilities do not depend on thr/min_size, so compute them once
# per image instead of once per combination (costs one extra 2-D array per
# image while this cell runs).
# all_data is a *list* with one {image id: array} dict per experiment, so it is
# iterated directly; indexing it with an experiment name raises TypeError.
# The raw outputs are averaged first and the sigmoid is applied to the mean.
ensemble_probs = {}
for image in images_id:
    mean_output = sum(model_preds[image][defect_idx, ...] for model_preds in all_data) / len(all_data)
    ensemble_probs[image] = torch.sigmoid(torch.from_numpy(mean_output)).numpy()

all_dices = []
all_thr = []
all_minsizes = []
# Materialise the combinations so tqdm knows the total and can show progress.
comb_thr_minsize = list(itertools.product(thrs, min_sizes))
for thr, min_size in tqdm(comb_thr_minsize):
    dices_mean = []
    for image in images_id:
        gt_mask = dict_of_gt_masks[image][..., defect_idx]
        predictions_bin = ensemble_probs[image] > thr
        # Fewer positive pixels than min_size in the whole image: predict nothing.
        if predictions_bin.sum() < min_size:
            predictions_bin = np.zeros(predictions_bin.shape)
        dices_mean.append(dice(gt_mask, predictions_bin, empty_score=1.0))
    mean_dice = np.mean(dices_mean)
    print('thr {}, minsize {}, DICE : {}'.format(thr, min_size, mean_dice))
    all_dices.append(mean_dice)
    all_thr.append(thr)
    all_minsizes.append(min_size)

# The cached probabilities are only needed for this search; release them.
del ensemble_probs

In [None]:
# Tabulate the grid-search results (one row per thr/min_size combination)
# and display the five rows with the highest mean Dice.
scores = pd.DataFrame({
    'dices': all_dices,
    'thr': all_thr,
    'min_size': all_minsizes,
})
scores.sort_values(by='dices', ascending=False).head(5)

In [None]:
# Mean Dice versus threshold, one line per min_size value.
# The same plot is drawn once per defect channel in this notebook, so the
# title records which channel (defect_idx) these curves belong to.
ax = sns.lineplot(x='thr', y='dices', hue='min_size', data=scores, linewidth=2.5)
ax.set_title('Mean Dice vs. threshold (defect_idx = {})'.format(defect_idx));

In [None]:
# Grid search over (threshold, min_size) for defect channel `defect_idx`.
# Produces all_dices / all_thr / all_minsizes with one entry per combination,
# in itertools.product(thrs, min_sizes) order.
defect_idx = 2
thrs = [0.4, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8]
min_sizes = [800, 900, 1000, 1100, 1200]

# Ensemble probabilities do not depend on thr/min_size, so compute them once
# per image instead of once per combination (costs one extra 2-D array per
# image while this cell runs).
# all_data is a *list* with one {image id: array} dict per experiment, so it is
# iterated directly; indexing it with an experiment name raises TypeError.
# The raw outputs are averaged first and the sigmoid is applied to the mean.
ensemble_probs = {}
for image in images_id:
    mean_output = sum(model_preds[image][defect_idx, ...] for model_preds in all_data) / len(all_data)
    ensemble_probs[image] = torch.sigmoid(torch.from_numpy(mean_output)).numpy()

all_dices = []
all_thr = []
all_minsizes = []
# Materialise the combinations so tqdm knows the total and can show progress.
comb_thr_minsize = list(itertools.product(thrs, min_sizes))
for thr, min_size in tqdm(comb_thr_minsize):
    dices_mean = []
    for image in images_id:
        gt_mask = dict_of_gt_masks[image][..., defect_idx]
        predictions_bin = ensemble_probs[image] > thr
        # Fewer positive pixels than min_size in the whole image: predict nothing.
        if predictions_bin.sum() < min_size:
            predictions_bin = np.zeros(predictions_bin.shape)
        dices_mean.append(dice(gt_mask, predictions_bin, empty_score=1.0))
    mean_dice = np.mean(dices_mean)
    print('thr {}, minsize {}, DICE : {}'.format(thr, min_size, mean_dice))
    all_dices.append(mean_dice)
    all_thr.append(thr)
    all_minsizes.append(min_size)

# The cached probabilities are only needed for this search; release them.
del ensemble_probs

In [None]:
# Tabulate the grid-search results (one row per thr/min_size combination)
# and display the five rows with the highest mean Dice.
scores = pd.DataFrame({
    'dices': all_dices,
    'thr': all_thr,
    'min_size': all_minsizes,
})
scores.sort_values(by='dices', ascending=False).head(5)

In [None]:
# Mean Dice versus threshold, one line per min_size value.
# The same plot is drawn once per defect channel in this notebook, so the
# title records which channel (defect_idx) these curves belong to.
ax = sns.lineplot(x='thr', y='dices', hue='min_size', data=scores, linewidth=2.5)
ax.set_title('Mean Dice vs. threshold (defect_idx = {})'.format(defect_idx));

In [None]:
# Grid search over (threshold, min_size) for defect channel `defect_idx`.
# Produces all_dices / all_thr / all_minsizes with one entry per combination,
# in itertools.product(thrs, min_sizes) order.
defect_idx = 3
thrs = [0.35, 0.4, 0.45, 0.5, 0.55]
min_sizes = [2000, 2050, 2100, 2150, 2200]

# Ensemble probabilities do not depend on thr/min_size, so compute them once
# per image instead of once per combination (costs one extra 2-D array per
# image while this cell runs).
# all_data is a *list* with one {image id: array} dict per experiment, so it is
# iterated directly; indexing it with an experiment name raises TypeError.
# The raw outputs are averaged first and the sigmoid is applied to the mean.
ensemble_probs = {}
for image in images_id:
    mean_output = sum(model_preds[image][defect_idx, ...] for model_preds in all_data) / len(all_data)
    ensemble_probs[image] = torch.sigmoid(torch.from_numpy(mean_output)).numpy()

all_dices = []
all_thr = []
all_minsizes = []
# Materialise the combinations so tqdm knows the total and can show progress.
comb_thr_minsize = list(itertools.product(thrs, min_sizes))
for thr, min_size in tqdm(comb_thr_minsize):
    dices_mean = []
    for image in images_id:
        gt_mask = dict_of_gt_masks[image][..., defect_idx]
        predictions_bin = ensemble_probs[image] > thr
        # Fewer positive pixels than min_size in the whole image: predict nothing.
        if predictions_bin.sum() < min_size:
            predictions_bin = np.zeros(predictions_bin.shape)
        dices_mean.append(dice(gt_mask, predictions_bin, empty_score=1.0))
    mean_dice = np.mean(dices_mean)
    print('thr {}, minsize {}, DICE : {}'.format(thr, min_size, mean_dice))
    all_dices.append(mean_dice)
    all_thr.append(thr)
    all_minsizes.append(min_size)

# The cached probabilities are only needed for this search; release them.
del ensemble_probs

In [None]:
# Tabulate the grid-search results (one row per thr/min_size combination)
# and display the five rows with the highest mean Dice.
scores = pd.DataFrame({
    'dices': all_dices,
    'thr': all_thr,
    'min_size': all_minsizes,
})
scores.sort_values(by='dices', ascending=False).head(5)

In [None]:
# Mean Dice versus threshold, one line per min_size value.
# The same plot is drawn once per defect channel in this notebook, so the
# title records which channel (defect_idx) these curves belong to.
ax = sns.lineplot(x='thr', y='dices', hue='min_size', data=scores, linewidth=2.5)
ax.set_title('Mean Dice vs. threshold (defect_idx = {})'.format(defect_idx));

In [None]:
# Final (threshold, min_size) pair per position, four entries in total —
# presumably one per defect channel 0..3, picked from the grid searches above.
# Note: `thr` and `min_size` are rebound here from loop scalars to lists.
thr, min_size = map(list, zip(
    (0.55, 650),
    (0.5, 500),
    (0.55, 1200),
    (0.40, 2100),
))