# [Sartorius - Cell Instance Segmentation [MMDetection] ](https://www.kaggle.com/c/sartorius-cell-instance-segmentation)
## Detect single neuronal cells in microscopy images with MMDetection


Adapted from AWSAF's notebooks:
> [Sartorius: MMDetection [Train]](https://www.kaggle.com/awsaf49/sartorius-mmdetection-train) - current notebook reference

> [Sartorius: MMDetection [Infer]](https://www.kaggle.com/awsaf49/sartorius-mmdetection-infer) 

**Install requirements**: 
- mmdetection, mmdet, mmpycocotools, addict, yapf, mmcv_full

In [2]:
!rsync -a ../input/mmdetection-v280/mmdetection ../
# Copy the bundled MMDetection source tree from the input dataset into the parent directory
# (rsync -a = archive mode: recursive copy that preserves file attributes).

!pip install ../input/mmdetection-v280/src/mmdet-2.8.0/mmdet-2.8.0/
# mmdet (MMDetection) is an open-source object detection toolbox based on PyTorch - part of the OpenMMLab project.

!pip install ../input/mmdetection-v280/src/mmpycocotools-12.0.3/mmpycocotools-12.0.3/
# mmpycocotools loads, parses and visualizes annotations in COCO format.

!pip install ../input/mmdetection-v280/src/addict-2.4.0-py3-none-any.whl
# addict provides dictionaries whose values can be read and written as attributes,
# in addition to the standard item syntax.

!pip install ../input/mmdetection-v280/src/yapf-0.30.0-py2.py3-none-any.whl
# yapf is a Python code formatter: it reformats code to conform to a style guide,
# even if the original code did not violate it.

!pip install ../input/mmdetection-v280/src/mmcv_full-1.2.6-cp37-cp37m-manylinux1_x86_64.whl
# MMCV is a foundational library for computer vision research:
# universal IO APIs, image/video processing, image and annotation visualization,
# a PyTorch runner with a hooking mechanism, various CNN architectures, etc.
# NOTE(review): this wheel is tagged cp37 / manylinux1_x86_64, so it presumably requires a
# Python 3.7 Linux kernel - confirm against the runtime image.

**Import required libraries**

In [3]:
import pandas as pd
import numpy as np

from glob import glob
# glob() returns the list of file paths matching a wildcard pattern

import os
import cv2
# read, write, format images

import pickle
from tqdm.notebook import tqdm
# adding smart progress meter to loops

from itertools import groupby
# groupby() groups consecutive equal items of an iterable (used below for run-length encoding)

from pycocotools import mask as mutils
from pycocotools import _mask as coco_mask
# assists in loading, parsing and visualizing annotations in COCO

import matplotlib.pyplot as plt

from multiprocessing import Pool
# Multiprocessing's Pool object offers a convenient means of parallelizing 
# the execution of a function across multiple input values, 
# distributing the input data across processes (data parallelism). 

**Preparing meta data**

In [4]:
# Notebook-wide settings: config name, dataset locations and the output image directory.

conf_name = "mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco"
cell_mask_dir = '../input/sartorius-segmentation-mask-npz-dataset/'
ROOT = '../input/sartorius-cell-instance-segmentation/'
train_or_test = 'train'
img_dir = f'../work/mmdet_{train_or_test}'
# Pure-Python equivalent of `mkdir -p`: creates parents and is a no-op when it already exists.
os.makedirs(img_dir, exist_ok=True)

# Build one row per image: image path, list of all RLE annotations, integer cell-type label.

df = pd.read_csv(f'{ROOT}/train.csv')
df['image_path'] = ROOT + '/train/' + df['id'] + '.png'
tmp_df = df.drop_duplicates(subset=["id", "image_path"]).reset_index(drop=True)
# Attach the annotation lists by id, not by position: groupby() returns groups sorted by id,
# while drop_duplicates() keeps the CSV's row order, so a positional assignment would pair
# annotations with the wrong image whenever the CSV is not already sorted by id.
annotations_by_id = df.groupby("id")["annotation"].agg(list)
tmp_df["annotation"] = tmp_df["id"].map(annotations_by_id)
df = tmp_df.copy()
# Encode each distinct cell_type as an integer, numbered in order of first appearance.
df['label'] = df.cell_type.map({v:k for k, v in enumerate(df.cell_type.unique())})
display(df.head(5))

**Helper functions**

In [5]:
# creating np array of boundaries (segmentation mask) from image annotations (rle)

def rle2mask(rle, shape=[520, 704]):
    """Decode a run-length-encoded string into a binary mask.

    Args:
        rle: whitespace-separated "start length start length ..." string,
            with 1-based starts indexing the flattened (row-major) image.
        shape: [height, width] of the mask to produce.

    Returns:
        np.uint8 array of the given shape with 1 inside the runs and 0 elsewhere.
    """
    tokens = np.asarray(rle.split(), dtype=int)
    # Even positions hold the 1-based run starts, odd positions the run lengths.
    run_begin = tokens[0::2] - 1
    run_end = run_begin + tokens[1::2]

    flat = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for begin, end in zip(run_begin, run_end):
        flat[begin:end] = 1
    return flat.reshape(shape)


def get_mask(image_id):
    """Return all ground-truth masks of one image as a stacked array.

    Referenced from @onodera's notebook. Looks up the row of the global `df`
    whose id equals `image_id`, decodes every RLE string in its "annotation"
    list with rle2mask, and stacks the resulting masks along a new first axis.
    """
    annotation_lists = df.loc[df.id == image_id, "annotation"].tolist()
    # Only the first matching row is used.
    decoded = [rle2mask(rle) for rle in annotation_lists[0]]
    return np.stack(decoded)

In [6]:
# the opposite : creating image annotations (rle) from np array of boundaries (segmentation mask)

def get_rles_from_mask(image_id):
    """Load an instance mask from its .npz file and encode every instance as RLE.

    Args:
        image_id: file stem of `{cell_mask_dir}/{image_id}.npz`; the mask is read
            from the archive entry 'arr_0'.

    Returns:
        (rle_list, height, width): one uncompressed RLE dict (see coco_rle_encode)
        per distinct non-zero value in the mask, plus the mask's first two dimensions.
    """
    # Use the archive as a context manager so its file handle is released immediately;
    # this function runs once per image inside the worker pool.
    with np.load(f'{cell_mask_dir}/{image_id}.npz') as archive:
        mask = archive['arr_0']

    # Value 0 is skipped (treated as background); every other value is one instance.
    rle_list = [
        coco_rle_encode(mask == val)
        for val in np.unique(mask)
        if val != 0
    ]
    return rle_list, mask.shape[0], mask.shape[1]


def coco_rle_encode(mask):
    """Encode a mask as an uncompressed, column-major run-length dict.

    Args:
        mask: numpy array (normally a 2-D boolean mask).

    Returns:
        {'counts': [...], 'size': list(mask.shape)} where 'counts' holds the lengths
        of consecutive runs of equal values in column-major (Fortran) order. A leading
        0 is inserted when the first pixel equals 1, so the first count always refers
        to a run that is not 1.
    """
    rle = {'counts': [], 'size': list(mask.shape)}
    flat = np.asarray(mask).ravel(order='F')
    if flat.size == 0:
        return rle

    # Vectorised run-length computation: find the positions where the value changes
    # and take the differences between consecutive run boundaries. This replaces a
    # per-pixel Python loop and produces the same counts.
    change_points = np.flatnonzero(flat[1:] != flat[:-1]) + 1
    boundaries = np.concatenate(([0], change_points, [flat.size]))
    counts = np.diff(boundaries).tolist()

    if flat[0] == 1:
        counts.insert(0, 0)
    rle['counts'] = counts
    return rle


def mk_mmdet_custom_data(image_id):
    """Build the annotation record for one image.

    Returns a dict with 'filename', 'width', 'height' and 'ann'. 'ann' is an empty
    dict when the mask contains no instances; otherwise it holds 'bboxes'
    (float32, [x1, y1, x2, y2]), placeholder 'labels' and the encoded 'masks'.
    """
    rles, height, width = get_rles_from_mask(image_id)
    record = {
        'filename': image_id+'.png',
        'width': width,
        'height': height,
        'ann': {},
    }
    if len(rles) == 0:
        return record

    # frPyObjects : converts polygon, bbox, and uncompressed RLE to encoded RLE mask
    encoded = mutils.frPyObjects(rles, height, width)

    # toBbox : bounding boxes around the encoded masks; the last two columns are
    # turned from sizes into far-corner coordinates by adding the first two columns.
    boxes = mutils.toBbox(encoded)
    boxes[:, 2] += boxes[:, 0]
    boxes[:, 3] += boxes[:, 1]

    record['ann'] = {
        'bboxes': np.array(boxes, dtype=np.float32),
        'labels': np.zeros(len(boxes)), # dummy data.(will be replaced later)
        'masks': encoded,
    }
    return record



def print_masked_img(image_id, mask):
    """Show an image, its mask, and the mask overlaid on the image side by side.

    The displayed image is a 3-channel composite of the raw grayscale plane, its
    CLAHE-enhanced version and its globally histogram-equalized version.
    """
    gray = load_RGBY_image(image_id, train_or_test)[...,0]

    # CLAHE (contrast limited adaptive histogram equalization) :
    # the image is divided into small blocks called "tiles", each tile is histogram
    # equalized, and 'contrast limiting' is used to prevent noise amplification
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
    composite = np.stack(
        [gray, equalizer.apply(gray), cv2.equalizeHist(gray)],
        axis=-1,
    )

    # Each panel: title plus the layers drawn in order (array, imshow keyword arguments).
    panels = (
        ('Image', [(composite, {})]),
        ('Mask', [(mask, {'cmap': 'inferno'})]),
        # image with the mask drawn on top at an opacity of .4
        ('Image + Mask', [(composite, {}), (mask, {'alpha': 0.4, 'cmap': 'inferno'})]),
    )

    plt.figure(figsize=(15, 15))
    for slot, (title, layers) in enumerate(panels, start=1):
        plt.subplot(1, 3, slot)
        for layer, options in layers:
            plt.imshow(layer, **options)
        plt.title(title)
        plt.axis('off')
    plt.tight_layout()
    plt.show()
    
    
def load_RGBY_image(image_id, train_or_test='train', image_size=None):
    """Read one image with read_img and replicate it three times along a new last axis."""
    plane = read_img(image_id, train_or_test, image_size)
    return np.stack([plane] * 3, axis=-1)


def read_img(image_id, train_or_test='train', image_size=None):
    """Read `{ROOT}/{train_or_test}/{image_id}.png` and return it as an 8-bit-range image.

    Args:
        image_id: file stem of the PNG to read.
        train_or_test: sub-directory of ROOT that holds the image.
        image_size: if given, the image is resized to (image_size, image_size).

    Returns:
        The image array as decoded by OpenCV (IMREAD_UNCHANGED); images whose maximum
        exceeds 255 are rescaled to uint8.

    Raises:
        AssertionError: if the file does not exist.
        IOError: if the file exists but OpenCV cannot decode it.
    """
    filename = f'{ROOT}/{train_or_test}/{image_id}.png'
    assert os.path.exists(filename), f'not found {filename}'
    img = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals a decode failure by returning None instead of raising.
        raise IOError(f'could not decode {filename}')
    if image_size is not None:
        img = cv2.resize(img, (image_size, image_size))
    if img.max() > 255:
        # Assumes full-range 16-bit data: 65535 / 257 == 255, so dividing by 257 maps
        # 0..65535 onto 0..255. Dividing by 255 would give 257 for the brightest
        # pixels, which wraps around when cast to uint8.
        img = np.clip(img // 257, 0, 255).astype('uint8')
    return img


def mk_ann(idx):
    """Process the image at row position `idx` of the global `df`.

    Builds the annotation record for the image, writes its 3-channel copy to
    `{img_dir}/{image_id}.png`, and returns (annotation, idx, image_id).
    """
    image_id = df['id'].iloc[idx]
    # The annotation is built first, so a failure there prevents the image from being written.
    annotation = mk_mmdet_custom_data(image_id)
    stacked = load_RGBY_image(image_id, train_or_test)
    cv2.imwrite(f'{img_dir}/{image_id}.png', stacked)
    return annotation, idx, image_id

- **Visualizing Image, Mask, and Both**

In [7]:
# Preview the first three training images next to their instance masks.

for idx in range(3):
    image_id = df.iloc[idx]['id']
    # Open the archive as a context manager so its file handle is closed once the
    # mask array has been read.
    with np.load(f'{cell_mask_dir}/{image_id}.npz') as npz_file:
        cell_mask = npz_file['arr_0']
    print_masked_img(image_id, cell_mask)

- **Data preprocessing step for the model**

In [9]:
# Build the annotation records in parallel worker processes.

MAX_THRE = 4
# Number of worker processes in the pool (choose it to match the available CPU cores).

annos = []
len_df = len(df)

# mk_ann loads each training image, writes its 3-channel copy and converts its masks to RLE.
# imap yields results in input order; images without any instance are dropped.
# The `with` block shuts the pool down once every result has been consumed, so no worker
# processes are left behind.
with Pool(processes=MAX_THRE) as p:
    for anno, idx, image_id in tqdm(p.imap(mk_ann, range(len_df)), total=len_df):
        if len(anno['ann']) > 0:
            annos.append(anno)

In [11]:
# Assign the final labels and split the annotation records into validation / training sets.

# id -> integer cell-type label. Not consulted in this cell (every instance gets class 0);
# retained in case other cells reference it.
lbl_cnt_dict = df.set_index('id').to_dict()['label']

# The first 1% of the records (in their existing order) form the validation set.
val_len = int(len(annos)*0.01)

# Replace the placeholder labels with class 0 for every instance. The records are modified
# in place, so `annos` itself also carries the final labels.
for ann in annos:
    ann['ann']['labels'] = np.full(len(ann['ann']['bboxes']), 0)

val_annos = annos[:val_len]
trn_annos = annos[val_len:]

In [12]:
# Serialize the full, training and validation annotation lists with pickle.

pickle_targets = (
    ('../work/mmdet_full.pkl', annos),
    ('../work/mmdet_trn.pkl', trn_annos),
    ('../work/mmdet_val.pkl', val_annos),
)
for pickle_path, records in pickle_targets:
    with open(pickle_path, 'wb') as f:
        pickle.dump(records, f)

- Copying the Sartorius config files (based on the default Mask R-CNN config) into the MMDetection config directory

In [14]:
# Copy the Sartorius config directory from the input dataset into the MMDetection config tree.
# NOTE(review): if the destination directory already exists, `cp -r` places the source inside
# it (configs/sartorius/sartorius) - confirm this cell is only run once per session.
!cp -r /kaggle/input/sartorius-mmdet-config-ds/sartorius /kaggle/mmdetection/configs/sartorius

In [15]:
# List the copied config files to confirm they are in place.
!ls -l ../mmdetection/configs/sartorius/

- **Base Model**

In [16]:
%%writefile /kaggle/mmdetection/configs/sartorius/mask_rcnn_r50_fpn2.py

# model settings

# Mask R-CNN with a ResNet-50 backbone and FPN neck, configured for a single class.
model = dict(
    type='MaskRCNN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        # Must be 50: it has to match the resnet50 checkpoint above and the FPN
        # in_channels below; 40 is not a depth the ResNet backbone supports.
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        # Channel counts of the four backbone stages selected by out_indices.
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            # One stride per FPN output level (num_outs=5).
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            # Matches the RoIAlign output_size of bbox_roi_extractor.
            roi_feat_size=7,
            num_classes=1, # number of class
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=1, # number of class
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))))


# model training and testing settings

# Training-time settings, one sub-dict per stage:
#   rpn          - target assignment and sampling for the region proposal network
#   rpn_proposal - how RPN outputs are filtered into proposals
#   rcnn         - target assignment and sampling for the RoI (box + mask) heads
train_cfg = dict(
    rpn=dict(
        # IoU thresholds used to label anchors for the RPN.
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            match_low_quality=True,
            ignore_iof_thr=-1),
        # 256 samples per image, at most half of them positive.
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        # A single 0.5 IoU threshold for the second stage.
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            match_low_quality=True,
            ignore_iof_thr=-1),
        # 512 samples per image, a quarter of them positive; ground-truth boxes are
        # added to the proposal set.
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        # Twice the mask RoIAlign output_size (14) in the model config of this file;
        # presumably the resolution of the mask training targets - TODO confirm.
        mask_size=28,
        pos_weight=-1,
        debug=False))
# Inference-time settings: proposal filtering for the RPN and post-processing for the RoI heads.
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        # Score threshold and NMS applied to the final detections.
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.5),
        # NOTE(review): caps detections at 200 per image - confirm this is enough for
        # images that contain a large number of cells.
        max_per_img=200,
        mask_thr_binary=0.5))

- **Model Augmentation** :

> Flip

> Multi-scale

> Photo Metric Distortion

In [18]:
%%writefile /kaggle/mmdetection/configs/sartorius/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py

# Start from the base config file in the same directory; the keys below override it
# (presumably merged key by key through mmcv's `_base_` mechanism - TODO confirm).
_base_ = 'mask_rcnn_r50_fpn_1x_coco.py'
# Shared normalization setting, reused by the backbone and both RoI heads below.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    pretrained='open-mmlab://resnest50',
    backbone=dict(
        type='ResNeSt',
        # ResNeSt : Split-Attention Networks
        # shows superior transfer learning results serving as the backbone
        
        stem_channels=64,
        depth=50,
        radix=2,
        reduction_factor=4,
        avg_down_stride=True,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch'),
    roi_head=dict(
        # Only the listed keys of each head are set here; everything else is expected
        # to come from the base config.
        bbox_head=dict(
            type='Shared4Conv1FCBBoxHead',
            conv_out_channels=256,
            norm_cfg=norm_cfg),
        mask_head=dict(norm_cfg=norm_cfg)))
# Use the ResNeSt image normalization: per-channel mean/std, with to_rgb=True.
# Shared by the Normalize step of both the train and the test pipeline.
img_norm_cfg = dict(
    mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True)
# Training pipeline: load image + annotations, multi-scale resize, random flip,
# normalize, pad, then bundle the tensors the detector consumes.
# NOTE(review): the notebook text lists "Photo Metric Distortion" among the augmentations,
# but no such transform appears in this pipeline - confirm whether it was dropped on purpose.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=True),
    # Multi-scale training: multiscale_mode='value' with three candidate scales
    # (presumably one is picked per sample - TODO confirm); aspect ratio is kept.
    dict(
        type='Resize',
        img_scale=[(1333, 1333), (1280, 1280), (1024, 1024)],
        multiscale_mode='value',
        keep_ratio=True),
    # Flip augmentation with horizontal and vertical directions.
    dict(type='RandomFlip', direction=['horizontal', 'vertical'], flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    # Pad height/width up to a multiple of 32.
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
# Test/validation pipeline: a single scale (1280, 1280) with flipping enabled, wrapped in
# MultiScaleFlipAug; the inner transforms mirror the training ones without annotations.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1280, 1280),
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            # NOTE(review): MultiScaleFlipAug is given no flip direction here, while this
            # RandomFlip lists two - confirm which directions are actually applied at test time.
            dict(type='RandomFlip',direction=['horizontal', 'vertical']),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

# Dataset settings: only the batch size and the pipelines are set here; dataset type and
# annotation files are expected to come from the base config (not shown in this cell).
data = dict(
    samples_per_gpu=2, # batch_size
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))

- **optimizer, epochs and learning rate setting**

In [17]:
%%writefile /kaggle/mmdetection/configs/sartorius/schedule_1x.py
 
# optimizer
# NOTE(review): the training cell of this notebook passes `optimizer.lr=0.0025` on the
# command line, which differs from the lr=0.005 written here - confirm which value is intended.
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
# No gradient clipping.
optimizer_config = dict(grad_clip=None)

# learning policy
# Step schedule with a linear warmup over the first 500 iterations, starting at
# warmup_ratio=0.001.
# NOTE(review): the steps are at epochs 8 and 11 while total_epochs is 20, so the last
# nine epochs would run at the final learning rate - confirm this is intended.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
total_epochs = 20

- **training the model**

In [None]:
config = f'configs/sartorius/{conf_name}.py'
# using --no-validate to avoid some errors for custom dataset metrics
additional_conf = '--cfg-options' # --no-validate
additional_conf += f' work_dir=../working/work_dir'
additional_conf += f' optimizer.lr=0.0025'
cmd = f'bash -x tools/dist_train.sh {config} 1 {additional_conf}'
!cd ../mmdetection;  {cmd}