# STEP 1: Install your requirements

In [1]:
import os
import shutil
import cv2
import pandas as pd
import numpy as np
import json
import torch
import pycocotools._mask as _mask
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, isdir, join
from google_drive_downloader import GoogleDriveDownloader as gdd

In [2]:
!pip install pyyaml==5.1

TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

# Install detectron2 that matches the above pytorch version
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html

import detectron2
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.utils.visualizer import ColorMode
from detectron2.utils.logger import setup_logger
setup_logger()

from skimage.measure import find_contours
from skimage.measure import label
from scipy.ndimage.morphology import binary_fill_holes
from skimage.morphology import dilation, erosion
from itertools import groupby

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[?25l[K     |█▏                              | 10 kB 19.4 MB/s eta 0:00:01[K     |██▍                             | 20 kB 22.7 MB/s eta 0:00:01[K     |███▋                            | 30 kB 11.6 MB/s eta 0:00:01[K     |████▉                           | 40 kB 9.4 MB/s eta 0:00:01[K     |██████                          | 51 kB 5.4 MB/s eta 0:00:01[K     |███████▏                        | 61 kB 5.9 MB/s eta 0:00:01[K     |████████▍                       | 71 kB 5.7 MB/s eta 0:00:01[K     |█████████▋                      | 81 kB 6.4 MB/s eta 0:00:01[K     |██████████▊                     | 92 kB 4.9 MB/s eta 0:00:01[K     |████████████                    | 102 kB 5.2 MB/s eta 0:00:01[K     |█████████████▏                  | 112 kB 5.2 MB/s eta 0:00:01[K     |██████████████▍                 | 122 kB 5.2 MB/s eta 0:00:01[K     |███████████████▌                | 133 kB 5.2 MB/s eta 0:00:01[K     |███

# STEP 2: Wget testing data and pretrained weight

In [3]:
# Download the testing data
gdd.download_file_from_google_drive(file_id='1nEJ7NTtHcCHNQqUXaoPk55VH3Uwh4QGG',
                  dest_path='./dataset.zip',
                  unzip=True)

Downloading 1nEJ7NTtHcCHNQqUXaoPk55VH3Uwh4QGG into ./dataset.zip... Done.
Unzipping...Done.


In [4]:
# Load my pretrained model weights
gdd.download_file_from_google_drive(file_id='12j_E_J-j2RSC0hGzapnNp2oZ87IisneI',
                  dest_path='./model_final.pth',
                  unzip=True)

Downloading 12j_E_J-j2RSC0hGzapnNp2oZ87IisneI into ./model_final.pth... Done.
Unzipping...Done.


# STEP 3: Register dataset and modify config file

In [5]:
# Register dataset
inpath = "/content/dataset/train/"  # the train folder download from kaggle
images_name = listdir(inpath)
if not os.path.isdir('/content/dataset/train_new'):
    os.mkdir('/content/dataset/train_new')
outpath = "/content/dataset/train_new/"  # the folder putting all nuclei image
for f in images_name:
    image = listdir(inpath + f + "/images/")
    shutil.copyfile(inpath + f + "/images/" + image[0], outpath + image[0])
DatasetCatalog.clear()
register_coco_instances("my_dataset", {}, "nucleus_cocoformat.json", "/content/dataset/train_new")  

# Modify the cofig file
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"))  
cfg.MODEL.WEIGHTS = "/content/model_final.pth"  
cfg.OUTPUT_DIR = "./output"
cfg.DATASETS.TRAIN = ("my_dataset",)
cfg.DATASETS.TEST = ()  
cfg.DATALOADER.NUM_WORKERS = 2  
cfg.SOLVER.IMS_PER_BATCH = 2  
cfg.SOLVER.BASE_LR = 0.00025    
cfg.SOLVER.MAX_ITER = 3000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.TEST.DETECTIONS_PER_IMAGE = 2000
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)
print(cfg)

CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 2
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ()
  TRAIN: ('my_dataset',)
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: False
    SIZE: [0.9, 0.9]
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32], [64], [128], [256], [512]]
  BACKBONE:
    FREEZE_AT: 2
    NAME: build_resnet_fpn_backbone
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: ['res2', 'res3', 'res4', 'res5

# STEP 4: Generate answer.json for submission on Codalab

In [6]:
inpath = "/content/dataset/test/"  # test data path

with open('/content/dataset/test_img_ids.json') as f:
  test_img_ids = json.load(f)
images_name=[]
for i in range(len(test_img_ids)):
  if(test_img_ids[i]["file_name"][:4]=="TCGA"):
    images_name.append(test_img_ids[i]["file_name"])

# https://github.com/mirzaevinom/data_science_bowl_2018/blob/master/codes/predict.py
def postprocess_masks( ori_mask, ori_box, ori_score, image, min_nuc_size=15):

    """Clean overlaps between bounding boxes, fill small holes, smooth boundaries"""
    height, width = image.shape[:2]
    score_threshold = 0.9 

    # If there is no mask prediction or less than score threshold
    if len(ori_mask) == 0 or ori_score.all() < score_threshold:
        return

    keep_ind = np.where(np.sum(ori_mask, axis=(0, 1)) > min_nuc_size)[0]
    if len(keep_ind) < len(ori_mask):  # keep_ind possible to be zero zero
        if(keep_ind.shape[0] != 0):
            ori_mask = ori_mask[:keep_ind.shape[0]] 
            ori_box = ori_box[:keep_ind.shape[0]]
            ori_score = ori_score[:keep_ind.shape[0]]

        else:
            ori_mask = []
            ori_box = []
            ori_score = []
        

    overlap = np.zeros([height, width])

    masks = []
    # Removes overlaps from masks with lower score
    for i in range(len(ori_mask)):
        # Fill holes inside the mask
        mask = binary_fill_holes(ori_mask[i]).astype(np.uint8)
        # Smoothen edges using dilation and erosion
        mask = erosion(dilation(mask))
        # Delete overlaps
        overlap += mask
        mask[overlap > 1] = 0
        out_label = label(mask)
        # Remove all the pieces if there are more than one pieces
        if out_label.max() > 1:
            mask[()] = 0

        masks.append(mask)

    return masks, ori_box, ori_score

def rle_encode(bimask):
    if len(bimask.shape) == 3:
        return _mask.encode(np.asfortranarray(bimask))
    elif len(bimask.shape) == 2:
        h, w = bimask.shape
        size_count=_mask.encode(np.asfortranarray(bimask).reshape((h, w, 1), order='F'))[0]
        size_count['counts']=size_count['counts'].decode(("utf-8"))
        return size_count

In [7]:
coco_answer = []

for image_id, name in enumerate(images_name):
    image = cv2.imread(inpath + name)
    outputs = predictor(image)
    print(image_id)

    masks = outputs["instances"].to('cpu')._fields['pred_masks']
    boxes = outputs["instances"].to('cpu')._fields['pred_boxes'].tensor.tolist()
    scores = outputs["instances"].to('cpu')._fields['scores'].numpy()

    
    for i in range(len(masks)):  # Loop all instances                        
        x=boxes[i][0]
        y=boxes[i][1]
        w=boxes[i][2]-boxes[i][0]
        h=boxes[i][3]-boxes[i][1]

        ann = {"image_id": image_id+1, 
              "bbox": [x,y,w,h],
              "score":float(scores[i]),
              "category_id": int(1),
              "segmentation": rle_encode(masks[i]),
              }
        coco_answer.append(ann)

  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


0
1
2
3
4
5


In [8]:
json_answer = json.dumps(coco_answer, indent=4)
with open("answer.json", "w") as outfile:
    outfile.write(json_answer)