In [21]:
import os, glob
import sys
import random
import math
import re
import time

import numpy as np
import skimage.io
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Root directory of the project (the directory the notebook is started from)
ROOT_DIR = os.path.abspath(".")

# Import Mask RCNN
# Make the local copy of the library importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log

import imgaug.augmenters as augmenters

import samples.nucleus

import keras.backend

K = keras.backend.backend()
if K=='tensorflow':
    keras.backend.set_image_dim_ordering('tf')

%matplotlib inline 

# Directory to save logs and trained model
LOGS_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

In [2]:
import pickle 

# Directory holding the pre-clustered data, next to the project root.
CLUSTER_BASE = os.path.join(ROOT_DIR, "../clusters")

# 'all_files.pk' stores base paths relative to CLUSTER_BASE; resolve each one
# against CLUSTER_BASE while loading.
# NOTE: pickle.load executes arbitrary code - only load files you produced yourself.
with open(os.path.join(CLUSTER_BASE, 'all_files.pk'), 'rb') as f:
    FILE_LIST = [os.path.join(CLUSTER_BASE, name) for name in pickle.load(f)]

In [5]:
class BalancedNucleiDataset(utils.Dataset):
    """Nuclei dataset built from the module-level ``FILE_LIST``.

    Each entry of ``FILE_LIST`` is a base path: ``<base>.png`` is the image
    and every ``<base>/*.png`` file is read as one instance mask.
    """

    def prepare(self, class_map=None):
        """Register the single "nucleus" class and every image, then finalise.

        class_map: forwarded unchanged to ``utils.Dataset.prepare`` so the
            override keeps the base-class call signature.
        """
        self.name = "nuclei"
        self.add_class(self.name, 1, "nucleus")
        for i, basePath in enumerate(FILE_LIST):
            self.add_image(self.name, image_id=i,
                           path=basePath + '.png',
                           # glob returns files in arbitrary order; sort so the
                           # instance order is reproducible between runs.
                           maskPath=sorted(glob.glob(basePath + '/*.png')),
                           img_ref=os.path.basename(basePath))
        super().prepare(class_map)

    def load_image(self, image_id):
        """Read the image file and return its first three channels."""
        image = skimage.io.imread(self.image_info[image_id]['path'])[:, :, :3]
        return image

    def image_reference(self, image_id):
        """Return the image's base name for images registered by this class.

        Images from any other source are delegated to the base class.
        """
        info = self.image_info[image_id]
        if info["source"] == self.name:
            return info['img_ref']
        # Zero-argument super(): super(self.__class__) yields an unbound super
        # object that has no image_reference attribute.
        return super().image_reference(image_id)

    def load_mask(self, image_id):
        """Load all instance masks of one image.

        Returns:
            masks: bool array with the mask files stacked along the last axis.
            class_ids: int32 array of ones (every instance is class 1, "nucleus").
        """
        mask_files = self.image_info[image_id]['maskPath']
        masks = np.dstack([skimage.io.imread(f) for f in mask_files]).astype(bool)
        class_ids = np.ones(masks.shape[-1], dtype=np.int32)
        return masks, class_ids
    
def getDatasets(cvPart):
    """Build and prepare the training and validation datasets.

    cvPart is accepted but not used: both datasets are separate
    BalancedNucleiDataset instances prepared in the same way.

    Returns a (dataset_train, dataset_val) tuple.
    """
    prepared = []
    for _ in range(2):  # first the training set, then the validation set
        dataset = BalancedNucleiDataset()
        dataset.prepare()
        prepared.append(dataset)
    return tuple(prepared)

In [11]:
# There is no real validation set for now; building one is a little tricky because:
# 1. During pre-processing the data is clustered and balanced by cluster.
# 2. During training, random augmentations are applied.

dataset_train, dataset_val = getDatasets(cvPart=9)
(len(dataset_train.image_info), len(dataset_val.image_info))

(1379, 1379)

In [12]:
class NucleusConfig1(samples.nucleus.NucleusConfig):
    """Training configuration: the sample nucleus config with local overrides.

    The base class is referenced through ``samples.nucleus`` because
    ``import samples.nucleus`` binds only the name ``samples``.
    """
    # Fixed 256x256 network input.
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256
    # One image per step on a single GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # NOTE(review): presumably read by a customised mrcnn build to pick the
    # optimizer - confirm the library actually consumes this attribute.
    OPTIMIZER = 'SGD'
    # One step per registered training image.
    STEPS_PER_EPOCH = len(dataset_train.image_info)
    VALIDATION_STEPS = 100
    # Per-channel mean pixel values for this dataset.
    MEAN_PIXEL = np.array([43.53287505,   39.56061986,   48.22454996]) 
    BACKBONE = "resnet50"
    
config = NucleusConfig1()
config.display()


Configurations:
BACKBONE                       resnet50
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [ 0.1  0.1  0.2  0.2]
DETECTION_MAX_INSTANCES        400
DETECTION_MIN_CONFIDENCE       0
DETECTION_NMS_THRESHOLD        0.3
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  256
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  256
IMAGE_MIN_SCALE                2.0
IMAGE_RESIZE_MODE              crop
IMAGE_SHAPE                    [256 256   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES               200
MEAN_PIXEL                     [ 43.53287505  39.56061986  48.22454996]
MINI_MASK_SHAPE                (56, 56)
NAME                           nucleus
NUM_CLASSES                    2
OPTIMIZER         

In [14]:
# Training-time augmentation passed to model.train().
# Per imgaug semantics, OneOf picks one of the listed children each time it is applied.
augmentation = augmenters.OneOf([
                    # Flips constructed with probability 1.
                    augmenters.Fliplr(1),
                    augmenters.Flipud(1),
                    # Quarter-turn rotations in either direction.
                    augmenters.Affine(rotate=90),
                    augmenters.Affine(rotate=-90),
                    # Shear with an angle range of (-8, 8), followed by a 5% crop.
                    augmenters.Sequential([
                        augmenters.Affine(shear=(-8,8)),
                        augmenters.Crop(percent=0.05)
                    ])
                ])

In [17]:
# Build the Mask R-CNN model in training mode; logs and checkpoints go to LOGS_DIR.
model = modellib.MaskRCNN(
    mode="training",
    model_dir=LOGS_DIR,
    config=config,
)

In [22]:
# Which weights to start with?
init_with = "last"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # The listed head layers are not loaded from the COCO weights file.
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Look the checkpoint up once and reuse the result for loading and logging.
    last_weights = model.find_last()[1]
    if last_weights is None:
        # Fail with a clear message instead of passing None to load_weights().
        raise FileNotFoundError(
            "No previous checkpoint found under {}".format(LOGS_DIR))
    model.load_weights(last_weights, by_name=True)
    print("Loaded ", last_weights)
else:
    # A typo here would otherwise silently start training from untrained weights.
    raise ValueError(
        "init_with must be 'imagenet', 'coco' or 'last', got {!r}".format(init_with))

In [None]:
# Stage 1: train only the 'heads' layer group, with augmentation.
# NOTE(review): in Matterport Mask R-CNN, `epochs` is the total epoch count
# to reach (epochs restored from a checkpoint count as done), not the number
# of additional epochs - confirm for this library version.
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE, 
            epochs=5, 
            layers='heads',
            augmentation=augmentation)

In [None]:
def cleanup(model_dir=None):
    """Delete all but the last matching checkpoint file.

    Files matching ``<model_dir>/nucleus*/mask_rcnn_acnd_*`` are sorted by
    path and every one except the last in sort order is removed.

    model_dir: directory containing the per-run log folders. Defaults to the
        notebook's ``LOGS_DIR`` (the directory the model was created with).
    """
    if model_dir is None:
        model_dir = LOGS_DIR
    # NOTE(review): the displayed config NAME is 'nucleus', so checkpoints are
    # presumably named 'mask_rcnn_nucleus_*'; confirm 'acnd' still matches.
    pattern = os.path.join(model_dir, 'nucleus*', 'mask_rcnn_acnd_*')
    for stale in sorted(glob.glob(pattern))[:-1]:
        os.remove(stale)

In [None]:
# Stage 2: prune old checkpoints, then train all layers, with augmentation.
# NOTE(review): if `epochs` is a total count (as in Matterport Mask R-CNN),
# this continues from the epoch already reached up to epoch 10 - confirm.
cleanup()
model.train(dataset_train, dataset_val, 
        learning_rate=config.LEARNING_RATE,
        epochs=10, 
        layers="all",
        augmentation=augmentation)

In [None]:
# Repeats the preceding cell with identical arguments.
# NOTE(review): if `epochs` is a total count (as in Matterport Mask R-CNN),
# the model is already at epoch 10 here, so this call would train for zero
# additional epochs - raise `epochs` if further training is intended.
cleanup()
model.train(dataset_train, dataset_val, 
        learning_rate=config.LEARNING_RATE,
        epochs=10, 
        layers="all",
        augmentation=augmentation)

In [None]:
# Switch the optimizer setting on the model's config in place.
# NOTE(review): presumably only takes effect on the next model.train() call,
# and only if the library reads config.OPTIMIZER when compiling - confirm.
model.config.OPTIMIZER = 'Adam'