## Train Notebook

Here, we cover how to train Mask R-CNN+ using the Matterport implementation of Mask R-CNN.

In [1]:
# Imports
import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import albumentations as A
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import imgaug as iaa
import skimage.io
import keras.callbacks


# Root directory of the project.
# NOTE: this is the current working directory, so the notebook has to be
# started from the repository root for the ROOT_DIR-based paths to resolve.
ROOT_DIR = os.path.abspath(os.getcwd())

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
from mrcnn.config import Config


import Monuseg

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
# Root folder of the MoNuSeg dataset: <ROOT_DIR>/datasets/MoNuSeg
MONUSEG_DIR = os.path.join(ROOT_DIR, "datasets", "MoNuSeg")

# Automatically reload imported modules when they change (comment out the two lines below to disable)
%load_ext autoreload
%autoreload 2

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


Define the augmentations. We only use light augmentations during training.

In [2]:
# Light augmentation pipeline used during training.
# 'dist' and 'Hch' are registered as additional targets of type 'mask', so they
# are handled the way masks are; per the original note this is done to prevent
# intensity disturbances on the dist and H-channel inputs.
light_transforms = [
    A.RandomBrightnessContrast(
        brightness_limit=0.5,
        contrast_limit=0.5,
        brightness_by_max=True,
        p=0.4,
    ),
    A.HorizontalFlip(always_apply=False, p=0.5),
    A.VerticalFlip(always_apply=False, p=0.5),
    A.Rotate(limit=359, always_apply=False, p=0.5),
]
extra_targets = {"dist": "mask", "Hch": "mask"}

augmentation = A.Compose(
    light_transforms,
    p=1,
    additional_targets=extra_targets,
)

Set up all variables needed for training. \
Hyperparameters can be found and altered in the `Monuseg.MonusegConfig` class.

In [3]:
# Set up variables for training
config = Monuseg.MonusegConfig()
config.NAME = "MoNuSeg_Coco"
config.display()

DEVICE = "/gpu:0"
TEST_MODE = "training" #"inference" or "training"

# Locate the tissue images of the chosen subset: <MONUSEG_DIR>/<subset>/tissue_images
subset = "train"
dataset_dir = os.path.join(MONUSEG_DIR, subset)
image_dir = os.path.join(dataset_dir, "tissue_images")

# os.listdir() returns entries in arbitrary order; sort them so that anything
# derived from the position of a file in this list is the same on every run
# and on every machine.
image_ids = sorted(os.listdir(image_dir))
print("Found {} images in {}".format(len(image_ids), image_dir))



Configurations:
BACKBONE                       resnet50
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        400
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            5
IMAGE_MAX_DIM                  256
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  256
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              crop
IMAGE_SHAPE                    [256 256   5]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.0001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE               

Create model, load weights, set up dataset

In [4]:
# Build the model on the selected device.
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode=TEST_MODE, model_dir=MODEL_DIR, config=config, verbose = False)

weights_path = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

print("Loding model from: {}".format(weights_path))

# Load the COCO checkpoint by layer name, leaving out the COCO head layers
# listed in `exclude`.
model.load_weights(weights_path, by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
            "mrcnn_bbox", "mrcnn_mask"])

# Alternative: load every layer from the checkpoint.
#model.load_weights(weights_path, by_name=True)
print("Model name: {}".format(config.NAME))

# Fail early with a clear message instead of a NameError further down.
if not image_ids:
    raise ValueError("No images found in {}".format(image_dir))

# Set up Datasets Objects
dataset_train = Monuseg.MonusegDataset()
dataset_val = Monuseg.MonusegDataset()
for dataset in (dataset_train, dataset_val):
    dataset.add_class(source = "Monuseg", class_id =  1, class_name = "nucleus")

# Every listed image is used for training.
for _id in image_ids:
    name, _ = os.path.splitext(_id)
    dataset_train.add_image(source = "Monuseg",
                            image_id = name,
                            path = os.path.join(image_dir, _id))

# The validation set consists of the last listed image only.
# NOTE: that image is part of the training set as well, so validation numbers
# from this run are not an independent estimate of generalisation.
val_file = image_ids[-1]
val_name, _ = os.path.splitext(val_file)
dataset_val.add_image(source = "Monuseg",
                      image_id = val_name,
                      path = os.path.join(image_dir, val_file))

dataset_val.prepare()
dataset_train.prepare()

print("Have {} train images".format(len(dataset_train.image_ids)))
print("Have {} val images".format(len(dataset_val.image_ids)))







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
box_ind is deprecated, use box_indices instead
Loding model from: C:\Users\User\GitHub\Improved Mask R-CNN\mask_rcnn_coco.h5
Model name: MoNuSeg_Coco
Have 30 train images
Have 1 val images


Main training code

In [None]:
# Optimal stage-based training.
# Each entry describes one call to model.train(); only the first stage is
# active, the remaining stages are kept here (commented out) for reference.
training_stages = [
    {
        "learning_rate": config.LEARNING_RATE*2,
        "epochs": 5,
        "layers": r"(PREPROC.*)|(conv1.*)|(mrcnn\_.*)",
    },
    # {"learning_rate": config.LEARNING_RATE,       "epochs": 5+35, "layers": "all"},
    # {"learning_rate": config.LEARNING_RATE * 0.5, "epochs": 40+5, "layers": '5+'},
    # {"learning_rate": config.LEARNING_RATE * 0.1, "epochs": 45+5, "layers": 'heads'},
]

for stage in training_stages:
    model.train(
        dataset_train,
        dataset_val,
        augmentation=augmentation,
        **stage
    )




Starting at epoch 0. LR=0.0002

Checkpoint Path: C:\Users\User\GitHub\Improved Mask R-CNN\logs\monuseg_coco20210802T1430\mask_rcnn_monuseg_coco_{epoch:04d}.h5
Selecting layers to train
PREPROC                (Conv2D)
conv1                  (Conv2D)
In model:  rpn_model
mrcnn_mask_conv1       (TimeDistributed)
mrcnn_mask_bn1         (TimeDistributed)
mrcnn_mask_conv2       (TimeDistributed)
mrcnn_mask_bn2         (TimeDistributed)
mrcnn_class_conv1      (TimeDistributed)
mrcnn_class_bn1        (TimeDistributed)
mrcnn_mask_conv3       (TimeDistributed)
mrcnn_mask_bn3         (TimeDistributed)
mrcnn_class_conv2      (TimeDistributed)
mrcnn_class_bn2        (TimeDistributed)
mrcnn_mask_conv4       (TimeDistributed)
mrcnn_mask_bn4         (TimeDistributed)
mrcnn_bbox_fc          (TimeDistributed)
mrcnn_mask_deconv      (TimeDistributed)
mrcnn_class_logits     (TimeDistributed)
mrcnn_mask             (TimeDistributed)



  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "




Epoch 1/5
Epoch 2/5
13/60 [=====>........................] - ETA: 1:10 - loss: 3.6763 - rpn_class_loss: 0.9550 - rpn_bbox_loss: 1.5568 - mrcnn_class_loss: 0.0436 - mrcnn_bbox_loss: 0.6209 - mrcnn_mask_loss: 0.4999

## Ensemble

In [None]:
# Build a lookup from image name (file name without its extension) to the full
# path of the file inside the training subset's "tissue_images" folder.
subset = "train"
dataset_dir = os.path.join(MONUSEG_DIR, subset)
image_dir = os.path.join(dataset_dir, "tissue_images")
image_ids = os.listdir(image_dir)

dict_names = {
    os.path.splitext(file_name)[0]: os.path.join(image_dir, file_name)
    for file_name in image_ids
}

# Show the folder that was scanned, followed by every image name found in it.
print(image_dir)
for image_name in dict_names:
    print(image_name)

In [None]:
# Image names grouped by organ. Mixture = 2x Bladder, Colon, Stomach.
organ_dict = {
    "Breast": [
        "TCGA-A7-A13E-01Z-00-DX1",
        "TCGA-A7-A13F-01Z-00-DX1",
        "TCGA-AR-A1AK-01Z-00-DX1",
        "TCGA-AR-A1AS-01Z-00-DX1",
        "TCGA-E2-A14V-01Z-00-DX1",
        "TCGA-E2-A1B5-01Z-00-DX1",
    ],
    "Kidney": [
        "TCGA-B0-5698-01Z-00-DX1",
        "TCGA-B0-5710-01Z-00-DX1",
        "TCGA-B0-5711-01Z-00-DX1",
        "TCGA-HE-7128-01Z-00-DX1",
        "TCGA-HE-7129-01Z-00-DX1",
        "TCGA-HE-7130-01Z-00-DX1",
    ],
    "Liver": [
        "TCGA-18-5592-01Z-00-DX1",
        "TCGA-21-5784-01Z-00-DX1",
        "TCGA-21-5786-01Z-00-DX1",
        "TCGA-38-6178-01Z-00-DX1",
        "TCGA-49-4488-01Z-00-DX1",
        "TCGA-50-5931-01Z-00-DX1",
    ],
    "Prostate": [
        "TCGA-CH-5767-01Z-00-DX1",
        "TCGA-G9-6336-01Z-00-DX1",
        "TCGA-G9-6348-01Z-00-DX1",
        "TCGA-G9-6356-01Z-00-DX1",
        "TCGA-G9-6362-01Z-00-DX1",
        "TCGA-G9-6363-01Z-00-DX1",
    ],
    "Mixture": [
        "TCGA-KB-A93J-01A-01-TS1",
        "TCGA-RD-A8N9-01A-01-TS1",
        "TCGA-AY-A8YK-01A-01-TS1",
        "TCGA-NH-A8F7-01A-01-TS1",
        "TCGA-DK-A2I6-01A-01-TS1",
        "TCGA-G2-A2EK-01A-02-TSB",
    ],
}

# Organ groups that make up each fold, in concatenation order.
# Every fold leaves exactly one group out.
fold_organs = [
    ["Kidney", "Liver", "Prostate", "Mixture"],   # fold 1: no Breast
    ["Mixture", "Liver", "Prostate", "Breast"],   # fold 2: no Kidney
    ["Liver", "Prostate", "Breast", "Kidney"],    # fold 3: no Mixture
    ["Prostate", "Breast", "Kidney", "Mixture"],  # fold 4: no Liver
    ["Breast", "Kidney", "Mixture", "Liver"],     # fold 5: no Prostate
]

# Flatten the image names of each fold's organ groups into one list per fold.
fold_list = [
    [image_name for organ in organs for image_name in organ_dict[organ]]
    for organs in fold_organs
]
fold1, fold2, fold3, fold4, fold5 = fold_list

# Report the number of images in every fold.
for fold_images in fold_list:
    print(len(fold_images))

In [None]:
# Train one member of the 5-fold ensemble.
# Only the fold selected below is trained when this cell runs; change the index
# and re-run the cell to train another member. The index is also appended to
# the model name.
FOLD_TO_TRAIN = 2  # 0-based position in fold_list

config = Monuseg.MonusegConfig()
config.VALIDATION_STEPS = 0

for fold_idx, fold in enumerate(fold_list):
    # Used to choose model fold number
    if fold_idx != FOLD_TO_TRAIN:
        continue

    print("Model Nr. {} Fold:".format(fold_idx))
    print(fold)

    # Report all image names without a known file at once, instead of stopping
    # at the first failed lookup.
    missing = [_n for _n in fold if _n not in dict_names]
    if missing:
        raise KeyError("No image file found for: {}".format(missing))

    dataset_train = Monuseg.MonusegDataset()
    # NOTE(review): the validation dataset is left empty (no class, no images,
    # never prepared) while config.VALIDATION_STEPS is 0 - confirm that
    # model.train() accepts this combination.
    dataset_val = Monuseg.MonusegDataset()
    dataset_train.add_class("Monuseg", 1, "nucleus")
    # Fill with the train samples
    for _n in fold:
        dataset_train.add_image(source = "Monuseg", image_id = _n, path = dict_names[_n])
    dataset_train.prepare()

    config.NAME = "Monuseg_5Fold_COCO_" + str(fold_idx)

    with tf.device(DEVICE):
        model = modellib.MaskRCNN(mode=TEST_MODE, model_dir=MODEL_DIR, config=config, verbose = False)

    wp = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

    print("Loding model from: {}".format(wp))
    # Load the COCO checkpoint by layer name, leaving out the listed head layers.
    model.load_weights(wp, by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
            "mrcnn_bbox", "mrcnn_mask"])

    # Stage 1 of the stage-based schedule; the later stages are kept below
    # (commented out) for reference.
    model.train(dataset_train, dataset_val,
           learning_rate=config.LEARNING_RATE*2,
            epochs=5,
            augmentation=augmentation,
            layers=r"(PREPROC.*)|(conv1.*)|(mrcnn\_.*)")

    #model.train(dataset_train, dataset_val,
    #       learning_rate=config.LEARNING_RATE,
    #        epochs=5+35,
    #        augmentation=augmentation,
    #        layers="all")

    #model.train(dataset_train, dataset_val,
    #       learning_rate=config.LEARNING_RATE * 0.5,
    #        epochs=40+5,
    #        augmentation=augmentation,
    #        layers='5+')

    #model.train(dataset_train, dataset_val,
    #       learning_rate=config.LEARNING_RATE * 0.1,
    #        epochs=45+5,
    #        augmentation=augmentation,
    #        layers='heads')

    # Drop the reference to the trained model before the loop continues.
    del model
  