# Mask R-CNN - Train on Shapes Dataset

### Notes from implementation

This notebook shows how to train Mask R-CNN on your own dataset. To keep things simple we use a synthetic dataset of shapes (squares, triangles, and circles) which enables fast training. You'd still need a GPU, though, because the network backbone is a Resnet101, which would be too slow to train on a CPU. On a GPU, you can start to get okay-ish results in a few minutes, and good results in less than an hour.

The code of the *Shapes* dataset is included below. It generates images on the fly, so it doesn't require downloading any data. And it can generate images of any size, so we pick a small image size to train faster. 


### Dataset

Create a synthetic dataset

Extend the Dataset class and add a method to load the shapes dataset, `load_shapes()`, and override the following methods:

* load_image()
* load_mask()
* image_reference()

In [1]:
# Widen the notebook container to 90% of the browser width so wide outputs stay readable.
# `display` and `HTML` come from the public IPython.display module; importing them
# from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os
import sys
import random
import math
import re
import  gc
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import pprint
import keras.backend as KB
sys.path.append('../')

import mrcnn.model     as modellib
import mrcnn.visualize as visualize
import mrcnn.shapes    as shapes
from mrcnn.config      import Config
from mrcnn.model       import log
from mrcnn.dataset     import Dataset 
# from mrcnn.pc_layer    import PCTensor
# from mrcnn.pc_layer   import PCNLayer

# Root directory of the project
ROOT_DIR = os.getcwd()
# Base folder holding pretrained weights and training logs.
# Set the MRCNN_MODEL_PATH environment variable to relocate it; the default is the
# original Windows folder, written as a raw string so the backslash is not parsed
# as an (invalid) escape sequence.
MODEL_PATH = os.environ.get('MRCNN_MODEL_PATH', r'E:\Models')
# Directory to save logs and trained model
MODEL_DIR = os.path.join(MODEL_PATH, "mrcnn_logs")
# Path to COCO trained weights
COCO_MODEL_PATH   = os.path.join(MODEL_PATH, "mask_rcnn_coco.h5")
# Path to the ResNet-50 "notop" weights file
RESNET_MODEL_PATH = os.path.join(MODEL_PATH, "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")

# Report library versions and set up pretty-printing / NumPy print width for the notebook.
print("Tensorflow Version: {}   Keras Version : {} ".format(tf.__version__,keras.__version__))
pp = pprint.PrettyPrinter(indent=2, width=100)
np.set_printoptions(linewidth=100)

# Build configuration object -----------------------------------------------
# Start from the Shapes defaults and override the batch settings for this experiment.
config = shapes.ShapesConfig()
config.BATCH_SIZE      = 6                    # Batch size is 6 here (# GPUs * images/GPU); keep in sync with IMAGES_PER_GPU.
config.IMAGES_PER_GPU  = 6
config.STEPS_PER_EPOCH = 2                    # deliberately tiny: 2 steps per epoch
# config.IMAGES_PER_GPU  = 1
# FCN input is the spatial part of IMAGE_SHAPE (its first two entries).
config.FCN_INPUT_SHAPE = config.IMAGE_SHAPE[0:2]
config.display() 

# Build shape dataset        -----------------------------------------------

# Batch generator and single-image ground-truth loader used by later cells.
from mrcnn.datagen import data_generator, load_image_gt

# Training dataset
# load_shapes is called with a count of 500 and the first two entries of config.IMAGE_SHAPE
dataset_train = shapes.ShapesDataset()
dataset_train.load_shapes(500, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset_train.prepare()

# Validation dataset
# Same construction with a count of 50.
dataset_val = shapes.ShapesDataset()
dataset_val.load_shapes(50, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset_val.prepare()

# Load and display random samples
# (disabled; re-enable to eyeball a few generated images and their masks)
# image_ids = np.random.choice(dataset_train.image_ids, 3)
# for image_id in [3]:
#     image = dataset_train.load_image(image_id)
#     mask, class_ids = dataset_train.load_mask(image_id)
#     visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)
# Echo the weight / checkpoint locations configured above.
print(' COCO Model Path       : ', COCO_MODEL_PATH)
print(' Checkpoint folder Path: ', MODEL_DIR)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Tensorflow Version: 1.4.0   Keras Version : 2.1.3 

Configurations:
BACKBONE_SHAPES                [[32 32]
 [16 16]
 [ 8  8]
 [ 4  4]
 [ 2  2]]
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     6
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FCN_INPUT_SHAPE                [128 128]
GPU_COUNT                      1
IMAGES_PER_GPU                 6
IMAGE_MAX_DIM                  128
IMAGE_MIN_DIM                  128
IMAGE_PADDING                  True
IMAGE_SHAPE                    [128 128   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES               100
MEAN_PIXEL                     [123.7 116.8 103.9]
MINI_MASK_SHAPE                (56, 56)
NAME                           shapes
NUM_CLASSES                    4
POOL_SIZE        

### Create Model

In [3]:

# del history
# Drop a model left over from a previous run of this cell so its memory can be
# reclaimed before a new one is built. Only "model is not defined yet" is ignored;
# any other error is allowed to surface instead of being silently swallowed.
try:
    del model
except NameError:
    pass
gc.collect()

model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)
#model.keras_model.summary(line_length = 120)
# print(model.find_last())

# Which weights to start with?
init_with = "last"  # imagenet, coco, or last
if init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that are different due to the different number of classes
    # See README for instructions to download the COCO weights
    loc = model.load_weights(COCO_MODEL_PATH, by_name=True,
                             exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                      "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    loc = model.load_weights(model.find_last()[1], by_name=True)
else:
    # No branch exists for any other value (including "imagenet"): make the
    # no-op visible instead of silently training from uninitialised weights.
    print(" init_with = '{}' : no weights were loaded".format(init_with))


>>> Set_log_dir() -- model dir is  E:\Models\mrcnn_logs
    set_log_dir: Checkpoint path set to : E:\Models\mrcnn_logs\shapes20180405T1208\mask_rcnn_shapes_{epoch:04d}.h5
>>> Generate pyramid anchors 
    Size of anchor array is : (4092, 4)
>>> RPN Outputs  <class 'list'>
      rpn_class_logits/concat:0
      rpn_class/concat:0
      rpn_bbox/concat:0
>>> Proposal Layer init complete. Size of anchors:  (4092, 4)
    layer - Scores  (6, 4092)
    layer - Deltas  (6, 4092, 4)
    layer - Anchors  (6, 4092, 4)
    layer - boxes shape / type after processing:  (6, 4092, 4) <class 'tensorflow.python.framework.ops.Tensor'>
>>> Detection Target Layer : initialization
>>> Detection Target Layer : call  <class 'list'> 4
     proposals.shape    : (6, ?, ?) <class 'tensorflow.python.framework.ops.Tensor'>
     gt_class_ids.shape : (?, ?) <class 'tensorflow.python.framework.ops.Tensor'>
     gt_bboxes.shape    : (?, ?, 4) <class 'tensorflow.python.framework.ops.Tensor'>
     gt_masks.shape     : (

## Training head using  Keras.model.fit_generator()

In [None]:
# Train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layers to train by name pattern.
# NOTE(review): epochs_to_run presumably counts epochs to add on top of model.epoch
# (the manual loop in this notebook computes model.epoch + epochs_to_run) — confirm in mrcnn.model.

model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE, 
            epochs_to_run =2, 
            layers='heads')

## - Training heads using train_in_batches ()

We need to use this method for the time being because `fit_generator` does not provide easy access to the network outputs from within Keras callbacks. By training in batches, we pass one batch through the network, pick up the generated RoI detections and bounding boxes, and generate our semantic / Gaussian tensors from them.


In [None]:
# Same settings as the fit_generator run above, but driven batch by batch
# through the project's train_in_batches() entry point.
model.train_in_batches(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE, 
            epochs_to_run = 2,
            layers='heads')

## Simulate one training iteration - 1

In [None]:
# Manual set-up of one training run: resolve the layer selection, build the data
# generators, compile the model and prepare the progress-bar / checkpoint callbacks.
from mrcnn.datagen import data_generator, load_image_gt
np.set_printoptions(linewidth=100)
learning_rate=model.config.LEARNING_RATE
epochs_to_run = 2
layers='heads'
batch_size = 0          # 0 => fall back to model.config.BATCH_SIZE
steps_per_epoch = 0     # 0 => fall back to model.config.STEPS_PER_EPOCH
# assert self.mode == "training", "Create model in training mode."
# Pre-defined layer regular expressions
layer_regex = {
    # all layers but the backbone
    "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
    # From a specific Resnet stage and up
    "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
    "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
    "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
    # All layers
    "all": ".*",
}

# A known alias is replaced by its regex; anything else is used as a regex as-is.
layers = layer_regex.get(layers, layers)
if batch_size == 0 :
    batch_size = model.config.BATCH_SIZE
if steps_per_epoch == 0:
    steps_per_epoch = model.config.STEPS_PER_EPOCH

# Data generators
train_generator = data_generator(dataset_train, model.config, shuffle=True,
                                 batch_size=batch_size)
val_generator   = data_generator(dataset_val, model.config, shuffle=True,
                                 batch_size=batch_size,
                                 augment=False)

# Train
log("Last epoch completed : {} ".format(model.epoch))
log("Starting from epoch {} for {} epochs. LR={}".format(model.epoch, epochs_to_run, learning_rate))
log("Steps per epoch:    {} ".format(steps_per_epoch))
log("Batchsize      :    {} ".format(batch_size))
log("Checkpoint Folder:  {} ".format(model.checkpoint_path))
# Index of the last epoch to run (exclusive upper bound used by the next cell).
epochs = model.epoch + epochs_to_run

from tensorflow.python.platform import gfile
if not gfile.IsDirectory(model.log_dir):
    log('Creating checkpoint folder')
    gfile.MakeDirs(model.log_dir)
else:
    log('Checkpoint folder already exists')

model.set_trainable(layers)
model.compile(learning_rate, model.config.LEARNING_MOMENTUM)

# Metric names reported by the progress bar: training names plus their val_ twins.
out_labels = model.keras_model._get_deduped_metrics_names()
callback_metrics = out_labels + ['val_' + n for n in out_labels]

# The progress bar is attached to the Keras model once (the original attached it twice).
progbar = keras.callbacks.ProgbarLogger(count_mode='steps')
progbar.set_model(model.keras_model)
progbar.set_params({
    'epochs': epochs,
    'steps': steps_per_epoch,
    'verbose': 1,
    'do_validation': False,
    'metrics': callback_metrics,
})

# Checkpoint callback: weights only, kept only when the training loss improves.
chkpoint = keras.callbacks.ModelCheckpoint(model.checkpoint_path,
                                           monitor='loss', verbose=1, save_best_only = True, save_weights_only=True)
chkpoint.set_model(model.keras_model)

progbar.on_train_begin()
epoch_idx = model.epoch

## Simulate one training iteration - 2

In [None]:
# Manual stand-in for the start of one epoch / one batch of the training loop
# (the real loop's `while` / `for` headers are kept below as comments).
if epoch_idx >= epochs:
    # Nothing left to run: report it and skip the callbacks so the message is true.
    print('Final epoch {} has already completed - Training will not proceed'.format(epochs))
else:
    # while epoch_idx < epochs :
    progbar.on_epoch_begin(epoch_idx)
    steps_index = 0
    # for steps_index in range(steps_per_epoch):

    print(' self.epoch {}   epochs {}  step {} '.format(model.epoch, epochs, steps_index))
    # Per-batch log dict handed to the callback: batch index and batch size.
    batch_logs = {'batch': steps_index, 'size': batch_size}
    progbar.on_batch_begin(steps_index, batch_logs)

## Simulate one training iteration - 3

In [None]:
# Fetch the batch this simulated step trains on. Without this the cell relied on
# train_batch_x / train_batch_y leaking in from a cell further down the notebook.
train_batch_x, train_batch_y = next(train_generator)

# Locate the image-meta input in the batch; element [i, 0] of it is used as the dataset image id.
imgmeta_idx = model.keras_model.input_names.index('input_image_meta')
img_meta    = train_batch_x[imgmeta_idx]

# Show the first two images of the batch together with their top masks.
for batch_pos in range(min(2, len(img_meta))):
    image_id = img_meta[batch_pos, 0]
    print('Image id: ',image_id)
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

# One optimisation step on this batch; `outs` holds whatever train_on_batch returns.
outs = model.keras_model.train_on_batch(train_batch_x, train_batch_y)

## Process outside of training 

### Create Model

In [None]:
# Release objects from a previous run before rebuilding the model. Each name is
# removed independently: the original multi-name `del` stopped at the first
# undefined name, leaving later names alive and skipping the garbage collection.
for _stale_name in ('model', 'train_generator', 'val_generator', 'mm'):
    globals().pop(_stale_name, None)
gc.collect()

model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)
#model.keras_model.summary(line_length = 120) 

# Which weights to start with?
init_with = "last"  # imagenet, coco, or last
if init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that are different due to the different number of classes
    # See README for instructions to download the COCO weights
    loc = model.load_weights(COCO_MODEL_PATH, by_name=True,
                             exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                      "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    loc = model.load_weights(model.find_last()[1], by_name=True)
else:
    # No branch exists for any other value (including "imagenet"): make the
    # no-op visible instead of silently continuing without weights.
    print(" init_with = '{}' : no weights were loaded".format(init_with))



### Define Data Generator

In [None]:
# Both generators share the same options: shuffled, config batch size, no augmentation.
generator_options = dict(shuffle=True,
                         batch_size=model.config.BATCH_SIZE,
                         augment=False)
train_generator = data_generator(dataset_train, model.config, **generator_options)
val_generator = data_generator(dataset_val, model.config, **generator_options)

In [None]:
# Compile the model for head-only training without starting a training run,
# then inspect what the compiled Keras model exposes.
model.compile_only(learning_rate=config.LEARNING_RATE, layers='heads')
# print(KB.eval(KB.learning_phase()))
# Force the Keras learning phase to 1 (training).
KB.set_learning_phase(1)
print(' Learning phase values is L ' ,KB.learning_phase())
# Short alias for the underlying Keras model; later cells rely on `mm`.
mm = model.keras_model
print('\n Metrics (_get_deduped_metrics_names():) ') 
pp.pprint(mm._get_deduped_metrics_names())
print('\n Outputs: ') 
pp.pprint(mm.outputs)
print('\n Losses (model.metrics_names): ') 
# pp.pprint(mm.losses)
pp.pprint(mm.metrics_names)

In [None]:
# Print the Keras layer summary using 120-character lines.
model.keras_model.summary(line_length = 120) 

### Get next shapes from generator and display loaded shapes

In [None]:
# Pull one batch (inputs, targets) from the training generator; the inspection cells below all read train_batch_x.
train_batch_x, train_batch_y = next(train_generator)

### Display loaded shapes

In [None]:
# train_batch_x, train_batch_y = next(train_generator)
# Find the image-meta input of the current batch; entry [i, 0] is used as the image id.
imgmeta_idx = model.keras_model.input_names.index('input_image_meta')
img_meta = train_batch_x[imgmeta_idx]

# Display the first two images of the batch with their top masks.
for batch_pos in (0, 1):
    image_id = img_meta[batch_pos, 0]
    print('Image id: ', image_id)
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

In [None]:
# Split the batch's image-meta array into the four values returned by the helper.
# NOTE(review): the `_graph` suffix suggests the helper was written for TF tensors —
# confirm it accepts the NumPy array passed here. a, b, c, d are not read by any live cell.
from   mrcnn.utils            import parse_image_meta_graph
a,b,c,d = parse_image_meta_graph(img_meta)

### Push Data thru model using get_layer_output()

In [None]:
# Run the current batch through the model and keep the returned layer outputs;
# the cells below index into `layers_out` by position.
from mrcnn.callbacks import get_layer_output_1,get_layer_output_2
np.set_printoptions(linewidth=100,precision=4)
# Re-binds the notebook-wide pretty-printer `pp` with a 4-space indent.
pp = pprint.PrettyPrinter(indent=4)
 
# NOTE(review): meaning of the third argument (1) is defined in mrcnn.callbacks — presumably the training flag; confirm.
layers_out = get_layer_output_2(model.keras_model, train_batch_x, 1)


In [None]:
# Dump outputs 16 and 17 from `layers_out` (which layers these are depends on get_layer_output_2's ordering).
print(layers_out[16], layers_out[17])

In [None]:
# Same display as earlier, but resolving the image-meta input through the `mm` alias.
imgmeta_idx = mm.input_names.index('input_image_meta')
img_meta = train_batch_x[imgmeta_idx]

# First two images of the batch, each with its top masks.
for batch_pos in (0, 1):
    image_id = img_meta[batch_pos, 0]
    print('Image id: ', image_id)
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

#### Plot mask in string format

In [None]:
# np.set_printoptions(threshold=99999, linewidth=2000)
# print(np.array2string(mask[...,0],max_line_width=2000,separator=''))
 

In [None]:
# Slice output 14 (mrcnn_mask per the original note): first batch item, index 1 on the last axis.
outmask0 = layers_out[14][0,:,:,:,1] ##  mrcnn_mask
# Largest value in that slice (shown as the cell result).
np.max(outmask0)

### Plot Prediction Probability Tensors

In [None]:
%matplotlib notebook
from mrcnn.visualize import plot_gaussian
# Output 1 of layers_out is plotted as one surface per (image, class) pair.
Zout = layers_out[1]
num_images = config.IMAGES_PER_GPU
num_classes = config.NUM_CLASSES
# NOTE: `img` and `cls` stay bound after the loops; later cells rebind them explicitly.
for img in range(num_images):
    for cls in range(num_classes):
        ttl = 'image :  {} class: {} '.format(img,cls)
        plot_gaussian(Zout[img,cls], title = ttl)

### Plot Ground Truth Probability tensors

In [None]:
# i = 1
# print(layers_out[i].shape)      #[0,0,0:20, 0:20]
# Same plot as the previous cell, but for output 2 of layers_out.
# Relies on plot_gaussian having been imported by the previous cell.
Zout = layers_out[2]
num_images = config.IMAGES_PER_GPU
num_classes = config.NUM_CLASSES
for img in range(num_images):
    for cls in range(num_classes):
        ttl = 'image :  {} class: {} '.format(img,cls)
        plot_gaussian(Zout[img,cls], title = ttl)

### Display predicted bounding boxes - calculate center and width/height of bboxes displayed 

In [None]:
from mrcnn.utils import trim_zeros
np.set_printoptions( edgeitems=32, suppress=True)
# Output 3 of layers_out is indexed below as [image, class, row, column].
pred_bb = layers_out[3]
print(pred_bb.shape)
# Image 0: trim_zeros is applied to each of the first 4 class slices, then the rows are concatenated.
# NOTE(review): the literal 4 presumably mirrors config.NUM_CLASSES — confirm.
x0 = [ trim_zeros((pred_bb[0,i,:,:])) for i in range(4)]
ps0 = np.concatenate( x0, axis=0 )

# Image 1: same construction. NOTE(review): x1 / ps1 are not used after this point in the cell.
x1 = [ trim_zeros((pred_bb[1,i,:,:])) for i in range(4)]
ps1 = np.concatenate( x1, axis=0 )
# print(np.concatenate( x1, axis=0 ))
print(ps0)
print(ps0.shape)
# Columns 2..5 are treated as the box corners: width = col5 - col3, height = col4 - col2.
width  = ps0[:,5] - ps0[:,3]
height = ps0[:,4] - ps0[:,2]
cx     = ps0[:,3] + ( width  / 2.0)
cy     = ps0[:,2] + ( height / 2.0)
# One row per box: (cx, cy, width, height).
means0  = np.stack((cx,cy,width, height),axis = -1)
print(means0)

#### Output RoIs (Normalized)

In [None]:
# Output 0 of layers_out; show the entries for the first image in the batch.
output_rois = layers_out[0]
output_rois[0,:,:]

In [None]:
# from mrcnn.pc_layer import PCTensor
# np.set_printoptions(precision=4,edgeitems=32)
# pc_tensor = PCTensor(model)
# pc_tensor.build_predictions(train_batch_x)
# # pc_tensor.pred_tensor[1]
# pc_tensor.pred_stacked[0]

### Display ground truth bboxes from Shapes database (using load_image_gt)

Here we are displaying the ground truth bounding boxes as provided by the dataset

In [None]:
# For the first two images of the batch: reload the ground truth straight from
# the dataset, print its boxes and draw them on the image.
for batch_pos in (0, 1):
    image_id = img_meta[batch_pos, 0]
    print('Image id: ', image_id)
    gt_bundle = load_image_gt(dataset_train, config, image_id,
                              augment=False, use_mini_mask=True)
    p_original_image, p_image_meta, p_gt_class_id, p_gt_bbox, p_gt_mask = gt_bundle
    # print(p_gt_class_id.shape, p_gt_bbox.shape, p_gt_mask.shape)
    print(p_gt_bbox)
    visualize.draw_boxes(p_original_image, p_gt_bbox)

### Display bboxes from Ground Truth Info - Input info Passed to Network 

layers_out[5]  gt_tensor is based on input_gt_class_ids and input_normlzd_gt_boxes

Display the Ground Truth bounding boxes from the tensor we've constructed

In [None]:
np.set_printoptions(linewidth=120, precision=5)
# Output 5 is the ground-truth tensor; output 6 is printed under the 'gt_cls_cnt' label.
gt_bboxes = layers_out[5]  
print(layers_out[5].shape)
print(' gt_cls_cnt')
print(layers_out[6])
# Entries at first-axis index 1, second-axis index 3 (indexed as [image, class] elsewhere in this notebook).
print(layers_out[5][1,3])

In [None]:
# print(gt_bboxes)
# visualize.display_instances(p_original_image, p_gt_bbox, p_gt_mask, p_gt_class_id, 
#                             dataset_train.class_names, figsize=(8, 8))
# pp.pprint(gt_bboxes)
# Draw boxes taken from the constructed ground-truth tensor on image `img` of the batch.
img = 0
image_id = img_meta[img,0]
print('Image id: ',image_id)
p_image, p_image_meta, p_gt_class_id, p_gt_bbox, p_gt_mask =  \
            load_image_gt(dataset_train, config, image_id, augment=False, use_mini_mask=True)
gt_bboxes = layers_out[5]
print(gt_bboxes.shape)
print(gt_bboxes[0,1,0:1,2:6])
print(gt_bboxes[0,2,0:2,2:6])
# Hand-picked rows: 1 box from class 1, 2 from class 2, 2 from class 3; columns 2..5 hold the box.
# NOTE(review): the row counts are hard-coded for one particular sample and the image index is 0 rather than `img`.
gt_bb = np.vstack((gt_bboxes[0,1,0:1,2:6],gt_bboxes[0,2,0:2,2:6],gt_bboxes[0,3,0:2,2:6]))
# Bare expression in the middle of the cell: evaluated but not displayed.
gt_bb.shape
visualize.draw_boxes(p_image, gt_bb)

## Display RoI proposals generated

Display bounding boxes from tensor of proposals produced by the network 
Class ids — Square: 1, Circle: 2, Triangle: 3

In [None]:
# Draw the proposals stored for one (image, class) pair of the prediction tensor.
img = 0
cls = 3  # <==== Class to display
image_id = img_meta[img,0]
print('Image id: ',image_id)
p_image, p_image_meta, p_gt_class_id, p_gt_bbox, p_gt_mask =  \
            load_image_gt(dataset_train, config, image_id, augment=False, use_mini_mask=True)

pred_tensor = layers_out[3]
# Caption per box: "<class>-<value of column 0 cast to int16>".
caps = [str(cls)+'-'+str(x) for x in pred_tensor[img,cls,:,0].astype('int16').tolist() ]
print(caps)
# print(pc_tensor.pred_tensor[1,3,:])
# print(pc_tensor.pred_tensor[1,3,:,2:6])
# Columns 2..5 are passed to draw_boxes as the box coordinates.
visualize.draw_boxes(p_image, pred_tensor[img,cls,:,2:6], captions = caps)

For each class:
- determine the center of each bounding box.
- center a 2d gaussian distribution with the mean = center of bounding box and sigma = height/width
- place dist on mesh grid
- normalize
- draw heatmap

In [None]:
np.set_printoptions(precision=5)
from mrcnn.pc_layer import PCTensor
# Build the per-class prediction tensor from the batch fetched earlier.
# The original passed `sample_x`, which no live cell defines; the batch inputs
# in this notebook are held in `train_batch_x`.
pc_tensor = PCTensor(model)
pc_tensor.build_predictions(train_batch_x)
print(pc_tensor.pred_stacked)    # list of tensors


### Plot Image 0

In [None]:
from mrcnn.visualize import plot_gaussian
num_images = config.IMAGES_PER_GPU
num_classes = config.NUM_CLASSES
# One surface plot per (image, class) pair.
# NOTE(review): `Zout1` is not assigned by any live cell in this notebook (other cells
# build `Zout`); this cell raises NameError on a fresh kernel — confirm the intended array.
for img in range(num_images):
    for cls in range(num_classes):
        ttl = 'image :  {} class: {} '.format(img,cls)
        plot_gaussian(Zout1[img,cls], title = ttl)

### Plot Image 1

In [None]:
# img = 0
# cls = 0
# _cnt = pc_tensor.pred_cls_cnt[img,cls]
# print(_cnt)
# for box in range(_cnt):

#     mns = means[img,cls, 0 : _cnt]
#     print('img: ',img, 'class: ', cls, 'class count: ',_cnt, 'shape of mns :',mns.shape)
#     # print('** bbox is : ' ,self.pred_tensor[img,cls,box])
#     # print('    center is ({:4f},{:4f})  width is {:4f} height is {:4f} '\
#         # .format(mns[0],mns[1],width[img,cls,box],height[img,cls,box]))            
#     # fn = lambda x: multivariate_normal(x, [[12,0.0] , [0.0,19]])
#     # rv = tf.map_fn(fn, 
#     rv = np.apply_along_axis(multivariate_normal, 1, mns, [[12,0.0] , [0.0,19]])
#     print('rv :',rv.shape, rv)
#     _zo = rv.pdf(pos[img,cls])
#     print('zo :',_zo.shape)

In [None]:
# Shorthand for the TF distributions module (only referenced from commented-out code below).
tfd = tf.contrib.distributions
# Keras' current TensorFlow session; used later to evaluate tensors with .eval(session=k_sess).
k_sess = KB.get_session()

In [None]:
# pp1 = tf.fill([1,1,32], 12.0)
# pp2 = tf.fill([1,1,32], 19.0)
# pp  = tf.cast(tf.stack((pp1,pp2),axis=-1), dtype=tf.float64)
# tf.cast([12.0, 19.00], dtype=tf.float64)
# pp1.eval(session = k_sess)

# mvn = tfd.MultivariateNormalDiag(means[0,0,0,:],scale_diag=p1)
# mvn = tfd.MultivariateNormalDiag(means[0,0,0,:],scale_diag=p1)

# with k_sess.as_default():
#     print(mvn.mean())
#     print(mvn.batch_shape)
#     print(mvn.event_shape)
#     print(pos[0,0,:,0,0,:].shape)
#     rr = mvn.prob(pos[0,0,:,0,0,:])
#     print(rr)

In [None]:
# %matplotlib notebook
# from mrcnn.visualize import plot_gaussian
# for i in range(0,config.IMAGES_PER_GPU):
#     for j in range(0,config.NUM_CLASSES):
#         ttl = 'image : {} class: {}'.format(i,j)
#         plot_gaussian(Zout[i,j] , title = ttl )
# # plot_gaussian(Zout[3])


In [None]:
# Zout = np.zeros((num_classes, 128,128))
# For every non-background class (ids start at 1), add one Gaussian per ground-truth box to that class's map.
# NOTE(review): hidden state — `gt_cls_cnt` and `gt_cpb` are only built in commented-out code,
# `bbox_gaussian` is defined in a later cell, and `Zout` must already be a per-class array
# (see the commented initialisation above). Confirm before running on a fresh kernel.
for i in range(1,config.NUM_CLASSES):
    print('class: ',i)
    for j in range(gt_cls_cnt[i]):
        Zout[i] = bbox_gaussian(gt_cpb[i,j], Zout[i])
print(Zout.shape)
 
# plot_gaussian(Zout[1])
# plot_gaussian(Zout[3])

## Fine Tuning
Fine tune all layers

In [None]:
# Fine tune all layers
# Passing layers="all" trains all layers. You can also 
# pass a regular expression to select which layers to
# train by name pattern.
# The learning rate is a tenth of the configured rate.
# NOTE(review): the other training calls in this notebook pass `epochs_to_run=`; confirm that
# train() accepts `epochs=` and whether 211 is meant as an absolute epoch or a count.
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE / 10,
            epochs=211,
            layers="all")

## Save 

In [None]:
# Save weights
# Typically not needed because callbacks save after every epoch
# Uncomment to save manually
# Manual save: writes the current Keras weights to mask_rcnn_shapes.h5 inside MODEL_DIR.
model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes.h5")
model.keras_model.save_weights(model_path)

## Detection

In [None]:
class InferenceConfig(shapes.ShapesConfig):
    """Shapes configuration for inference: one GPU, one image per GPU.

    The base class is referenced through the `shapes` module because only the
    module is imported in this notebook; a bare `ShapesConfig` is not in scope.
    """
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
# (this rebinds `model`; the training-mode model is no longer reachable by that name)
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
# find_last() returns a sequence; element [1] is used as the weights path.
model_path = model.find_last()[1]

# Load trained weights (fill in path to trained weights here)
# NOTE: the assert is skipped when Python runs with -O.
assert model_path != "", "Provide path to trained weights"
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
# Test on a random image
image_id = random.choice(dataset_val.image_ids)

# Load the image and its full-size ground truth (mini-masks disabled).
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_val, inference_config, image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
# Fixed: this line previously logged gt_bbox under the "gt_class_id" label.
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

# Show the ground-truth instances on the image.
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
                            dataset_train.class_names, figsize=(8, 8))

In [None]:
# Run detection on the single image loaded in the previous cell.
results = model.detect([original_image], verbose=1)

# One result dict per input image; the keys read below are rois, masks, class_ids and scores.
r = results[0]
# NOTE(review): get_ax() is defined in the "Notebook Preferences" cell near the end of the
# notebook, so that cell must be run before this one.
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], 
                            dataset_val.class_names, r['scores'], ax=get_ax())

## Evaluation

In [None]:
# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
# `utils` was never imported as a module in this notebook (only individual names
# from mrcnn.utils), so it is brought into scope here.
import mrcnn.utils as utils

image_ids = np.random.choice(dataset_val.image_ids, 10)
APs = []
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    # Run object detection (the unused `molded_images` pre-processing step was removed)
    results = model.detect([image], verbose=0)
    r = results[0]
    # Compute AP
    AP, precisions, recalls, overlaps =\
        utils.compute_ap(gt_bbox, gt_class_id,
                         r["rois"], r["class_ids"], r["scores"])
    APs.append(AP)

print("mAP: ", np.mean(APs))

In [None]:
# lay = mm.layers[229]
# print(lay.__class__, lay.__class__.__name__)
# pp.pprint(dir(lay))
# pp.pprint(lay.input_spec.__dict__)
# pp.pprint(lay.output.__dict__)
# print(type(lay.output))
# print(keras.backend.is_keras_tensor(lay))
# print(K.eval(lay.output))

In [None]:
# print(pred_index.shape, pred_class.shape, pred_prob.shape)
# b_cpb = np.column_stack((pred_index, pred_class, pred_prob, rois)) # , b_probs)) #.transpose()
# print(' b_cpb shape: ',b_cpb.shape,'\n',b_cpb)

# print(b_cpb[:,3:] bbox_delta)

# nonbg_idx = np.argwhere(b_cpb[:,1]) 

# print(type(nonbg_idx))
# b_cpb_nonbg = b_cpb[nonbg_idx,:].squeeze()

# print(b_cpb_nonbg)
# order = b_cpb_nonbg[:,2].argsort()



# print('\n srtd_cpb : (idx, class, prob, y1, x1, y2, x2)',srtd_cpb.shape, '\n')
# print(srtd_cpb)

# # srtd_cpb_2 has (idx, cls_idx, prob, cx ,cy, width, height) instead of (idx, cls_idx, prob, y1, x1, y2, x2)

# width  = srtd_cpb[:,6]-srtd_cpb[:,4]
# height = srtd_cpb[:,5]-srtd_cpb[:,3]
# cx = srtd_cpb[:,4] + ( width  / 2.0)
# cy = srtd_cpb[:,3] + ( height / 2.0)
# print('\n srtd_cpb_2 : (idx, class, prob, cx ,cy, width, height) instead of (y1, x1, y2, x2)')
# srtd_cpb_2 = np.column_stack((srtd_cpb[:, 0:3], cx,cy, width, height ))

# print('\n',srtd_cpb_2)

### Scrap Code

In [None]:
# def get_layer_output(model, model_input,output_layer, training_flag = True):
#     _my_input = model_input 
#     for name,inp in zip(model.input_names, model_input):
#         print(' Input Name:  ({:24}) \t  Input shape: {}'.format(name, inp.shape))


#     _mrcnn_class = KB.function(model.input , model.output)
#     output = _mrcnn_class(_my_input)                  
    
#     for name,out in zip (model.output_names,output):
#         print(' Output Name: ({:24}) \t Output shape: {}'.format(name, out.shape))
#     return output

In [None]:
def stack_tensor(model, pred_cpb=None, pred_cls_cnt=None):
    """Stack the populated rows of every non-background class into one 2-D array.

    Parameters
    ----------
    model : object exposing ``config.NUM_CLASSES``.
    pred_cpb : array indexed as ``pred_cpb[class, row]`` with 8 values per row.
        Defaults to the notebook-level variable of the same name, which is what
        the original body read.
    pred_cls_cnt : per-class count of populated rows in ``pred_cpb``.
        Defaults to the notebook-level variable of the same name.

    Returns
    -------
    numpy.ndarray of shape (total_rows, 8); class 0 is skipped. The shape is
    (0, 8) when no class has any rows.
    """
    if pred_cpb is None:
        pred_cpb = globals()['pred_cpb']
    if pred_cls_cnt is None:
        pred_cls_cnt = globals()['pred_cls_cnt']

    # Start from an empty (0, 8) block so the result is well-formed with no rows.
    parts = [np.empty((0, 8))]
    for i in range(1, model.config.NUM_CLASSES):
        if pred_cls_cnt[i] > 0:
            parts.append(pred_cpb[i, 0:pred_cls_cnt[i]])
    return np.vstack(parts)
    

In [None]:
from scipy.stats import  multivariate_normal
import numpy as np
def bbox_gaussian( bbox, Zin, cov=None, verbose=False ):
    """
    Add a 2-D Gaussian centred on a bounding box to an existing heatmap.

    Inputs :
    --------
    bbox :  1-D sequence laid out as (index, class_id, class_prob, y1, x1, y2, x2).
            Only elements 3..6 (the corner coordinates) are read.
    Zin  :  heatmap to accumulate onto. When it is 2-D, the Gaussian is evaluated
            on a grid of the same shape; otherwise a 128 x 128 grid is used, as in
            the original implementation.
    cov  :  optional covariance passed to scipy's multivariate_normal.
            Defaults to [[12, 0], [0, 19]]; note that this is a fixed matrix and is
            NOT derived from the box width/height.
    verbose : print the bbox shape and the computed centre/size (off by default).

    Returns:
    --------
    Zout :  new array holding the Gaussian pdf on the grid plus Zin.
            Zin itself is not modified.
    """
    if cov is None:
        cov = [[12, 0.0], [0.0, 19]]

    # Box centre from the (y1, x1, y2, x2) corners.
    width  = bbox[6] - bbox[4]
    height = bbox[5] - bbox[3]
    cx     = bbox[4] + ( width  / 2.0)
    cy     = bbox[3] + ( height / 2.0)
    if verbose:
        print(np.shape(bbox))
        print('center is ({},{}) width: {}  height: {} '.format(cx, cy, width,  height))

    # Grid of (x, y) pixel coordinates: pos[row, col] == (col, row).
    rows, cols = np.shape(Zin) if np.ndim(Zin) == 2 else (128, 128)
    X, Y = np.meshgrid(np.arange(0, cols, 1), np.arange(0, rows, 1))
    pos = np.stack((X, Y), axis=-1).astype(float)

    rv = multivariate_normal([cx, cy], cov)
    return rv.pdf(pos) + Zin

## Notebook Preferences

In [None]:
def get_ax(rows=1, cols=1, size=8):
    """Create a figure and hand back its Matplotlib Axes (or Axes array).

    Acts as the single place where figure sizes for the notebook's
    visualizations are decided: the figure measures ``size * cols`` wide by
    ``size * rows`` high, so changing the default ``size`` rescales every
    rendered image.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

## Configurations

In [None]:
# from keras import backend as KB
# if 'tensorflow' == KB.backend():
#     import tensorflow as tf
#     from keras.backend.tensorflow_backend import set_session
#     # tfconfig = tf.ConfigProto(
#         # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5),
#         # device_count = {'GPU': 1}
#     # )    
#     tfconfig = tf.ConfigProto()
#     tfconfig.gpu_options.allow_growth=True
#     tfconfig.gpu_options.visible_device_list = "0"
#     tfconfig.gpu_options.per_process_gpu_memory_fraction=0.5
#     tf_sess = tf.Session(config=tfconfig)
#     set_session(tf_sess)

### Prepare Prediction Tensor (output tensor)
Using the softmax outputs from the mrcnn head of the network and predicted RoIs , 
build tensor [ num-classes, num_rois, (roi_information) ]

In [None]:
# np.set_printoptions(precision=4)

# // pass model to TensorBuilder

# h, w = config.IMAGE_SHAPE[:2]

# class_idx = mm.output_names.index('mrcnn_class')
# bbox_idx  = mm.output_names.index('mrcnn_bbox')
# outroi_idx= mm.output_names.index('output_rois')


# print('mrcnn_class idx: {}   mrcnn_bbox idx : {}   output_rois idx : {}'.format(class_idx, bbox_idx,outroi_idx))


# mrcnn_class = ppp[class_idx]
# mrcnn_bbox  = ppp[bbox_idx]
# rois_norm   = ppp[outroi_idx][0,...] 
# rois        = rois_norm * np.array([h,w,h,w])

# num_classes = config.NUM_CLASSES
# num_rois    = config.TRAIN_ROIS_PER_IMAGE
# num_max_gt  = config.DETECTION_MAX_INSTANCES
# num_cols    = 8 

# pred_arr    = np.zeros((num_classes, num_rois, num_cols ))      # 4, 32, 7
# pred_cpb    = np.zeros_like(pred_cpb)
# pred_cls_cnt= np.zeros((num_classes), dtype='int16')

# print(' mrcnn_bbox shape is : ',mrcnn_bbox.shape, ' pred_cpb shape is   : ',pred_cpb.shape  )

# # use the argmaxof each row to determine the dominating (predicted) class
# #---------------------------------------------------------------------------
# pred_class = np.argmax(mrcnn_class[0,:,:],axis=1).astype('int16')   # (32,)

# # pred_index = np.arange(pred_class.shape[0],dtype='int16')
# # pred_prob  =    np.max(mrcnn_class[0,:,:],axis=1)                   #  (32,)
# # dont need it for now. Need to see if and how we should apply  the delta to the bounding box coords
# # pred_delta   = mrcnn_bbox[0,pred_index[:],pred_class[:],:]        

# for i in range(num_classes) :
#     class_idxs = np.argwhere(pred_class == i )
#     pred_cls_cnt[i] = class_idxs.shape[0] 
#     for j , c_idx in enumerate(class_idxs):
#         pred_arr[i, j,  0]  = j
#         pred_arr[i, j,  1]  = i                                   # class_id
#         pred_arr[i, j,  2]  = np.max(mrcnn_class[0, c_idx ])      # probability
#         pred_arr[i, j,3:7]  = rois[c_idx]                         # roi coordinates
#         pred_arr[i, j,  7]  = c_idx                               # index from mrcnn_class array (temp for verification)
        
        
# # sort each class in descending prediction order 

# order = pred_arr[:,:,2].argsort()

# for i in range(num_classes):
#     pred_cpb[i,:,1:] =  pred_arr[i,order[i,::-1],1:]      
# pred_cpb[:,:,0] = pred_arr[:,:,0]

# print('pred_cpb shape', pred_cpb.shape)

# print(pred_cpb_all)

# Display values for sanity check 
# i = 0
# print(pred_cls_cnt)
# print(' pred_cpb ')
# print(pred_cpb[i])
# print(' pred_srtd ')
# print(pred_srtd[i])

### Prepare output tensor from Ground Truth data 

Using ground truth inputs to the network, build tensor [ num-classes, num_rois, (roi_information) ]
This tensor will be used for training purposes on the new head we will be attaching to the existing network.

In [None]:
# gtcls_idx = mm.input_names.index('input_gt_class_ids')
# gtbox_idx = mm.input_names.index('input_gt_boxes')
# gtmsk_idx = mm.input_names.index('input_gt_masks')
# print('gtcls_idx: ',gtcls_idx, 'gtbox_idx :', gtbox_idx)
# gt_classes = sample_x[gtcls_idx][0,:]
# gt_bboxes  = sample_x[gtbox_idx][0,:,:]

# gt_cpb     = np.zeros((num_classes, num_max_gt, num_cols ))      # 4, 32, 7
# gt_cls_cnt = np.zeros((num_classes), dtype='int16')
# # gt_masks   = sample_x[gtmsk_idx][0,:,:,nz_idx]
# # gt_indexes = np.arange(gt_classes.shape[0],dtype='int16')
# # gt_probs   = np.ones(gt_classes.shape[0])

# print('gt_classes.shape :',gt_classes.shape, 'gt_boxes.shape :',gt_bboxes.shape,'gt_masks.shape :', gt_masks.shape)
 
# for i in range(num_classes) :
#     print('indexes for class',i )
#     class_idxs = np.argwhere(gt_classes == i )
#     gt_cls_cnt[i] = class_idxs.shape[0]
#     for j , c_idx in enumerate(class_idxs):
#         gt_cpb[i, j,  0]  = j
#         gt_cpb[i, j,  1]  = i                                   # class_id
#         gt_cpb[i, j,  2]  = 1.0                                 # probability
#         gt_cpb[i, j, 3:7] = gt_bboxes[c_idx,:]                         # roi coordinates
#         gt_cpb[i, j,  7]  = c_idx                               # index from mrcnn_class array (temp for verification)

# gt_cpb_all = np.empty((0,8))
# for i in range(1,num_classes):
#     if gt_cls_cnt[i] > 0:
#         gt_cpb_all = np.vstack((gt_cpb_all, gt_cpb[i,0:gt_cls_cnt[i]] ))
# print(gt_cpb_all)

# print('\n gt_cpb : (idx, class, prob, y1, x1, y2, x2)', gt_cpb.shape, '\n')
# print(gt_cls_cnt)
# print(gt_cpb[3])

In [None]:
# pdf = np.apply_along_axis(lambda x: x.pdf, 0, rv)

# Scrap: build one frozen distribution per (mean, covariance) pair.
# NOTE(review): `mns` is not assigned by any live cell, and `pp` is bound to a
# pprint.PrettyPrinter elsewhere in this notebook, so this cell fails as written —
# confirm the intended mean / covariance arrays.
rv = list( map(multivariate_normal, mns, pp))
print(type(rv),len(rv))
# Collects each distribution's bound `pdf` method; nothing is evaluated here.
pdf = list(map(lambda x: x.pdf, rv,  ))
# z =  fn.pdf(pos[img,cls])
# np.sum(pc_tensor.pred_cls_cnt,axis=1)    sum class counts across images
# pc_tensor.pred_cls_cnt[1,0] = 9  # manipulate the class counts for class 0 - just to check


In [None]:
from scipy.stats import  multivariate_normal
import pprint
pp = pprint.PrettyPrinter(indent=2, width=100)

# Build, for every image in the batch and every class, a heat map that is the
# sum of one 2-D Gaussian per predicted box of that class.
img_h, img_w = config.IMAGE_SHAPE[:2]
num_images   = config.BATCH_SIZE
num_classes  = config.NUM_CLASSES
num_rois     = config.TRAIN_ROIS_PER_IMAGE

# Pixel-coordinate grid. np.meshgrid returns (img_h, img_w) arrays, so
# pos[r, y, x] == (x, y); the grid is replicated once per RoI.
X = np.arange(0, img_w, 1)
Y = np.arange(0, img_h, 1)
X, Y = np.meshgrid(X, Y)
pos = np.empty((num_rois,) + X.shape + (2,))
print(pos.shape)
pos[:,:,:,0] = X
pos[:,:,:,1] = Y

# One diagonal covariance (var_x=12, var_y=19) per RoI. Sized from num_rois
# rather than a hard-coded 32 so it always matches `pos`.
cov = np.tile(np.array([12.0, 19.0]), (num_rois, 1))

# `prt` has to be bound before its shapes are printed.
prt = pc_tensor.pred_stacked
print(cov.shape, *(prt[img].shape for img in range(num_images)))

# Row-major (height, width) layout to match the meshgrid / pdf output.
Zout  = np.zeros((num_images, num_classes, img_h, img_w))

for img in range(num_images):
    ps = prt[img].eval(session=k_sess)

    # Box geometry does not depend on the class, so compute it once per image.
    # NOTE(review): assumes columns 2..5 are (y1, x1, y2, x2) and column 6 is
    # the class id -- confirm against the pc_tensor.pred_stacked layout.
    width  = ps[:,5] - ps[:,3]
    height = ps[:,4] - ps[:,2]
    cx     = ps[:,3] + ( width  / 2.0)
    cy     = ps[:,2] + ( height / 2.0)
    means  = np.stack((cx,cy),axis = -1)

    print(ps.shape, type(ps),width.shape, height.shape, cx.shape, cy.shape, type(means),means.shape)

    # One Gaussian per box, evaluated over the whole pixel grid.
    rv  = [multivariate_normal(mean, covariance) for mean, covariance in zip(means, cov)]
    pdf = [dist.pdf(grid) for dist, grid in zip(rv, pos)]
    pdf_arr = np.asarray(pdf)
    print(pdf_arr.shape)

    for cls in range(num_classes):
        # Always a 1-D index array, including for zero or one matching box.
        cls_idxs = np.flatnonzero(ps[:,6] == cls)
        print('cl;s:',cls,' ',cls_idxs)
        if cls_idxs.size == 0:
            continue                      # no box of this class in this image
        pdf_sum = pdf_arr[cls_idxs].sum(axis=0)
        Zout[img,cls] += pdf_sum


In [None]:
# import tensorflow as tf

# p = tf.Variable(pc_tensor.pred_tensor)
# q = tf.concat(pc_tensor.pred_stacked,0)
# init=tf.global_variables_initializer()
# with tf.Session() as sess:
#     sess.run(init)
#     print(p.eval())
#     rec=sess.run(recall)
#     print(rec)


# sess = tf.InteractiveSession()
# with sess.as_default():
#     sess.run(init)
#     # a = tf.constant(pc_tensor.pred_tensor)
#     print(type(p))
#     #  tf.assign(p, a)
#     print(p[1,1,:,:].eval())
#     print(p[:,:,:,1].eval())
#     sort_idx = tf.nn.top_k(p[:,:,:,1], k=32).indices
#     print(sort_idx.eval())
#     print(p.shape)
#     p_sorted[0] = tf.gather(p[0],sort_idx[0],axis = 2)
#     print(p_sorted.eval())
#     print(q.shape)
#     print(q.eval())
# sess.close()

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.

### Training head using  Keras.model.fit_generator()

In [None]:
# Stage 1: train the head branches only.
# layers="heads" freezes every layer except the heads; a regular expression
# may be passed instead to pick the trainable layers by name.
model.train(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE,
    epochs=69,
    layers='heads',
)

### Training heads using train_on_batch()

We need to use this method for the time being because `fit_generator` does not provide easy access to the network outputs from Keras callbacks. By training in batches, we pass a batch through the network, pick up the generated RoI detections and bounding boxes, and generate our semantic / Gaussian tensors ...



In [None]:
# Batch-wise training of the head layers for two additional epochs.
model.train_in_batches(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE,
    epochs_to_run=2,
    layers='heads',
)

### Simulate one training iteration

In [None]:
from mrcnn.datagen import data_generator, load_image_gt
from tensorflow.python.platform import gfile

np.set_printoptions(linewidth=100)

# ---- Run parameters -------------------------------------------------------
# A value of 0 for batch_size / steps_per_epoch means "use model.config".
learning_rate   = model.config.LEARNING_RATE
epochs_to_run   = 2
layers          = 'heads'
batch_size      = 0
steps_per_epoch = 0
# assert self.mode == "training", "Create model in training mode."

# ---- Named layer groups -> layer-name regular expressions -----------------
layer_regex = {
    # everything except the backbone
    "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
    # a given ResNet stage and everything above it
    "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
    "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
    "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
    # every layer
    "all": ".*",
}

# A known group name is swapped for its regex; anything else is used verbatim.
layers          = layer_regex.get(layers, layers)
batch_size      = model.config.BATCH_SIZE if batch_size == 0 else batch_size
steps_per_epoch = model.config.STEPS_PER_EPOCH if steps_per_epoch == 0 else steps_per_epoch

# ---- Data generators ------------------------------------------------------
train_generator = data_generator(
    dataset_train, model.config,
    shuffle=True,
    batch_size=batch_size,
)
val_generator = data_generator(
    dataset_val, model.config,
    shuffle=True,
    batch_size=batch_size,
    augment=False,
)

# ---- Report the run configuration -----------------------------------------
log("Last epoch completed : {} ".format(model.epoch))
log("Starting from epoch {} for {} epochs. LR={}".format(model.epoch, epochs_to_run, learning_rate))
log("Steps per epoch:    {} ".format(steps_per_epoch))
log("Batchsize      :    {} ".format(batch_size))
log("Checkpoint Folder:  {} ".format(model.checkpoint_path))
# Index of the last epoch this run should reach.
epochs = model.epoch + epochs_to_run

# ---- Make sure the log / checkpoint directory exists ----------------------
if gfile.IsDirectory(model.log_dir):
    log('Checkpoint folder already exists')
else:
    log('Creating checkpoint folder')
    gfile.MakeDirs(model.log_dir)

# ---- Select trainable layers and compile ----------------------------------
model.set_trainable(layers)
model.compile(learning_rate, model.config.LEARNING_MOMENTUM)

# Metric labels reported by the compiled model, plus their 'val_' twins.
out_labels = model.keras_model._get_deduped_metrics_names()
callback_metrics = out_labels + ['val_' + name for name in out_labels]

# ---- Callbacks driven by hand in the following cells ----------------------
progbar = keras.callbacks.ProgbarLogger(count_mode='steps')
progbar.set_model(model.keras_model)
progbar.set_params(dict(
    epochs=epochs,
    steps=steps_per_epoch,
    verbose=1,
    do_validation=False,
    metrics=callback_metrics,
))

# Second set_model call retained from the original cell (same model object).
progbar.set_model(model.keras_model)

chkpoint = keras.callbacks.ModelCheckpoint(
    model.checkpoint_path,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)
chkpoint.set_model(model.keras_model)

progbar.on_train_begin()
epoch_idx = model.epoch

In [None]:
# Informational only: there is no early exit, so the rest of the cell still
# runs even when the target epoch has already been reached.
if epochs <= epoch_idx:
    print('Final epoch {} has already completed - Training will not proceed'.format(epochs))

# Stand-in for one pass of `while epoch_idx < epochs:`
progbar.on_epoch_begin(epoch_idx)

# Stand-in for the first pass of `for steps_index in range(steps_per_epoch):`
steps_index = 0
print(' self.epoch {}   epochs {}  step {} '.format(model.epoch, epochs, steps_index))

# Per-batch log record handed to the progress-bar callback.
batch_logs = {'batch': steps_index, 'size': batch_size}
progbar.on_batch_begin(steps_index, batch_logs)

In [None]:
# Pull one batch from the training generator; the pair is later handed to
# model.keras_model.train_on_batch() as its two positional arguments.
train_batch_x, train_batch_y = next(train_generator)

In [None]:
# Find the position of the 'input_image_meta' input among the model inputs
# and pick the matching array out of the batch.
imgmeta_idx= model.keras_model.input_names.index('input_image_meta')
img_meta  =  train_batch_x[imgmeta_idx]

# Show the masks for (at most) the first two images of the batch. Slicing
# instead of hard-coding rows 0 and 1 keeps this working for a batch of one.
# Column 0 of each meta row is used as the dataset image id.
for image_id in img_meta[:2, 0]:
    print('Image id: ',image_id)
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

In [None]:
# Run a single training step on the sampled batch. `outs` holds whatever
# train_on_batch returns; it is paired with `out_labels` further down.
outs = model.keras_model.train_on_batch(train_batch_x, train_batch_y)

In [None]:
# `outs` may come back as a single value rather than a list; normalise it so
# it can be zipped with the metric labels.
if not isinstance(outs, list):
    outs = [outs]

# Record every reported metric under its label.
for l, o in zip(out_labels, outs):
    batch_logs[l] = o

# The callbacks below must fire once per batch / epoch. They were previously
# indented into the loop above and therefore ran once per metric, which also
# advanced epoch_idx several times.
progbar.on_batch_end(steps_index, batch_logs)

# print(outs)
progbar.on_epoch_end(epoch_idx, {})
# if (epoch_idx % 10) == 0:
chkpoint.on_epoch_end(epoch_idx  , batch_logs)
epoch_idx += 1

# if epoch_idx != self.epoch:
# chkpoint.on_epoch_end(epoch_idx -1, batch_logs)
# NOTE(review): max() moves model.epoch to the target `epochs` even though
# only one step was simulated here -- confirm that is intended.
model.epoch = max(epoch_idx - 1, epochs)

print('Final : self.epoch {}   epochs {}'.format(model.epoch, epochs))