In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import skimage.io

# Root directory of the project
ROOT_DIR = os.path.abspath("../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log

import lab

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Root of the lab-specific assets: later cells read weights, test images and
# the validation dataset from paths built on LAB_DIR.
LAB_DIR = os.path.join(ROOT_DIR, "lab")

## Configurations

In [None]:
# Class labels passed to the visualization calls and used to size
# NUM_CLASSES in the inference config.
class_names = lab.class_names
# Alternative hand-written label lists kept for reference; each starts with
# the background entry 'BG'. Uncomment one to override the list from `lab`.
# class_names = [
#     'BG', 'bontaname', 'caloriemate', 'caramel'
# ]
# class_names = [
#     'BG', 'apollo', 'bontaname', 'caloriemate', 'caramel', 'chocoball',
#     'highremon', 'macadamia', 'sanipe', 'strawberry', 'zeraisu'
# ]
# class_names = [
#     'BG', 'blendy', 'buttercookies', 'caloriemate', 'chocopie', 'choice',
#     'zeraisu', 'jagabee', 'levain', 'meltykiss', 'pocky'
# ]
# class_names = [
#     'BG', 'caloriemate', 'meltykiss', 'zeraisu'
# ]

In [None]:
# Base configuration from the lab project; it is specialized for inference
# (single image per batch) further down.
config = lab.LabConfig()

In [None]:
# Override the training configurations with a few
# changes for inferencing.
class InferenceConfig(lab.LabConfig):
    """Inference-time variant of the lab configuration.

    Derives from ``lab.LabConfig`` directly instead of ``config.__class__``:
    once this cell has run, ``config`` is already an ``InferenceConfig``, so
    re-running the cell would otherwise stack a new subclass on top of the
    previous one on every execution.
    """
    # Run detection on one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # One class slot per entry of class_names.
    NUM_CLASSES = len(class_names)

config = InferenceConfig()
config.display()

In [None]:
# Device to load the neural network on.
# Useful if you're training a model on the same 
# machine, in which case use CPU and leave the
# GPU for training.
# NOTE(review): DEVICE is not referenced by any cell visible here; the model
# is built without an explicit device scope — confirm whether that is intended.

DEVICE = "/gpu:0"  # /cpu:0 or /gpu:0
#DEVICE = "/cpu:0"

# Inspect the model in training or inference modes
# values: 'inference' or 'training'
# TODO: code for 'training' test mode not ready yet
# NOTE(review): the model-loading cell hard-codes mode="inference" rather
# than reading TEST_MODE.
TEST_MODE = "inference"

In [None]:
def get_ax(rows=1, cols=1, size=16):
    """Create the Matplotlib axes used by the visualizations in this notebook.

    Having a single factory gives one place to tune how large plots render.

    rows, cols: layout of the subplot grid.
    size: inches allotted to each grid cell; the figure is created
        (size * cols) wide and (size * rows) tall.

    Returns the axes object(s) produced by plt.subplots for that grid.
    """
    figure_size = (size*cols, size*rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

## Load Model

In [None]:
# Build the Mask R-CNN model in inference mode using the inference config.
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
# Earlier checkpoints kept for reference; only the last assignment is used.
# weights_path = LAB_DIR + "/models/20200706/caloriemate/mask_rcnn_lab_0050.h5"
# weights_path = LAB_DIR + "/models/20200723/one-class/mask_rcnn_lab_0100.h5"
weights_path = LAB_DIR + "/models/20200707/one-class/mask_rcnn_lab_0100.h5"

# Load weights
print("Loading weights ", weights_path)
# by_name=True: presumably matches checkpoint weights to layers by layer name
# (Keras-style) — see mrcnn.model.MaskRCNN.load_weights to confirm.
model.load_weights(weights_path, by_name=True)

## Visualize Model Architecture

In [None]:
# Print the layer-by-layer summary of the underlying Keras model.
model.keras_model.summary()

In [None]:
from keras.utils import plot_model
from IPython.display import Image

# Render the model graph to model.png (written to the current working
# directory) and display the saved image as the cell output.
plot_model(model.keras_model, show_shapes=True, show_layer_names=True, to_file='model.png')
Image('model.png')

## Show Result

In [None]:
# Run detection on every .png/.jpg file in the caloriemate test folder and
# display the detected instances for each image.
TEST_DIR = os.path.join(LAB_DIR, "test/caloriemate")
for file in sorted(os.listdir(TEST_DIR)):
    # Skip anything that is not a .png or .jpg file (match is case-sensitive).
    if not file.endswith(".png") and not file.endswith(".jpg"): continue
    image = skimage.io.imread(TEST_DIR + "/" + file)
    
    results = model.detect([image], verbose=1)
    
    # A single image was submitted, so only the first result entry is used.
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'])

In [None]:
import imgaug.augmenters as iaa

# Same loop as the previous cell, but each image is first passed through an
# imgaug Affine(rotate=90) augmenter to see how detection copes with rotation.
augment_param = iaa.Affine(rotate=90)

TEST_DIR = os.path.join(LAB_DIR, "test/caloriemate")
for file in sorted(os.listdir(TEST_DIR)):
    # Skip anything that is not a .png or .jpg file (match is case-sensitive).
    if not file.endswith(".png") and not file.endswith(".jpg"): continue
    image = skimage.io.imread(TEST_DIR + "/" + file)
    image = augment_param.augment_image(image)
    
    results = model.detect([image], verbose=1)
    
    # A single image was submitted, so only the first result entry is used.
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'])

## Save Result

In [None]:
import cv2
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure

# Resolution of the off-screen figure. figsize is derived as pixels / DPI, so
# the DPI is passed explicitly instead of being inherited from
# rcParams['figure.dpi'], which is not guaranteed to be 100.
RESULT_DPI = 100

# Camera calibration used to undistort the test images before detection.
fs = cv2.FileStorage("./intrinsic.xml", cv2.FILE_STORAGE_READ)
if not fs.isOpened():
    raise IOError("Cannot open camera calibration file ./intrinsic.xml")
try:
    camera_matrix = fs.getNode("camera_matrix").mat()
    dist_coeffs = fs.getNode("distortion_coefficients").mat()
finally:
    # Release the file handle even if reading a node fails.
    fs.release()

# test_class = [
#     'blendy', 'buttercookies', 'caloriemate', 'chocopie', 'choice',
#     'zeraisu', 'jagabee', 'levain', 'meltykiss', 'pocky'
# ]
test_class = ["caloriemate"]

for class_name in test_class:
    TEST_DIR = os.path.join(LAB_DIR, "test/" + class_name)
    SAVE_DIR = os.path.join(LAB_DIR, "test/result/20200723/one-class_epoch100/" + class_name + "_undistorted_synthetic")

    os.makedirs(SAVE_DIR, exist_ok=True)

    for file in sorted(os.listdir(TEST_DIR)):
        # Only .png / .jpg files are processed (match is case-sensitive).
        if not file.endswith((".png", ".jpg")):
            continue

        image = cv2.imread(TEST_DIR + "/" + file)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            print("Skipping unreadable image ", TEST_DIR + "/" + file)
            continue
        # Keep a copy of the raw input next to the rendered result.
        cv2.imwrite(SAVE_DIR + "/" + file, image)

        image = cv2.undistort(image, camera_matrix, dist_coeffs)
        # OpenCV loads BGR; the rest of the pipeline is fed RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        results = model.detect([image], verbose=1)
        detection = results[0]

        # Off-screen figure matching the image size in pixels. Margins are
        # removed on this figure only, rather than through plt.rcParams, so
        # figures drawn by later cells keep their normal layout.
        height, width = image.shape[:2]
        fig = Figure(figsize=(width / RESULT_DPI, height / RESULT_DPI), dpi=RESULT_DPI)
        fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
        canvas = FigureCanvasAgg(fig)
        axes = fig.gca()
        visualize.display_instances(image, detection['rois'], detection['masks'],
                                    detection['class_ids'], class_names,
                                    detection['scores'], ax=axes)

        # Rasterize the figure and view the RGB bytes as an image array.
        # np.frombuffer replaces np.fromstring, which is deprecated for binary data.
        canvas.draw()
        rendered = np.frombuffer(canvas.tostring_rgb(), dtype='uint8')

        _, _, w, h = fig.bbox.bounds
        rendered = rendered.reshape((int(h), int(w), 3))

        # splitext strips only the final extension, so names that contain
        # extra dots are not truncated at the first one.
        save_name = os.path.splitext(file)[0] + "_res.png"
        skimage.io.imsave(SAVE_DIR + "/" + save_name, rendered)

## Compute mAP @ IoU=50 on Validation Data

In [None]:
# Load validation dataset
# ("val" selects the subset; the data is read from under LAB_DIR)
dataset = lab.LabDataset()
dataset.load_lab(LAB_DIR, "val")

# Must call before using the dataset
dataset.prepare()

In [None]:
# Based on the utils.py function of the same name
def compute_overlaps_masks(masks1, masks2, verbose=True):
    """Computes IoU overlaps between two sets of masks.
    masks1, masks2: [Height, Width, instances]
    verbose: when True (the default) print the shapes of the intermediate
        arrays, as this notebook does for illustration.

    Returns: [instances1, instances2] array of IoU values. A pair whose union
    is empty (both masks have no pixel above 0.5) gets IoU 0 instead of the
    NaN that a plain 0/0 division would produce; NaN compares False against
    every threshold and could therefore be accepted as a match downstream.
    """

    # If either set of masks is empty return empty result
    if masks1.shape[-1] == 0 or masks2.shape[-1] == 0:
        return np.zeros((masks1.shape[-1], masks2.shape[-1]))
    # flatten masks and compute their areas
    masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
    masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
    area1 = np.sum(masks1, axis=0)
    area2 = np.sum(masks2, axis=0)
    if verbose:
        print("pred_mask", masks1.shape)
        print("gt_mask", masks2.shape)
        print("pred_mask_area", area1.shape)
        print("gt_mask_area", area2.shape)

    # intersections and union
    intersections = np.dot(masks1.T, masks2)
    if verbose:
        print("intersections", intersections.shape)
    union = area1[:, None] + area2[None, :] - intersections
    if verbose:
        print("union", union.shape)
    # Divide only where the union is non-empty; other entries stay at 0.
    overlaps = np.divide(intersections, union,
                         out=np.zeros_like(intersections), where=union > 0)
    if verbose:
        print("overlaps", overlaps.shape)

    return overlaps

def trim_zeros(x):
    """Drop the all-zero rows of a 2-D array.

    Tensors are often allocated larger than the data they hold and padded
    with zeros; this keeps only the rows that contain a non-zero value.
    x: [rows, columns].
    """
    assert x.ndim == 2
    has_data = np.any(x != 0, axis=1)
    return x[has_data]

def compute_matches(gt_boxes, gt_class_ids, gt_masks,
                    pred_boxes, pred_class_ids, pred_scores, pred_masks,
                    iou_threshold=0.5, score_threshold=0.0):
    """Pair predicted instances with ground truth instances by mask IoU.

    Predictions are visited from highest to lowest score; each one claims the
    best-overlapping, still-unclaimed ground truth instance of the same class
    whose IoU is at least iou_threshold.

    Returns:
        gt_match: 1-D array. For each GT box, the index of the matched
                  predicted box (in score-sorted order), or -1.
        pred_match: 1-D array. For each predicted box (in score-sorted
                    order), the index of the matched ground truth box, or -1.
        overlaps: [pred_boxes, gt_boxes] IoU overlaps.
    """
    # Strip zero padding (assumes padded rows sit at the end of each array)
    gt_boxes = trim_zeros(gt_boxes)
    num_gt = gt_boxes.shape[0]
    gt_masks = gt_masks[..., :num_gt]
    pred_boxes = trim_zeros(pred_boxes)
    pred_scores = pred_scores[:pred_boxes.shape[0]]

    # Reorder every prediction array by descending score
    order = np.argsort(pred_scores)[::-1]
    pred_boxes, pred_class_ids = pred_boxes[order], pred_class_ids[order]
    pred_scores = pred_scores[order]
    pred_masks = pred_masks[..., order]

    # IoU table, one row per prediction and one column per ground truth
    overlaps = compute_overlaps_masks(pred_masks, gt_masks)

    print("pred_boxes shape", pred_boxes.shape)
    num_pred = pred_boxes.shape[0]
    pred_match = np.full(num_pred, -1.0)
    gt_match = np.full(num_gt, -1.0)
    for pred_ix in range(num_pred):
        ious = overlaps[pred_ix]
        # Ground truth candidates, best overlap first
        candidates = np.argsort(ious)[::-1]
        # Cut the list at the first candidate below score_threshold
        too_low = np.flatnonzero(ious[candidates] < score_threshold)
        if too_low.size > 0:
            candidates = candidates[:too_low[0]]
        for gt_ix in candidates:
            # A ground truth instance can be claimed only once
            if gt_match[gt_ix] > -1:
                continue
            # Candidates are sorted, so nothing further can pass either
            if ious[gt_ix] < iou_threshold:
                break
            # Same class: record the pairing in both directions
            if pred_class_ids[pred_ix] == gt_class_ids[gt_ix]:
                gt_match[gt_ix] = pred_ix
                pred_match[pred_ix] = gt_ix
                break

    return gt_match, pred_match, overlaps

def compute_ap(gt_boxes, gt_class_ids, gt_masks,
               pred_boxes, pred_class_ids, pred_scores, pred_masks,
               iou_threshold=0.5):
    """Average Precision of one image's detections at an IoU threshold
    (default 0.5).

    Returns:
    mAP: Mean Average Precision
    precisions: precision values at successive score cut-offs, padded with a
        leading and trailing entry and made non-increasing.
    recalls: recall values at the same cut-offs, padded with 0 and 1.
    overlaps: [pred_boxes, gt_boxes] IoU overlaps.
    """
    gt_match, pred_match, overlaps = compute_matches(
        gt_boxes, gt_class_ids, gt_masks,
        pred_boxes, pred_class_ids, pred_scores, pred_masks,
        iou_threshold)

    # Running number of matched predictions, in score order
    hits = np.cumsum(pred_match > -1)
    ranks = np.arange(len(pred_match)) + 1
    precisions = hits / ranks
    recalls = hits.astype(np.float32) / len(gt_match)

    # Sentinel values at both ends simplify the area computation below
    precisions = np.concatenate([[0], precisions, [0]])
    recalls = np.concatenate([[0], recalls, [1]])

    # Replace each precision by the maximum of itself and everything after
    # it, so the curve never increases with recall (VOC-style envelope).
    precisions = np.maximum.accumulate(precisions[::-1])[::-1]

    # Sum precision * recall-increment over the points where recall changes
    steps = np.where(recalls[:-1] != recalls[1:])[0] + 1
    recall_gain = recalls[steps] - recalls[steps - 1]
    mAP = np.sum(recall_gain * precisions[steps])

    return mAP, precisions, recalls, overlaps

# Try the AP computation defined above on one randomly chosen validation image.
# NOTE(review): no random seed is set in the visible cells, so the image
# differs between runs.
image_id = np.random.choice(dataset.image_ids)
image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset, config, image_id, use_mini_mask=False)

results = model.detect([image], verbose=0)
# Compute AP
r = results[0]
# Uses the notebook-local compute_ap (which prints intermediate shapes).
AP, precisions, recalls, overlaps =\
    compute_ap(gt_bbox, gt_class_id, gt_mask, r['rois'], r['class_ids'], r['scores'], r['masks'])

In [None]:
def compute_batch_ap(image_ids):
    """Return the list of per-image AP values for the given image ids.

    Each image and its ground truth are loaded from the global `dataset`,
    run through the global `model`, and scored with utils.compute_ap (the
    library version, not the notebook-local compute_ap).
    """
    def _image_ap(image_id):
        # Ground truth with full-size masks (no mini-mask)
        image, _, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
            dataset, config, image_id, use_mini_mask=False)
        # One image in, so one detection result out
        detection = model.detect([image], verbose=0)[0]
        ap, _, _, _ = utils.compute_ap(
            gt_bbox, gt_class_id, gt_mask,
            detection['rois'], detection['class_ids'],
            detection['scores'], detection['masks'])
        return ap

    return [_image_ap(image_id) for image_id in image_ids]

# Pick a set of random images
# (np.random.choice samples with replacement by default, so the 10 ids may
# contain repeats; use the commented line below to score the full set)
image_ids = np.random.choice(dataset.image_ids, 10)
# image_ids = dataset.image_ids
APs = compute_batch_ap(image_ids)
print("mAP @ IoU=50: ", np.mean(APs))

## Compute Recall

In [None]:
# Load validation dataset
# (rebuilds `dataset` so this section does not depend on the mAP cells)
dataset = lab.LabDataset()
dataset.load_lab(LAB_DIR, "val")

# Must call before using the dataset
dataset.prepare()

In [None]:
# Push the notebook's label list back into the lab module. This only changes
# anything when class_names was overridden with one of the hand-written lists.
lab.class_names = class_names

In [None]:
# A ground-truth instance counts as detected when some predicted mask
# overlaps it with an IoU above this value.
RECALL_IOU_THRESHOLD = 0.7

recalls = []
for image_id in dataset.image_ids:
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
            modellib.load_image_gt(dataset, config,
                                   image_id, use_mini_mask=False)

    # Run object detection
    results = model.detect([image], verbose=0)
    r = results[0]

    # Flatten each mask stack to [pixels, instances]. The pixel count is
    # spelled out because reshape(-1, 0) raises when a stack has no instances
    # (e.g. an image with no detections).
    gt_masks = np.reshape(
        gt_mask > 0.5,
        (int(np.prod(gt_mask.shape[:-1])), gt_mask.shape[-1])).astype(np.float32)
    pr_masks = np.reshape(
        r["masks"] > 0.5,
        (int(np.prod(r["masks"].shape[:-1])), r["masks"].shape[-1])).astype(np.float32)

    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'])

    num_gt = gt_masks.shape[-1]
    if num_gt == 0:
        # Recall is undefined without ground truth; leave this image out of
        # the statistics instead of dividing by zero.
        print("recall : skipped (no ground-truth instances)")
        continue

    matches = 0
    for gt in gt_masks.T:
        # Best IoU of this ground-truth mask against any prediction; stays 0
        # when there are no predictions at all.
        best_iou = 0.0
        for pr in pr_masks.T:
            union = np.sum(np.logical_or(gt, pr))
            if union == 0:
                # Both masks are empty: no overlap to measure.
                continue
            overlap = np.sum(np.logical_and(gt, pr))
            best_iou = max(best_iou, overlap / union)

        #print("max iou :", best_iou)
        if best_iou > RECALL_IOU_THRESHOLD:
            matches += 1

    recall = matches / num_gt
    print("recall :", recall)
    recalls.append(recall)

print(recalls)
if recalls:
    print("Average recall", np.mean(recalls))
    print("max recall", np.max(recalls))
    print("min recall", np.min(recalls))
else:
    # np.max / np.min raise on an empty sequence.
    print("No image with ground-truth instances; recall statistics unavailable")

## Step by Step Prediction

## Stage 0: Backbone (Residual Network)

In [None]:
from pylab import rcParams
# Default figure size (width, height in inches) for plots created from here on.
rcParams['figure.figsize'] = 20,10

In [None]:
# Push the notebook's label list back into the lab module before loading.
lab.class_names = class_names

# Load validation dataset
dataset = lab.LabDataset()
dataset.load_lab(LAB_DIR, "val")

# Must call before using the dataset
dataset.prepare()

In [None]:
# NOTE(review): the random pick is overwritten on the next line, so image 9
# is always the one inspected; drop one of the two assignments as needed.
image_id = random.choice(dataset.image_ids)
image_id = 9
# Load the image with full-size ground-truth masks (no mini-mask).
image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset, config, image_id, use_mini_mask=False)

plt.imshow(image)
plt.show()

In [None]:
# Capture the input and output tensors of the first backbone layers
# (zero padding, conv1, batch norm, activation, max pooling) for `image`.
# Each entry is a (label, tensor) pair; presumably run_graph evaluates every
# tensor and returns the values keyed by label — see
# mrcnn.model.MaskRCNN.run_graph to confirm.
conv1 = model.run_graph([image], [
    ("input_image",              tf.identity(model.keras_model.get_layer("input_image").output)),
    ("zero_padding2d_1_input",   model.keras_model.get_layer("zero_padding2d_1").input), 
    ("zero_padding2d_1_output",  model.keras_model.get_layer("zero_padding2d_1").output),
    ("conv1_input",              model.keras_model.get_layer("conv1").input), 
    ("conv1_output",             model.keras_model.get_layer("conv1").output),
    ("bn_conv1_input",           model.keras_model.get_layer("bn_conv1").input),
    ("bn_conv1_output",          model.keras_model.get_layer("bn_conv1").output),
    ("activation_1_input",       model.keras_model.get_layer("activation_1").input),
    ("activation_1_output",      model.keras_model.get_layer("activation_1").output),
    ("max_pooling2d_1_input",    model.keras_model.get_layer("max_pooling2d_1").input),
    ("max_pooling2d_1_output",   model.keras_model.get_layer("max_pooling2d_1").output),
])

In [None]:
# Capture inputs/outputs of every layer named res2a_* / bn2a_*: the three
# branch2 convolutions with their batch norms and activations, the branch1
# convolution, and the add + output layers that combine them.
res2a = model.run_graph([image], [
    ("res2a_branch2a_input",              model.keras_model.get_layer("res2a_branch2a").input), 
    ("res2a_branch2a_output",             model.keras_model.get_layer("res2a_branch2a").output),
    ("bn2a_branch2a_input",           model.keras_model.get_layer("bn2a_branch2a").input),
    ("bn2a_branch2a_output",          model.keras_model.get_layer("bn2a_branch2a").output),
    ("activation_2_input",       model.keras_model.get_layer("activation_2").input),
    ("activation_2_output",      model.keras_model.get_layer("activation_2").output),
    ("res2a_branch2b_input",              model.keras_model.get_layer("res2a_branch2b").input), 
    ("res2a_branch2b_output",             model.keras_model.get_layer("res2a_branch2b").output),
    ("bn2a_branch2b_input",           model.keras_model.get_layer("bn2a_branch2b").input),
    ("bn2a_branch2b_output",          model.keras_model.get_layer("bn2a_branch2b").output),
    ("activation_3_input",       model.keras_model.get_layer("activation_3").input),
    ("activation_3_output",      model.keras_model.get_layer("activation_3").output),
    ("res2a_branch2c_input",              model.keras_model.get_layer("res2a_branch2c").input), 
    ("res2a_branch2c_output",             model.keras_model.get_layer("res2a_branch2c").output),
    ("bn2a_branch2c_input",           model.keras_model.get_layer("bn2a_branch2c").input),
    ("bn2a_branch2c_output",          model.keras_model.get_layer("bn2a_branch2c").output),
    
    ("res2a_branch1_input",              model.keras_model.get_layer("res2a_branch1").input), 
    ("res2a_branch1_output",             model.keras_model.get_layer("res2a_branch1").output),
    ("bn2a_branch1_input",           model.keras_model.get_layer("bn2a_branch1").input),
    ("bn2a_branch1_output",          model.keras_model.get_layer("bn2a_branch1").output),
    
    # tf.identity wraps the add layer's input — presumably because that
    # input is a list of tensors rather than a single tensor (TODO confirm).
    ("add_1_input",              tf.identity(model.keras_model.get_layer("add_1").input)), 
    ("add_1_output",             model.keras_model.get_layer("add_1").output),
    ("res2a_out_input",              model.keras_model.get_layer("res2a_out").input), 
    ("res2a_out_output",             model.keras_model.get_layer("res2a_out").output),
])

In [None]:
# Capture inputs/outputs of every layer named res2b_* / bn2b_*, plus the
# add_2 and res2b_out layers. This block lists no branch1 layers.
res2b = model.run_graph([image], [
    ("res2b_branch2a_input",              model.keras_model.get_layer("res2b_branch2a").input), 
    ("res2b_branch2a_output",             model.keras_model.get_layer("res2b_branch2a").output),
    ("bn2b_branch2a_input",           model.keras_model.get_layer("bn2b_branch2a").input),
    ("bn2b_branch2a_output",          model.keras_model.get_layer("bn2b_branch2a").output),
    ("activation_4_input",       model.keras_model.get_layer("activation_4").input),
    ("activation_4_output",      model.keras_model.get_layer("activation_4").output),
    ("res2b_branch2b_input",              model.keras_model.get_layer("res2b_branch2b").input), 
    ("res2b_branch2b_output",             model.keras_model.get_layer("res2b_branch2b").output),
    ("bn2b_branch2b_input",           model.keras_model.get_layer("bn2b_branch2b").input),
    ("bn2b_branch2b_output",          model.keras_model.get_layer("bn2b_branch2b").output),
    ("activation_5_input",       model.keras_model.get_layer("activation_5").input),
    ("activation_5_output",      model.keras_model.get_layer("activation_5").output),
    ("res2b_branch2c_input",              model.keras_model.get_layer("res2b_branch2c").input), 
    ("res2b_branch2c_output",             model.keras_model.get_layer("res2b_branch2c").output),
    ("bn2b_branch2c_input",           model.keras_model.get_layer("bn2b_branch2c").input),
    ("bn2b_branch2c_output",          model.keras_model.get_layer("bn2b_branch2c").output),
        
    ("add_2_input",              tf.identity(model.keras_model.get_layer("add_2").input)), 
    ("add_2_output",             model.keras_model.get_layer("add_2").output),
    ("res2b_out_input",              model.keras_model.get_layer("res2b_out").input), 
    ("res2b_out_output",             model.keras_model.get_layer("res2b_out").output),
])

In [None]:
# The full layer list would be long, so from here on only the key layers of
# each block are inspected: the first and last branch2 convolutions, the
# last batch norm, and the add + output layers.
res2c = model.run_graph([image], [
    ("res2c_branch2a_input",              model.keras_model.get_layer("res2c_branch2a").input), 
    ("res2c_branch2a_output",             model.keras_model.get_layer("res2c_branch2a").output),
    #...
    ("res2c_branch2c_input",              model.keras_model.get_layer("res2c_branch2c").input), 
    ("res2c_branch2c_output",             model.keras_model.get_layer("res2c_branch2c").output),
    ("bn2c_branch2c_input",           model.keras_model.get_layer("bn2c_branch2c").input),
    ("bn2c_branch2c_output",          model.keras_model.get_layer("bn2c_branch2c").output),
        
    ("add_3_input",              tf.identity(model.keras_model.get_layer("add_3").input)), 
    ("add_3_output",             model.keras_model.get_layer("add_3").output),
    ("res2c_out_input",              model.keras_model.get_layer("res2c_out").input), 
    ("res2c_out_output",             model.keras_model.get_layer("res2c_out").output),
])

In [None]:
# Key layers named res3a_* / bn3a_* (branch2, branch1, add_4, res3a_out),
# plus the fpn_c2p2 layer, which is inspected alongside this block.
res3a = model.run_graph([image], [
    ("res3a_branch2a_input",              model.keras_model.get_layer("res3a_branch2a").input), 
    ("res3a_branch2a_output",             model.keras_model.get_layer("res3a_branch2a").output),
    #...
    ("res3a_branch2c_input",              model.keras_model.get_layer("res3a_branch2c").input), 
    ("res3a_branch2c_output",             model.keras_model.get_layer("res3a_branch2c").output),
    ("bn3a_branch2c_input",           model.keras_model.get_layer("bn3a_branch2c").input),
    ("bn3a_branch2c_output",          model.keras_model.get_layer("bn3a_branch2c").output),
    
    ("res3a_branch1_input",              model.keras_model.get_layer("res3a_branch1").input), 
    ("res3a_branch1_output",             model.keras_model.get_layer("res3a_branch1").output),
    ("bn3a_branch1_input",           model.keras_model.get_layer("bn3a_branch1").input),
    ("bn3a_branch1_output",          model.keras_model.get_layer("bn3a_branch1").output),
    
    ("fpn_c2p2_input",           model.keras_model.get_layer("fpn_c2p2").input),
    ("fpn_c2p2_output",          model.keras_model.get_layer("fpn_c2p2").output),
    
    ("add_4_input",              tf.identity(model.keras_model.get_layer("add_4").input)), 
    ("add_4_output",             model.keras_model.get_layer("add_4").output),
    ("res3a_out_input",              model.keras_model.get_layer("res3a_out").input), 
    ("res3a_out_output",             model.keras_model.get_layer("res3a_out").output),
])

In [None]:
# Key layers named res3b_* / bn3b_*, then (skipping res3c) only the add and
# output layers of res3d.
res3b = model.run_graph([image], [
    ("res3b_branch2a_input",              model.keras_model.get_layer("res3b_branch2a").input), 
    ("res3b_branch2a_output",             model.keras_model.get_layer("res3b_branch2a").output),
    #...
    ("res3b_branch2c_input",              model.keras_model.get_layer("res3b_branch2c").input), 
    ("res3b_branch2c_output",             model.keras_model.get_layer("res3b_branch2c").output),
    ("bn3b_branch2c_input",           model.keras_model.get_layer("bn3b_branch2c").input),
    ("bn3b_branch2c_output",          model.keras_model.get_layer("bn3b_branch2c").output),
    
    ("add_5_input",              tf.identity(model.keras_model.get_layer("add_5").input)), 
    ("add_5_output",             model.keras_model.get_layer("add_5").output),
    ("res3b_out_input",              model.keras_model.get_layer("res3b_out").input), 
    ("res3b_out_output",             model.keras_model.get_layer("res3b_out").output),
])

#res3c

#res3d
res3d = model.run_graph([image], [    
    ("add_7_input",              tf.identity(model.keras_model.get_layer("add_7").input)), 
    ("add_7_output",             model.keras_model.get_layer("add_7").output),
    ("res3d_out_input",              model.keras_model.get_layer("res3d_out").input), 
    ("res3d_out_output",             model.keras_model.get_layer("res3d_out").output),
])


In [None]:
# Key layers named res4a_* / bn4a_* (branch2, branch1, add_8, res4a_out),
# plus the fpn_c3p3 layer, which is inspected alongside this block.
res4a = model.run_graph([image], [
    ("res4a_branch2a_input",              model.keras_model.get_layer("res4a_branch2a").input), 
    ("res4a_branch2a_output",             model.keras_model.get_layer("res4a_branch2a").output),
    #...
    ("res4a_branch2c_input",              model.keras_model.get_layer("res4a_branch2c").input), 
    ("res4a_branch2c_output",             model.keras_model.get_layer("res4a_branch2c").output),
    ("bn4a_branch2c_input",           model.keras_model.get_layer("bn4a_branch2c").input),
    ("bn4a_branch2c_output",          model.keras_model.get_layer("bn4a_branch2c").output),
    
    ("res4a_branch1_input",              model.keras_model.get_layer("res4a_branch1").input), 
    ("res4a_branch1_output",             model.keras_model.get_layer("res4a_branch1").output),
    ("bn4a_branch1_input",           model.keras_model.get_layer("bn4a_branch1").input),
    ("bn4a_branch1_output",          model.keras_model.get_layer("bn4a_branch1").output),
    
    ("fpn_c3p3_input",           model.keras_model.get_layer("fpn_c3p3").input),
    ("fpn_c3p3_output",          model.keras_model.get_layer("fpn_c3p3").output),
    
    ("add_8_input",              tf.identity(model.keras_model.get_layer("add_8").input)), 
    ("add_8_output",             model.keras_model.get_layer("add_8").output),
    ("res4a_out_input",              model.keras_model.get_layer("res4a_out").input), 
    ("res4a_out_output",             model.keras_model.get_layer("res4a_out").output),
])

In [None]:
# Key layers named res4b_* / bn4b_*, then the same set for res4w; the
# blocks between them (res4c onward) are not inspected.
res4b = model.run_graph([image], [
    ("res4b_branch2a_input",              model.keras_model.get_layer("res4b_branch2a").input), 
    ("res4b_branch2a_output",             model.keras_model.get_layer("res4b_branch2a").output),
    #...
    ("res4b_branch2c_input",              model.keras_model.get_layer("res4b_branch2c").input), 
    ("res4b_branch2c_output",             model.keras_model.get_layer("res4b_branch2c").output),
    ("bn4b_branch2c_input",           model.keras_model.get_layer("bn4b_branch2c").input),
    ("bn4b_branch2c_output",          model.keras_model.get_layer("bn4b_branch2c").output),
    
    ("add_9_input",              tf.identity(model.keras_model.get_layer("add_9").input)), 
    ("add_9_output",             model.keras_model.get_layer("add_9").output),
    ("res4b_out_input",              model.keras_model.get_layer("res4b_out").input), 
    ("res4b_out_output",             model.keras_model.get_layer("res4b_out").output),
])

# res4c ~ res4w
res4w = model.run_graph([image], [
    ("res4w_branch2a_input",              model.keras_model.get_layer("res4w_branch2a").input), 
    ("res4w_branch2a_output",             model.keras_model.get_layer("res4w_branch2a").output),
    #...
    ("res4w_branch2c_input",              model.keras_model.get_layer("res4w_branch2c").input), 
    ("res4w_branch2c_output",             model.keras_model.get_layer("res4w_branch2c").output),
    ("bn4w_branch2c_input",           model.keras_model.get_layer("bn4w_branch2c").input),
    ("bn4w_branch2c_output",          model.keras_model.get_layer("bn4w_branch2c").output),
    
    ("add_30_input",              tf.identity(model.keras_model.get_layer("add_30").input)), 
    ("add_30_output",             model.keras_model.get_layer("add_30").output),
    ("res4w_out_input",              model.keras_model.get_layer("res4w_out").input), 
    ("res4w_out_output",             model.keras_model.get_layer("res4w_out").output),
])

In [None]:
# Key layers named res5a_* / bn5a_* (branch2, branch1, add_31, res5a_out),
# plus the fpn_c4p4 layer, which is inspected alongside this block.
res5a = model.run_graph([image], [
    ("res5a_branch2a_input",              model.keras_model.get_layer("res5a_branch2a").input), 
    ("res5a_branch2a_output",             model.keras_model.get_layer("res5a_branch2a").output),
    #...
    ("res5a_branch2c_input",              model.keras_model.get_layer("res5a_branch2c").input), 
    ("res5a_branch2c_output",             model.keras_model.get_layer("res5a_branch2c").output),
    ("bn5a_branch2c_input",           model.keras_model.get_layer("bn5a_branch2c").input),
    ("bn5a_branch2c_output",          model.keras_model.get_layer("bn5a_branch2c").output),
    
    ("res5a_branch1_input",              model.keras_model.get_layer("res5a_branch1").input), 
    ("res5a_branch1_output",             model.keras_model.get_layer("res5a_branch1").output),
    ("bn5a_branch1_input",           model.keras_model.get_layer("bn5a_branch1").input),
    ("bn5a_branch1_output",          model.keras_model.get_layer("bn5a_branch1").output),
    
    ("fpn_c4p4_input",           model.keras_model.get_layer("fpn_c4p4").input),
    ("fpn_c4p4_output",          model.keras_model.get_layer("fpn_c4p4").output),
    
    ("add_31_input",              tf.identity(model.keras_model.get_layer("add_31").input)), 
    ("add_31_output",             model.keras_model.get_layer("add_31").output),
    ("res5a_out_input",              model.keras_model.get_layer("res5a_out").input), 
    ("res5a_out_output",             model.keras_model.get_layer("res5a_out").output),
])

In [None]:
# Key layers named res5b_* / bn5b_*, then only the output layer of res5c.
res5b = model.run_graph([image], [
    ("res5b_branch2a_input",              model.keras_model.get_layer("res5b_branch2a").input), 
    ("res5b_branch2a_output",             model.keras_model.get_layer("res5b_branch2a").output),
    #...
    ("res5b_branch2c_input",              model.keras_model.get_layer("res5b_branch2c").input), 
    ("res5b_branch2c_output",             model.keras_model.get_layer("res5b_branch2c").output),
    ("bn5b_branch2c_input",           model.keras_model.get_layer("bn5b_branch2c").input),
    ("bn5b_branch2c_output",          model.keras_model.get_layer("bn5b_branch2c").output),
    
    ("add_32_input",              tf.identity(model.keras_model.get_layer("add_32").input)), 
    ("add_32_output",             model.keras_model.get_layer("add_32").output),
    ("res5b_out_input",              model.keras_model.get_layer("res5b_out").input), 
    ("res5b_out_output",             model.keras_model.get_layer("res5b_out").output),
])

res5c = model.run_graph([image], [    
    ("res5c_out_input",              model.keras_model.get_layer("res5c_out").input), 
    ("res5c_out_output",             model.keras_model.get_layer("res5c_out").output),
])


In [None]:
# Capture inputs/outputs of the layers named fpn_*: the c5p5 layer, the
# p5/p6 outputs, and for each of p4, p3 and p2 the add layer, the output
# layer and (for p5..p3) the upsampled tensor.
fpn = model.run_graph([image], [    
    ("fpn_c5p5_input",           model.keras_model.get_layer("fpn_c5p5").input),
    ("fpn_c5p5_output",          model.keras_model.get_layer("fpn_c5p5").output),    
    ("fpn_p5_input",           model.keras_model.get_layer("fpn_p5").input),
    ("fpn_p5_output",          model.keras_model.get_layer("fpn_p5").output),
    ("fpn_p6_input",           model.keras_model.get_layer("fpn_p6").input),
    ("fpn_p6_output",          model.keras_model.get_layer("fpn_p6").output),
    ("fpn_p5upsampled_input",           model.keras_model.get_layer("fpn_p5upsampled").input),
    ("fpn_p5upsampled_output",          model.keras_model.get_layer("fpn_p5upsampled").output),
    
    # tf.identity wraps each add layer's input — presumably because that
    # input is a list of tensors rather than a single tensor (TODO confirm).
    ("fpn_p4add_input",           tf.identity(model.keras_model.get_layer("fpn_p4add").input)),
    ("fpn_p4add_output",          model.keras_model.get_layer("fpn_p4add").output),
    ("fpn_p4_input",           model.keras_model.get_layer("fpn_p4").input),
    ("fpn_p4_output",          model.keras_model.get_layer("fpn_p4").output),
    ("fpn_p4upsampled_input",           model.keras_model.get_layer("fpn_p4upsampled").input),
    ("fpn_p4upsampled_output",          model.keras_model.get_layer("fpn_p4upsampled").output),
    
    ("fpn_p3add_input",           tf.identity(model.keras_model.get_layer("fpn_p3add").input)),
    ("fpn_p3add_output",          model.keras_model.get_layer("fpn_p3add").output),
    ("fpn_p3_input",           model.keras_model.get_layer("fpn_p3").input),
    ("fpn_p3_output",          model.keras_model.get_layer("fpn_p3").output),
    ("fpn_p3upsampled_input",           model.keras_model.get_layer("fpn_p3upsampled").input),
    ("fpn_p3upsampled_output",          model.keras_model.get_layer("fpn_p3upsampled").output),
    
    ("fpn_p2add_input",           tf.identity(model.keras_model.get_layer("fpn_p2add").input)),
    ("fpn_p2add_output",          model.keras_model.get_layer("fpn_p2add").output),
    ("fpn_p2_input",           model.keras_model.get_layer("fpn_p2").input),
    ("fpn_p2_output",          model.keras_model.get_layer("fpn_p2").output),
])



## Stage 1: Region Proposal Network

The Region Proposal Network (RPN) runs a lightweight binary classifier on a lot of boxes (anchors) over the image and returns object/no-object scores. Anchors with a high *objectness* score (positive anchors) are passed to stage two to be classified.

Often, even positive anchors don't cover objects fully. So the RPN also regresses a refinement (a delta in location and size) to be applied to the anchors, shifting and resizing them slightly to match the correct boundaries of the object.

## 1.a RPN Targets
The RPN targets are the training values for the RPN. To generate the targets, we start with a grid of anchors that cover the full image at different scales, and then we compute the IoU of the anchors with the ground truth objects. Positive anchors are those that have an IoU >= 0.7 with any ground truth object, and negative anchors are those that don't cover any object by more than 0.3 IoU. Anchors in between (i.e. those that cover an object by IoU >= 0.3 but < 0.7) are considered neutral and excluded from training.

To train the RPN regressor, we also compute the shift and resizing needed to make the anchor cover the ground truth object completely.

In [None]:
# Default figure size (width, height in inches) for the plots that follow.
# Set through the pyplot handle imported at the top of the notebook rather
# than pulling in the discouraged `pylab` namespace mid-notebook.
plt.rcParams['figure.figsize'] = (20, 10)

In [None]:
# Push the notebook's class list into the `lab` module before the dataset is
# built. NOTE(review): presumably LabDataset reads lab.class_names when it
# registers its classes -- confirm in lab.py.
lab.class_names = class_names

# Load validation dataset
dataset = lab.LabDataset()
dataset.load_lab(LAB_DIR, "val")

# Must call before using the dataset
dataset.prepare()

In [None]:
# Inspect one fixed validation image so that every run of the notebook
# analyses the same example. (The earlier random.choice() draw was dead code:
# its result was overwritten on the very next line.)
# To look at a random image instead, use:
#     image_id = random.choice(dataset.image_ids)
image_id = 9
assert image_id in dataset.image_ids, \
    "image_id {} is not part of the loaded dataset".format(image_id)

# Load the image together with its ground-truth class IDs, boxes and
# full-resolution masks (mini masks are disabled).
image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset, config, image_id, use_mini_mask=False)

plt.imshow(image)
plt.show()

In [None]:
# Build the RPN training targets for this image.
# target_rpn_match holds 1 for positive anchors, -1 for negative anchors
# and 0 for neutral anchors.
target_rpn_match, target_rpn_bbox = modellib.build_rpn_targets(
    image.shape, model.anchors, gt_class_id, gt_bbox, model.config)
log("target_rpn_match", target_rpn_match)
log("target_rpn_bbox", target_rpn_bbox)

# Split the anchors by their match label.
positive_anchor_ix = np.where(target_rpn_match == 1)[0]
positive_anchors = model.anchors[positive_anchor_ix]
log("positive_anchors", positive_anchors)

negative_anchor_ix = np.where(target_rpn_match == -1)[0]
negative_anchors = model.anchors[negative_anchor_ix]
log("negative_anchors", negative_anchors)

neutral_anchor_ix = np.where(target_rpn_match == 0)[0]
neutral_anchors = model.anchors[neutral_anchor_ix]
log("neutral anchors", neutral_anchors)

# Shift/resize the positive anchors with their target deltas. The leading
# rows of target_rpn_bbox are used, one per positive anchor, and are scaled
# by RPN_BBOX_STD_DEV before being applied.
refined_anchors = utils.apply_box_deltas(
    positive_anchors,
    target_rpn_bbox[:len(positive_anchors)] * model.config.RPN_BBOX_STD_DEV)
log("refined_anchors", refined_anchors)

In [None]:
# Positive anchors as originally placed (dotted) drawn together with
# their refined versions (solid).
visualize.draw_boxes(
    image,
    boxes=positive_anchors,
    refined_boxes=refined_anchors,
    ax=get_ax(),
)

## 1.b RPN Predictions
Here we run the RPN graph and display its predictions.

In [None]:
# Run RPN sub-graph
pillar = model.keras_model.get_layer("ROI").output  # node to start searching from

# TF 1.4 and 1.9 introduce new versions of NMS, each with a different node
# name. Try the known names in order (covers TF 1.3~1.10) and stop at the
# first one that resolves; nms_node stays None if none of them is found.
nms_node = None
for nms_name in (
        "ROI/rpn_non_max_suppression:0",
        "ROI/rpn_non_max_suppression/NonMaxSuppressionV2:0",
        "ROI/rpn_non_max_suppression/NonMaxSuppressionV3:0",  # TF 1.9-1.10
):
    nms_node = model.ancestor(pillar, nms_name)
    if nms_node is not None:
        break

# Fetch the intermediate tensors of the proposal layer for this image.
rpn = model.run_graph([image], [
    ("rpn_class", model.keras_model.get_layer("rpn_class").output),
    ("pre_nms_anchors", model.ancestor(pillar, "ROI/pre_nms_anchors:0")),
    ("refined_anchors", model.ancestor(pillar, "ROI/refined_anchors:0")),
    ("refined_anchors_clipped", model.ancestor(pillar, "ROI/refined_anchors_clipped:0")),
    ("post_nms_anchor_ix", nms_node),
    ("proposals", model.keras_model.get_layer("ROI").output),
])

In [None]:
# Rank all anchors by their foreground score (channel 1 of rpn_class),
# highest first, and draw the best `limit` of them (before refinement).
limit = 200
sorted_anchor_ids = np.argsort(rpn['rpn_class'][:, :, 1].flatten())[::-1]
top_anchors = model.anchors[sorted_anchor_ids[:limit]]
visualize.draw_boxes(image, boxes=top_anchors, ax=get_ax())

In [None]:
# Left panel: top anchors with their refinement.
# Right panel: the refined anchors after clipping to the image boundaries.
limit = 50

# The graph outputs are denormalized to pixel coordinates for drawing.
pre_nms_anchors = utils.denorm_boxes(
    rpn["pre_nms_anchors"][0], image.shape[:2])
refined_anchors = utils.denorm_boxes(
    rpn["refined_anchors"][0], image.shape[:2])
refined_anchors_clipped = utils.denorm_boxes(
    rpn["refined_anchors_clipped"][0], image.shape[:2])

ax = get_ax(1, 2)
visualize.draw_boxes(
    image,
    boxes=pre_nms_anchors[:limit],
    refined_boxes=refined_anchors[:limit],
    ax=ax[0])
visualize.draw_boxes(
    image,
    refined_boxes=refined_anchors_clipped[:limit],
    ax=ax[1])

In [None]:
# Show refined anchors after non-max suppression.
# post_nms_anchor_ix is used as an index into the clipped refined anchors
# computed in the previous cell; only the first `limit` entries are drawn.
limit = 50
ixs = rpn["post_nms_anchor_ix"][:limit]
visualize.draw_boxes(image, refined_boxes=refined_anchors_clipped[ixs], ax=get_ax())

In [None]:
# Show final proposals.
# They are the same boxes as in the previous step (refined anchors after
# NMS), except that the graph reports them normalized to the [0, 1] range.
limit = 1000

# Scale (y1, x1, y2, x2) back to image coordinates for display.
h, w = config.IMAGE_SHAPE[:2]
proposals = rpn['proposals'][0, :limit] * np.array([h, w] * 2)

visualize.draw_boxes(
    image,
    refined_boxes=proposals,
    ax=get_ax())

In [None]:
# Measure the RPN recall (percent of objects covered by anchors)
# Here we measure recall for 3 different methods:
# - All anchors
# - All refined anchors
# - Refined anchors after NMS
#
# gt_bbox is in pixel coordinates, so every set of boxes compared against it
# must be in pixel coordinates as well.
iou_threshold = 0.7

# model.anchors are used as pixel-space boxes elsewhere in this notebook
# (they are drawn directly on the image), so they can be compared as-is.
recall, positive_anchor_ids = utils.compute_recall(model.anchors, gt_bbox, iou_threshold)
print("All Anchors ({:5})       Recall: {:.3f}  Positive anchors: {}".format(
    model.anchors.shape[0], recall, len(positive_anchor_ids)))

# The refined anchors fetched from the graph are normalized; convert them to
# pixel coordinates first. Comparing the raw normalized boxes against
# pixel-space ground truth (as this cell used to do) yields a meaningless
# recall.
refined_anchors_px = utils.denorm_boxes(rpn['refined_anchors'][0], image.shape[:2])
recall, positive_anchor_ids = utils.compute_recall(refined_anchors_px, gt_bbox, iou_threshold)
print("Refined Anchors ({:5})   Recall: {:.3f}  Positive anchors: {}".format(
    rpn['refined_anchors'].shape[1], recall, len(positive_anchor_ids)))

# `proposals` was already scaled to image coordinates in the previous cell.
recall, positive_anchor_ids = utils.compute_recall(proposals, gt_bbox, iou_threshold)
print("Post NMS Anchors ({:5})  Recall: {:.3f}  Positive anchors: {}".format(
    proposals.shape[0], recall, len(positive_anchor_ids)))

## Stage 2: Proposal Classification

This stage takes the region proposals from the RPN and classifies them.

### 2.a Proposal Classification

Run the classifier heads on proposals to generate class probabilities and bounding box regressions.

In [None]:
# Fetch the proposals fed to the heads plus the outputs of the classifier,
# box-regression, mask and detection layers, keyed by a short name.
mrcnn = model.run_graph([image], [
    (key, model.keras_model.get_layer(layer_name).output)
    for key, layer_name in (
        ("proposals", "ROI"),
        ("probs", "mrcnn_class"),
        ("deltas", "mrcnn_bbox"),
        ("masks", "mrcnn_mask"),
        ("detections", "mrcnn_detection"),
    )
])

In [None]:
# Get detection class IDs. Trim zero padding.
det_class_ids = mrcnn['detections'][0, :, 4].astype(np.int32)

# The first class ID of 0 marks where the zero padding starts. When every
# slot holds a real detection there is no zero at all, so fall back to the
# full length instead of raising IndexError.
zero_ix = np.where(det_class_ids == 0)[0]
det_count = zero_ix[0] if zero_ix.shape[0] > 0 else det_class_ids.shape[0]

# Keep only the real detections so the printed count, the printed class
# names and the drawn boxes all describe the same rows.
det_class_ids = det_class_ids[:det_count]
detections = mrcnn['detections'][0, :det_count]

print("{} detections: {}".format(
    det_count, np.array(class_names)[det_class_ids]))

# Column 4 is the class ID and column 5 the score; background rows get no caption.
captions = ["{} {:.3f}".format(class_names[int(c)], s) if c > 0 else ""
            for c, s in zip(detections[:, 4], detections[:, 5])]

visualize.draw_boxes(
    image,
    refined_boxes=utils.denorm_boxes(detections[:, :4], image.shape[:2]),
    visibilities=[2] * len(detections),
    captions=captions, title="Detections",
    ax=get_ax())

### 2.c Step by Step Detection

Here we take a closer look at how the detections are produced, step by step.

In [None]:
# Fetch the tensors around the ROI classifier head for inspection: the
# anchor and image-meta input layers (both their .input and .output), the
# ROI-align output of the classifier branch and its first conv layer.
# NOTE(review): each tensor is wrapped in tf.identity, presumably so that
# layer inputs can be fetched as graph outputs -- confirm against run_graph.
roi = model.run_graph([image], [
    ("input_anchors_input", tf.identity(model.keras_model.get_layer("input_anchors").input)),
    ("input_anchors_output", tf.identity(model.keras_model.get_layer("input_anchors").output)),
    
    ("input_image_meta_input", tf.identity(model.keras_model.get_layer("input_image_meta").input)),
    ("input_image_meta_output", tf.identity(model.keras_model.get_layer("input_image_meta").output)),
    
    ("roi_align_classifier_output", tf.identity(model.keras_model.get_layer("roi_align_classifier").output)),
    
    ("mrcnn_class_conv1", tf.identity(model.keras_model.get_layer("mrcnn_class_conv1").output)),
])

In [None]:
# The proposals come out of the graph in normalized coordinates;
# scale them to image coordinates and round to whole pixels.
h, w = config.IMAGE_SHAPE[:2]
proposals = np.around(
    mrcnn["proposals"][0] * np.array([h, w] * 2)
).astype(np.int32)

# Per proposal: most likely class ID, that class's probability, and its name.
roi_class_ids = np.argmax(mrcnn["probs"][0], axis=1)
roi_scores = mrcnn["probs"][0, np.arange(len(roi_class_ids)), roi_class_ids]
roi_class_names = np.array(class_names)[roi_class_ids]
# Proposals assigned to any class other than 0 (background).
roi_positive_ixs = np.flatnonzero(roi_class_ids > 0)

# How many ROIs vs empty (all-zero) rows?
print("{} Valid proposals out of {}".format(
    np.sum(np.any(proposals, axis=1)), proposals.shape[0]))
print("{} Positive ROIs".format(len(roi_positive_ixs)))

# Class counts
names, counts = np.unique(roi_class_names, return_counts=True)
print(list(zip(names, counts)))

In [None]:
# Display every proposal whose predicted class ID is 1, before refinement.
# Each box is captioned with its class name and confidence score
# (background boxes, if any were selected, would be dotted and uncaptioned).
limit = 100
ixs = np.flatnonzero(roi_class_ids == 1)
print(len(ixs))

captions = [
    "{} {:.3f}".format(class_names[c], s) if c > 0 else ""
    for c, s in zip(roi_class_ids[ixs], roi_scores[ixs])
]
visualize.draw_boxes(
    image,
    boxes=proposals[ixs],
    visibilities=np.where(roi_class_ids[ixs] > 0, 2, 1),
    captions=captions,
    title="ROIs Before Refinement",
    ax=get_ax())

#### Apply Bounding Box Refinement

In [None]:
# Class-specific bounding box shifts: for every proposal pick the deltas
# predicted for its most likely class.
roi_bbox_specific = mrcnn["deltas"][0, np.arange(proposals.shape[0]), roi_class_ids]
log("roi_bbox_specific", roi_bbox_specific)

# Apply bounding box transformations
# Shape: [N, (y1, x1, y2, x2)]
refined_proposals = utils.apply_box_deltas(
    proposals, roi_bbox_specific * config.BBOX_STD_DEV).astype(np.int32)
log("refined_proposals", refined_proposals)

# Show positive proposals: a random sample of at most `limit` of them.
# A shuffled index range is used instead of np.random.randint so that
#   * an image with no positive ROIs yields an empty sample instead of a
#     ValueError (randint(0, 0) is invalid), and
#   * no ROI is drawn twice (randint samples with replacement).
# To display all positives instead: ids = np.arange(len(roi_positive_ixs))
limit = 100
ids = np.random.permutation(len(roi_positive_ixs))[:limit]
captions = ["{} {:.3f}".format(class_names[c], s) if c > 0 else ""
            for c, s in zip(roi_class_ids[roi_positive_ixs][ids], roi_scores[roi_positive_ixs][ids])]
visualize.draw_boxes(image, boxes=proposals[roi_positive_ixs][ids],
                     refined_boxes=refined_proposals[roi_positive_ixs][ids],
                     visibilities=np.where(roi_class_ids[roi_positive_ixs][ids] > 0, 1, 0),
                     captions=captions, title="ROIs After Refinement",
                     ax=get_ax())

#### Filter Low Confidence Detections

In [None]:
# Start the filtering pipeline: keep only proposals whose predicted
# class is not background (class ID 0).
keep = np.flatnonzero(roi_class_ids > 0)
print("Keep {} detections:\n{}".format(len(keep), keep))

In [None]:
# Narrow `keep` (from the previous cell) to the detections whose score
# reaches the configured minimum confidence.
confident_ixs = np.flatnonzero(roi_scores >= config.DETECTION_MIN_CONFIDENCE)
keep = np.intersect1d(keep, confident_ixs)
print("Remove boxes below {} confidence. Keep {}:\n{}".format(
    config.DETECTION_MIN_CONFIDENCE, len(keep), keep))

#### Per-Class Non-Max Suppression

In [None]:
# Run non-max suppression separately for each class among the
# detections that survived the previous filters.
pre_nms_boxes = refined_proposals[keep]
pre_nms_scores = roi_scores[keep]
pre_nms_class_ids = roi_class_ids[keep]

nms_keep = []
for class_id in np.unique(pre_nms_class_ids):
    # Positions, within the pre-NMS arrays, of this class's detections
    ixs = np.flatnonzero(pre_nms_class_ids == class_id)
    # NMS returns positions relative to the per-class subset ...
    class_keep = utils.non_max_suppression(
        pre_nms_boxes[ixs],
        pre_nms_scores[ixs],
        config.DETECTION_NMS_THRESHOLD)
    # ... so map them back to indices into the full proposal arrays
    class_keep = keep[ixs[class_keep]]
    nms_keep = np.union1d(nms_keep, class_keep)
    print("{:22}: {} -> {}".format(
        class_names[class_id][:20], keep[ixs], class_keep))

# Only proposals that passed both the earlier filters and NMS remain.
keep = np.intersect1d(keep, nms_keep).astype(np.int32)
print("\nKept after per-class NMS: {}\n{}".format(len(keep), keep))

In [None]:
# Draw every detection that survived the filtering and NMS steps:
# the original proposal alongside its refined box.
# (For a random sample use: ixs = np.random.randint(0, len(keep), 10))
ixs = np.arange(len(keep))  # Display all
captions = [
    "{} {:.3f}".format(class_names[c], s) if c > 0 else ""
    for c, s in zip(roi_class_ids[keep][ixs], roi_scores[keep][ixs])
]
visualize.draw_boxes(
    image,
    boxes=proposals[keep][ixs],
    refined_boxes=refined_proposals[keep][ixs],
    visibilities=np.where(roi_class_ids[keep][ixs] > 0, 1, 0),
    captions=captions,
    title="Detections after NMS",
    ax=get_ax())

## Stage 3: Generating Masks

This stage takes the detections (refined bounding boxes and class IDs) from the previous layer and runs the mask head to generate segmentation masks for every instance.

### 3.a Mask Targets

These are the training targets for the mask branch

In [None]:
# Show the ground-truth masks. The last axis of gt_mask is moved to the
# front so that display_images receives one 2-D mask per entry.
display_images(np.transpose(gt_mask, [2, 0, 1]), cmap="Blues")

### 3.b Predicted Masks

In [None]:
# Get predictions of mask head
mrcnn = model.run_graph([image], [
    ("detections", model.keras_model.get_layer("mrcnn_detection").output),
    ("masks", model.keras_model.get_layer("mrcnn_mask").output),
])

# Get detection class IDs. Trim zero padding.
det_class_ids = mrcnn['detections'][0, :, 4].astype(np.int32)
# The first class ID of 0 marks the start of the padding. If every slot
# holds a real detection there is no zero, so use the full length rather
# than raising IndexError.
zero_ix = np.where(det_class_ids == 0)[0]
det_count = zero_ix[0] if zero_ix.shape[0] > 0 else det_class_ids.shape[0]
det_class_ids = det_class_ids[:det_count]

# Use the notebook-level class_names (which also sizes config.NUM_CLASSES),
# consistent with every other cell that maps class IDs to names.
print("{} detections: {}".format(
    det_count, np.array(class_names)[det_class_ids]))

In [None]:
# Masks
# Detection boxes in pixel coordinates.
det_boxes = utils.denorm_boxes(mrcnn["detections"][0, :, :4], image.shape[:2])

# For each detection keep only the mask channel of its predicted class.
det_mask_specific = np.array([
    mrcnn["masks"][0, det_ix, :, :, class_id]
    for det_ix, class_id in enumerate(det_class_ids)
])

# Pair every class-specific mask with its detection box and unmold it
# against the image shape.
det_masks = np.array([
    utils.unmold_mask(small_mask, det_box, image.shape)
    for small_mask, det_box in zip(det_mask_specific, det_boxes)
])

log("det_mask_specific", det_mask_specific)
log("det_masks", det_masks)

In [None]:
# Show the first four class-specific masks exactly as predicted by the
# mask head, scaled by 255 for display.
display_images(det_mask_specific[:4] * 255, cmap="Blues", interpolation="none")

In [None]:
# Show the first four masks after unmolding, scaled by 255 for display.
display_images(det_masks[:4] * 255, cmap="Blues", interpolation="none")

## Visualize Activations

In some cases it helps to look at the output from different layers and visualize them to catch issues and odd patterns.

In [None]:
# Get activations of a few sample layers
activations = model.run_graph([image], [
    ("input_image",        tf.identity(model.keras_model.get_layer("input_image").output)),
    ("res4w_out",          model.keras_model.get_layer("res4w_out").output),  # layer name exists for a ResNet-101 backbone; NOTE(review): other backbones presumably need a different layer name
    ("rpn_bbox",           model.keras_model.get_layer("rpn_bbox").output),
    ("roi",                model.keras_model.get_layer("ROI").output),
])

In [None]:
# Input image as fed to the network (normalized); unmold_image converts it
# back for display. The result is assigned to `_` to suppress the cell's
# text output.
_ = plt.imshow(modellib.unmold_image(activations["input_image"][0],config))

In [None]:
# Backbone feature map
# NOTE(review): expected shape is presumably 64 * 64 * 1024 per image; the
# print below shows the actual shape.
print(activations["res4w_out"].shape)
# Display the first 100 channels, one image per channel.
display_images(np.transpose(activations["res4w_out"][0,:,:,:100], [2, 0, 1]))

In [None]:
# Histograms of RPN bounding box deltas, one panel per delta component.
# The deltas are laid out as (dy, dx, log(dh), log(dw)): column 2 scales the
# height and column 3 the width. The panels were previously titled
# "dw"/"dh" in the wrong order.
plt.figure(figsize=(12, 3))
for delta_col, delta_name in enumerate(["dy", "dx", "dh", "dw"]):
    plt.subplot(1, 4, delta_col + 1)
    plt.title(delta_name)
    _ = plt.hist(activations["rpn_bbox"][0, :, delta_col], 50)

In [None]:
# Scatter the corner coordinates of the generated proposals:
# top-left corners (y1, x1) on the left, bottom-right (y2, x2) on the right.
# In each panel the first named coordinate is on the horizontal axis.
plt.figure(figsize=(10, 5))
for panel, (panel_title, first_col, second_col) in enumerate(
        [("y1, x1", 0, 1), ("y2, x2", 2, 3)], start=1):
    plt.subplot(1, 2, panel)
    plt.title(panel_title)
    plt.scatter(activations["roi"][0, :, first_col],
                activations["roi"][0, :, second_col])
plt.show()