In [1]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

# import tensorflow as tf
# tf.compat.v1.enable_eager_execution()

  if os.name is 'nt':


In [2]:
class ShapesConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
    # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 3  # background + 3 shapes

    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5
    
config = ShapesConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     8
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 8
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  128
IMAGE_META_SIZE                16
IMAGE_MIN_DIM                  128
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [128 128   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE             

In [3]:
def get_ax(rows=1, cols=1, size=8):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.
    
    Change the default size attribute to control the size
    of rendered images
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax

In [4]:
class ShapesDataset(utils.Dataset):
    """Generates the shapes synthetic dataset. The dataset consists of simple
    shapes (triangles, squares, circles) placed randomly on a blank surface.
    The images are generated on the fly. No file access required.
    """

    def load_shapes(self, count, height, width):
        """Generate the requested number of synthetic images.
        count: number of images to generate.
        height, width: the size of the generated images.
        """
        # Add classes
        self.add_class("shapes", 1, "square")
        self.add_class("shapes", 2, "circle")
        self.add_class("shapes", 3, "triangle")

        # Add images
        # Generate random specifications of images (i.e. color and
        # list of shapes sizes and locations). This is more compact than
        # actual images. Images are generated on the fly in load_image().
        for i in range(count):
            bg_color, shapes = self.random_image(height, width)
            self.add_image("shapes", image_id=i, path=None,
                           width=width, height=height,
                           bg_color=bg_color, shapes=shapes)

    def load_image(self, image_id):
        """Generate an image from the specs of the given image ID.
        Typically this function loads the image from a file, but
        in this case it generates the image on the fly from the
        specs in image_info.
        """
        info = self.image_info[image_id]
        bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
        image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
        image = image * bg_color.astype(np.uint8)
        for shape, color, dims in info['shapes']:
            image = self.draw_shape(image, shape, dims, color)
        return image

    def image_reference(self, image_id):
        """Return the shapes data of the image."""
        info = self.image_info[image_id]
        if info["source"] == "shapes":
            return info["shapes"]
        else:
            super(self.__class__).image_reference(self, image_id)

    def load_mask(self, image_id):
        """Generate instance masks for shapes of the given image ID.
        """
        info = self.image_info[image_id]
        shapes = info['shapes']
        count = len(shapes)
        mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
        for i, (shape, _, dims) in enumerate(info['shapes']):
            mask[:, :, i:i+1] = self.draw_shape(mask[:, :, i:i+1].copy(),
                                                shape, dims, 1)
        # Handle occlusions
        occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
        for i in range(count-2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion
            occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))
        # Map class names to class IDs.
        class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
        return mask.astype(np.bool), class_ids.astype(np.int32)

    def draw_shape(self, image, shape, dims, color):
        """Draws a shape from the given specs."""
        # Get the center x, y and the size s
        x, y, s = dims
        if shape == 'square':
            cv2.rectangle(image, (x-s, y-s), (x+s, y+s), color, -1)
        elif shape == "circle":
            cv2.circle(image, (x, y), s, color, -1)
        elif shape == "triangle":
            points = np.array([[(x, y-s),
                                (x-s/math.sin(math.radians(60)), y+s),
                                (x+s/math.sin(math.radians(60)), y+s),
                                ]], dtype=np.int32)
            cv2.fillPoly(image, points, color)
        return image

    def random_shape(self, height, width):
        """Generates specifications of a random shape that lies within
        the given height and width boundaries.
        Returns a tuple of three valus:
        * The shape name (square, circle, ...)
        * Shape color: a tuple of 3 values, RGB.
        * Shape dimensions: A tuple of values that define the shape size
                            and location. Differs per shape type.
        """
        # Shape
        shape = random.choice(["square", "circle", "triangle"])
        # Color
        color = tuple([random.randint(0, 255) for _ in range(3)])
        # Center x, y
        buffer = 20
        y = random.randint(buffer, height - buffer - 1)
        x = random.randint(buffer, width - buffer - 1)
        # Size
        s = random.randint(buffer, height//4)
        return shape, color, (x, y, s)

    def random_image(self, height, width):
        """Creates random specifications of an image with multiple shapes.
        Returns the background color of the image and a list of shape
        specifications that can be used to draw the image.
        """
        # Pick random background color
        bg_color = np.array([random.randint(0, 255) for _ in range(3)])
        # Generate a few random shapes and record their
        # bounding boxes
        shapes = []
        boxes = []
        N = random.randint(1, 4)
        for _ in range(N):
            shape, color, dims = self.random_shape(height, width)
            shapes.append((shape, color, dims))
            x, y, s = dims
            boxes.append([y-s, x-s, y+s, x+s])
        # Apply non-max suppression wit 0.3 threshold to avoid
        # shapes covering each other
        keep_ixs = utils.non_max_suppression(np.array(boxes), np.arange(N), 0.3)
        shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
        return bg_color, shapes

In [5]:
# Training dataset
dataset_train = ShapesDataset()
dataset_train.load_shapes(500, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset_train.prepare()

# Validation dataset
dataset_val = ShapesDataset()
dataset_val.load_shapes(50, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset_val.prepare()

In [6]:
# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

----------------------------debug traceback_utils.py 65
The caller function is 'build' in module 'mrcnn.model'
----------------------------debug traceback_utils.py 65
The caller function is 'Input' in module 'keras.engine.input_layer'
----------------------------debug traceback_utils.py 65
The caller function is 'build' in module 'mrcnn.model'
----------------------------debug traceback_utils.py 65
The caller function is 'Input' in module 'keras.engine.input_layer'
----------------------------debug traceback_utils.py 65
The caller function is 'build' in module 'mrcnn.model'
----------------------------debug traceback_utils.py 65
The caller function is 'Input' in module 'keras.engine.input_layer'
----------------------------debug traceback_utils.py 65
The caller function is 'build' in module 'mrcnn.model'
----------------------------debug traceback_utils.py 65
The caller function is 'Input' in module 'keras.engine.input_layer'
----------------------------debug traceback_utils.py 65
The 

----------------------------debug traceback_utils.py 101
The caller function is '_infer_output_signature' in module 'keras.engine.base_layer' (current in base_layer.py 989)
----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'conv_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): bn2a_branch2c
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 256) dtype=float32 (created by layer 'res2a_branch2c')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'ker

----------------------------debug traceback_utils.py 101
The caller function is '_infer_output_signature' in module 'keras.engine.base_layer' (current in base_layer.py 989)
----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'conv_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): bn3a_branch1
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 512) dtype=float32 (created by layer 'res3a_branch1')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras

----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'conv_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): bn4a_branch2b
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 256) dtype=float32 (created by layer 'res4a_branch2b')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 670)
--------

----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'identity_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): activation_21
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 256) dtype=float32 (created by layer 'bn4d_branch2a')>,)
kwargs: {}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 670)
----------------------

----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'identity_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): activation_28
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 256) dtype=float32 (created by layer 'bn4g_branch2b')>,)
kwargs: {}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 670)
----------------------

The caller function is 'identity_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): add_16
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: ([<KerasTensor: shape=(None, None, None, 1024) dtype=float32 (created by layer 'bn4j_branch2c')>, <KerasTensor: shape=(None, None, None, 1024) dtype=float32 (created by layer 'res4i_out')>],)
kwargs: {}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 670)
----------------------------debug keras_tensor.py 243
The caller function is 'keras_tensor_to_pl

The caller function is 'identity_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): bn4n_branch2a
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 256) dtype=float32 (created by layer 'res4n_branch2a')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 670)
----------------------------debug keras_tensor.py 243
The caller function is 'keras_tensor_to_placeholder' in module 'keras.engine.keras_tensor' (currently in keras_t

The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 256) dtype=float32 (created by layer 'res4q_branch2b')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 670)
----------------------------debug keras_tensor.py 243
The caller function is 'keras_tensor_to_placeholder' in module 'keras.engine.keras_tensor' (currently in keras_tensor.py 248)
keras_tensor.py self: KerasTensor(type_spec=TensorSpec(shape=(None, None, None, 256), dtype=tf.float32, name=None), name='res4q_bran

----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'identity_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): add_26
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: ([<KerasTensor: shape=(None, None, None, 1024) dtype=float32 (created by layer 'bn4t_branch2c')>, <KerasTensor: shape=(None, None, None, 1024) dtype=float32 (created by layer 'res4s_out')>],)
kwargs: {}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module

----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'identity_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): bn4v_branch2c
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 1024) dtype=float32 (created by layer 'res4v_branch2c')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 670)
---

----------------------------debug traceback_utils.py 101
The caller function is '_infer_output_signature' in module 'keras.engine.base_layer' (current in base_layer.py 989)
----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'identity_block' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): bn5b_branch2b
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 512) dtype=float32 (created by layer 'res5b_branch2b')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 

----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'build' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): fpn_p6
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(None, None, None, 256) dtype=float32 (created by layer 'fpn_p5')>,)
kwargs: {}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 670)
----------------------------debug keras_tenso

----------------------------debug api.py 321
The caller function is 'tf__call' in module '__autograph_generated_fileswlth1g8'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__call' in module '__autograph_generated_fileswlth1g8'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__call' in module '__autograph_generated_fileswlth1g8'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__call' in module '__autograph_generated_fileswlth1g8'
----------------------------debug api.py 451
The caller function is 'converted_call' in module

----------------------------debug api.py 321
The caller function is 'tf__apply_box_deltas_graph' in module '__autograph_generated_fileu7qgygo9'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__apply_box_deltas_graph' in module '__autograph_generated_fileu7qgygo9'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__apply_box_deltas_graph' in module '__autograph_generated_fileu7qgygo9'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'loop_body' in module '__autograph_generated_fileivrt_ppa'
----------------------------debug api.p

----------------------------debug api.py 321
The caller function is 'tf__apply_box_deltas_graph' in module '__autograph_generated_fileu7qgygo9'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__apply_box_deltas_graph' in module '__autograph_generated_fileu7qgygo9'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'loop_body' in module '__autograph_generated_fileivrt_ppa'
----------------------------debug api.py 321
The caller function is 'loop_body' in module '__autograph_generated_fileivrt_ppa'
----------------------------debug api.py 321
The caller function is 'tf__batch_slice' in module '__autograph_generated_fileivrt_ppa'
----------------------------debug api.py 321
The caller f

----------------------------debug api.py 321
The caller function is 'nms' in module '__autograph_generated_fileswlth1g8'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'nms' in module '__autograph_generated_fileswlth1g8'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'nms' in module '__autograph_generated_fileswlth1g8'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'nms' in module '__autograph_generated_fileswlth1g8'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.

----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug func_graph.py 1187
The caller function is 'cond_v2' in module 'tensorflow.python.ops.cond_v2'
----------------------------debug f

----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
---------------------------

----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
------------------

The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__overlaps_graph' in module '__autograph_generated_file7zsp5_sx'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 321
The caller function is 'tf__overlaps_graph' in module '__autograph_generated_file7zsp5_sx'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__overlaps_graph' in module '__autograph_generated_file7zsp5_sx'
----------------------------debug api.py 451
The caller function is 'converted_call' in mo

----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
------------------

----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
------------------

----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
------------------

----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug func_graph.py 1187
The caller function is 'cond_v2' in module 'tensorflow.python.ops.cond_v2'
----------------------------debug func_graph.py 1187
The caller function is 'cond_v2' in module 'tensorflow.python.ops.cond_v2'
The caller function is 'placeholder' in module 'tensorflow.python.ops.array_ops' (current in gen_array_ops.py 6902)
name: None
dtype: 1
----------------------------debug api.py 321
The c

----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
---------------------------

----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__box_refinement_graph' in module '__autograph_generated_filep9st6577'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__detection_targets_graph' in module '__autograph_generated_filea_c3mqly'
---------------------------

----------------------------debug api.py 321
The caller function is 'tf__call' in module '__autograph_generated_filedh12r6iv'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__call' in module '__autograph_generated_filedh12r6iv'
----------------------------debug api.py 321
The caller function is 'tf__call' in module '__autograph_generated_filedh12r6iv'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__call' in module '__autograph_generated_filedh12r6iv'
----------------------------debug api.py 451
The caller function is 'converted_call' in module 'tensorflow.python.autograph.impl.api'
----------------------------debug api.py 321
The caller function is 'tf__call' in module '__au

----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'fpn_classifier_graph' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): mrcnn_class_bn1
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(8, None, 1, 1, 1024) dtype=float32 (created by layer 'mrcnn_class_conv1')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function is '<listcomp>' in module 'tensorflow.python.util.nest' (currently in keras_tensor.py 

outputs: Tensor("mrcnn_mask_conv1/conv2d_2/BiasAdd:0", shape=(None, 14, 14, 256), dtype=float32)
----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'build_fpn_mask_graph' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): mrcnn_mask_bn1
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(8, None, 14, 14, 256) dtype=float32 (created by layer 'mrcnn_mask_conv1')>,)
kwargs: {'training': False}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller

outputs: Tensor("mrcnn_mask_bn4/batch_norm_5/FusedBatchNormV3:0", shape=(None, 14, 14, 256), dtype=float32)
----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'build_fpn_mask_graph' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): activation_73
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: (<KerasTensor: shape=(8, None, 14, 14, 256) dtype=float32 (created by layer 'mrcnn_mask_bn4')>,)
kwargs: {}
----------------------------debug base_layer.py 1009
----------------------------debug base_layer.py 1011
----------------------------debug base_layer.py 1013
----------------------------debug base_layer.py 1023
The caller function is '_functional_construction_call' in module 'keras.engine.base_layer' (current in base_layer.py 2358)
The caller function

----------------------------debug func_graph.py 1187
The caller function is 'cond_v2' in module 'tensorflow.python.ops.cond_v2'
----------------------------debug func_graph.py 1187
The caller function is 'cond_v2' in module 'tensorflow.python.ops.cond_v2'
The caller function is 'placeholder' in module 'tensorflow.python.ops.array_ops' (current in gen_array_ops.py 6902)
name: None
dtype: 1
The caller function is 'placeholder' in module 'tensorflow.python.ops.array_ops' (current in gen_array_ops.py 6902)
name: None
dtype: 1
----------------------------debug base_layer.py 2462
----------------------------debug base_layer.py 2471
----------------------------debug traceback_utils.py 65
The caller function is 'build' in module 'mrcnn.model'
----------------------------debug base_layer.py 983
str(self.name): mrcnn_class_loss
The caller function is 'error_handler' in module 'keras.utils.traceback_utils' (current in base_layer.py 989)
args: ([<KerasTensor: shape=(8, None) dtype=int32 (created b

In [9]:
# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
#     model.load_weights(COCO_MODEL_PATH, by_name=True,
#                        exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
#                                 "mrcnn_bbox", "mrcnn_mask"])
    model.load_weights(COCO_MODEL_PATH, by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox"])
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)


ValueError: Layer #395 (named "mrcnn_mask"), weight <tf.Variable 'mrcnn_mask/kernel:0' shape=(1, 1, 256, 4) dtype=float32, numpy=
array([[[[ 0.01472144, -0.12659085, -0.05539732, -0.07775003],
         [-0.07621042, -0.12601024,  0.09985492,  0.00474116],
         [ 0.09449607,  0.00414918,  0.10741952, -0.06358542],
         ...,
         [ 0.10174569,  0.04687637,  0.0188061 ,  0.0599374 ],
         [ 0.08347189, -0.13839446,  0.14561173, -0.05601288],
         [ 0.08270496, -0.13145635,  0.0428959 , -0.08929215]]]],
      dtype=float32)> has shape (1, 1, 256, 4), but the saved weight has shape (81, 256, 1, 1).

In [16]:
# has shape (1, 1, 256, 4), but the saved weight has shape (81, 256, 1, 1)
# has shape (1024, 4), but the saved weight has shape (1024, 81)
# has shape (1, 1, 256, 4), but the saved weight has shape (81, 256, 1, 1)
COCO_MODEL_PATH
# MODEL_DIR

'C:\\Users\\zhhua\\OneDrive\\Desktop\\PythonProgram\\CAT\\mask_rcnn_coco.h5'

In [14]:
import h5py
with h5py.File('C:\\Users\\zhhua\\OneDrive\\Desktop\\PythonProgram\\CAT\\mask_rcnn_coco.h5', "r") as f:
    # Print all root level object names (aka keys) 
    # these can be group or dataset names 
    print("Keys: %s" % f.keys())
    # get first object name/key; may or may NOT be a group
    a_group_key = list(f.keys())[0]

    # get the object type for a_group_key: usually group or dataset
    print(type(f[a_group_key])) 

    # If a_group_key is a group name, 
    # this gets the object names in the group and returns as a list
    data = list(f[a_group_key])

    # If a_group_key is a dataset name, 
    # this gets the dataset values and returns as a list
    data = list(f[a_group_key])
    # preferred methods to get dataset values:
    ds_obj = f[a_group_key]      # returns as a h5py dataset object
#     ds_arr = f[a_group_key][()]  # returns as a numpy array
    print('ds_obj: %s' % ds_obj)

Keys: <KeysViewHDF5 ['ROI', 'activation_1', 'activation_10', 'activation_11', 'activation_12', 'activation_13', 'activation_14', 'activation_15', 'activation_16', 'activation_17', 'activation_18', 'activation_19', 'activation_2', 'activation_20', 'activation_21', 'activation_22', 'activation_23', 'activation_24', 'activation_25', 'activation_26', 'activation_27', 'activation_28', 'activation_29', 'activation_3', 'activation_30', 'activation_31', 'activation_32', 'activation_33', 'activation_34', 'activation_35', 'activation_36', 'activation_37', 'activation_38', 'activation_39', 'activation_4', 'activation_40', 'activation_41', 'activation_42', 'activation_43', 'activation_44', 'activation_45', 'activation_46', 'activation_47', 'activation_48', 'activation_49', 'activation_5', 'activation_50', 'activation_51', 'activation_52', 'activation_53', 'activation_54', 'activation_55', 'activation_56', 'activation_57', 'activation_58', 'activation_59', 'activation_6', 'activation_60', 'activati

In [None]:
import warnings
warnings.filterwarnings('ignore')
# Train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layerh ks to train by name pattern.
# model.train(dataset_train, dataset_val, 
#             learning_rate=config.LEARNING_RATE, 
#             epochs=1, 
#             layers='heads')
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE, 
            epochs=1, 
            layers='3+')

In [6]:
def data_generator(dataset, config, shuffle=True, augment=False, augmentation=None,
                   random_rois=0, batch_size=1, detection_targets=False,
                   no_augmentation_sources=None):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: (deprecated. Use augmentation instead). If true, apply random
        image augmentation. Currently, only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
        For example, passing imgaug.augmenters.Fliplr(0.5) flips images
        right/left 50% of the time.
    random_rois: If > 0 then generate proposals to be used to train the
                 network classifier and mask heads. Useful if training
                 the Mask RCNN part without the RPN.
    batch_size: How many images to return in each call
    detection_targets: If True, generate detection targets (class IDs, bbox
        deltas, and masks). Typically for debugging or visualizations because
        in trainig detection targets are generated by DetectionTargetLayer.
    no_augmentation_sources: Optional. List of sources to exclude for
        augmentation. A source is string that identifies a dataset and is
        defined in the Dataset class.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differs depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                are those of the image unless use_mini_mask is True, in which
                case they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    b = 0  # batch item index
    image_index = -1
    image_ids = np.copy(dataset.image_ids)
    error_count = 0
    no_augmentation_sources = no_augmentation_sources or []

    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)

    # Keras requires a generator to run indefinitely.
    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of an epoch.
            image_index = (image_index + 1) % len(image_ids)
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)

            # Get GT bounding boxes and masks for image.
            image_id = image_ids[image_index]

            # If the image source is not to be augmented pass None as augmentation
            if dataset.image_info[image_id]['source'] in no_augmentation_sources:
                image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                load_image_gt(dataset, config, image_id, augment=augment,
                              augmentation=None,
                              use_mini_mask=config.USE_MINI_MASK)
            else:
                image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                    load_image_gt(dataset, config, image_id, augment=augment,
                                augmentation=augmentation,
                                use_mini_mask=config.USE_MINI_MASK)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            if not np.any(gt_class_ids > 0):
                continue

            # RPN Targets
            rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
                                                    gt_class_ids, gt_boxes, config)

            # Mask R-CNN Targets
            if random_rois:
                rpn_rois = generate_random_rois(
                    image.shape, random_rois, gt_class_ids, gt_boxes)
                if detection_targets:
                    rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask =\
                        build_detection_targets(
                            rpn_rois, gt_class_ids, gt_boxes, gt_masks, config)

            # Init batch arrays
            if b == 0:
                batch_image_meta = np.zeros(
                    (batch_size,) + image_meta.shape, dtype=image_meta.dtype)
                batch_rpn_match = np.zeros(
                    [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype)
                batch_rpn_bbox = np.zeros(
                    [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype)
                batch_images = np.zeros(
                    (batch_size,) + image.shape, dtype=np.float32)
                batch_gt_class_ids = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
                batch_gt_boxes = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
                batch_gt_masks = np.zeros(
                    (batch_size, gt_masks.shape[0], gt_masks.shape[1],
                     config.MAX_GT_INSTANCES), dtype=gt_masks.dtype)
                if random_rois:
                    batch_rpn_rois = np.zeros(
                        (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype)
                    if detection_targets:
                        batch_rois = np.zeros(
                            (batch_size,) + rois.shape, dtype=rois.dtype)
                        batch_mrcnn_class_ids = np.zeros(
                            (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype)
                        batch_mrcnn_bbox = np.zeros(
                            (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype)
                        batch_mrcnn_mask = np.zeros(
                            (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype)

            # If more instances than fits in the array, sub-sample from them.
            if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
                ids = np.random.choice(
                    np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False)
                gt_class_ids = gt_class_ids[ids]
                gt_boxes = gt_boxes[ids]
                gt_masks = gt_masks[:, :, ids]

            # Add to batch
            batch_image_meta[b] = image_meta
            batch_rpn_match[b] = rpn_match[:, np.newaxis]
            batch_rpn_bbox[b] = rpn_bbox
            batch_images[b] = mold_image(image.astype(np.float32), config)
            batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids
            batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
            batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks
            if random_rois:
                batch_rpn_rois[b] = rpn_rois
                if detection_targets:
                    batch_rois[b] = rois
                    batch_mrcnn_class_ids[b] = mrcnn_class_ids
                    batch_mrcnn_bbox[b] = mrcnn_bbox
                    batch_mrcnn_mask[b] = mrcnn_mask
            b += 1

            # Batch full?
            if b >= batch_size:
                inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox,
                          batch_gt_class_ids, batch_gt_boxes, batch_gt_masks]
                outputs = []

                if random_rois:
                    inputs.extend([batch_rpn_rois])
                    if detection_targets:
                        inputs.extend([batch_rois])
                        # Keras requires that output and targets have the same number of dimensions
                        batch_mrcnn_class_ids = np.expand_dims(
                            batch_mrcnn_class_ids, -1)
                        outputs.extend(
                            [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask])

                yield inputs, outputs

                # start a new batch
                b = 0
        except (GeneratorExit, KeyboardInterrupt):
            raise
        except:
            # Log it and skip the image
            logging.exception("Error processing image {}".format(
                dataset.image_info[image_id]))
            error_count += 1
            if error_count > 5:
                raise

In [7]:
def compute_backbone_shapes(config, image_shape):
    """Computes the width and height of each stage of the backbone network.

    Returns:
        [N, (height, width)]. Where N is the number of stages
    """
    if callable(config.BACKBONE):
        return config.COMPUTE_BACKBONE_SHAPE(image_shape)

    # Currently supports ResNet only
    assert config.BACKBONE in ["resnet50", "resnet101"]
    return np.array(
        [[int(math.ceil(image_shape[0] / stride)),
            int(math.ceil(image_shape[1] / stride))]
            for stride in config.BACKBONE_STRIDES])

In [8]:
def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
                  use_mini_mask=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).

    augment: (deprecated. Use augmentation instead). If true, apply random
        image augmentation. Currently, only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
        For example, passing imgaug.augmenters.Fliplr(0.5) flips images
        right/left 50% of the time.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.

    Returns:
    image: [height, width, 3]
    shape: the original shape of the image before resizing and cropping.
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    original_shape = image.shape
    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        min_scale=config.IMAGE_MIN_SCALE,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE)
    mask = utils.resize_mask(mask, scale, padding, crop)

    # Random horizontal flips.
    # TODO: will be removed in a future update in favor of augmentation
    if augment:
        logging.warning("'augment' is deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    # Augmentation
    # This requires the imgaug lib (https://github.com/aleju/imgaug)
    if augmentation:
        import imgaug

        # Augmenters that are safe to apply to masks
        # Some, such as Affine, have settings that make them unsafe, so always
        # test your augmentation on masks
        MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
                           "Fliplr", "Flipud", "CropAndPad",
                           "Affine", "PiecewiseAffine"]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTERS

        # Store shapes before augmentation to compare
        image_shape = image.shape
        mask_shape = mask.shape
        # Make augmenters deterministic to apply similarly to images and masks
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        # Change mask to np.uint8 because imgaug doesn't support np.bool
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        # Verify that shapes didn't change
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Change mask back to bool
        mask = mask.astype(np.bool)

    # Note that some boxes might be all zeros if the corresponding mask got cropped out.
    # and here is to filter them out
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]
    # Bounding boxes. Note that some boxes might be all zeros
    # if the corresponding mask got cropped out.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = compose_image_meta(image_id, original_shape, image.shape,
                                    window, scale, active_class_ids)

    return image, image_meta, class_ids, bbox, mask

In [11]:
def compose_image_meta(image_id, original_image_shape, image_shape,
                       window, scale, active_class_ids):
    """Takes attributes of an image and puts them in one 1D array.

    image_id: An int ID of the image. Useful for debugging.
    original_image_shape: [H, W, C] before resizing or padding.
    image_shape: [H, W, C] after resizing and padding
    window: (y1, x1, y2, x2) in pixels. The area of the image where the real
            image is (excluding the padding)
    scale: The scaling factor applied to the original image (float32)
    active_class_ids: List of class_ids available in the dataset from which
        the image came. Useful if training on images from multiple datasets
        where not all classes are present in all datasets.
    """
    meta = np.array(
        [image_id] +                  # size=1
        list(original_image_shape) +  # size=3
        list(image_shape) +           # size=3
        list(window) +                # size=4 (y1, x1, y2, x2) in image cooredinates
        [scale] +                     # size=1
        list(active_class_ids)        # size=num_classes
    )
    return meta


In [13]:
def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config):
    """Given the anchors and GT boxes, compute overlaps and identify positive
    anchors and deltas to refine them to match their corresponding GT boxes.

    anchors: [num_anchors, (y1, x1, y2, x2)]
    gt_class_ids: [num_gt_boxes] Integer class IDs.
    gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)]

    Returns:
    rpn_match: [N] (int32) matches between anchors and GT boxes.
               1 = positive anchor, -1 = negative anchor, 0 = neutral
    rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    """
    # RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral
    rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32)
    # RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))]
    rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4))

    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_ix = np.where(gt_class_ids < 0)[0]
    if crowd_ix.shape[0] > 0:
        # Filter out crowds from ground truth class IDs and boxes
        non_crowd_ix = np.where(gt_class_ids > 0)[0]
        crowd_boxes = gt_boxes[crowd_ix]
        gt_class_ids = gt_class_ids[non_crowd_ix]
        gt_boxes = gt_boxes[non_crowd_ix]
        # Compute overlaps with crowd boxes [anchors, crowds]
        crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes)
        crowd_iou_max = np.amax(crowd_overlaps, axis=1)
        no_crowd_bool = (crowd_iou_max < 0.001)
    else:
        # All anchors don't intersect a crowd
        no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool)

    # Compute overlaps [num_anchors, num_gt_boxes]
    overlaps = utils.compute_overlaps(anchors, gt_boxes)

    # Match anchors to GT Boxes
    # If an anchor overlaps a GT box with IoU >= 0.7 then it's positive.
    # If an anchor overlaps a GT box with IoU < 0.3 then it's negative.
    # Neutral anchors are those that don't match the conditions above,
    # and they don't influence the loss function.
    # However, don't keep any GT box unmatched (rare, but happens). Instead,
    # match it to the closest anchor (even if its max IoU is < 0.3).
    #
    # 1. Set negative anchors first. They get overwritten below if a GT box is
    # matched to them. Skip boxes in crowd areas.
    anchor_iou_argmax = np.argmax(overlaps, axis=1)
    anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax]
    rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1
    # 2. Set an anchor for each GT box (regardless of IoU value).
    # If multiple anchors have the same IoU match all of them
    gt_iou_argmax = np.argwhere(overlaps == np.max(overlaps, axis=0))[:,0]
    rpn_match[gt_iou_argmax] = 1
    # 3. Set anchors with high overlap as positive.
    rpn_match[anchor_iou_max >= 0.7] = 1

    # Subsample to balance positive and negative anchors
    # Don't let positives be more than half the anchors
    ids = np.where(rpn_match == 1)[0]
    extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2)
    if extra > 0:
        # Reset the extra ones to neutral
        ids = np.random.choice(ids, extra, replace=False)
        rpn_match[ids] = 0
    # Same for negative proposals
    ids = np.where(rpn_match == -1)[0]
    extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE -
                        np.sum(rpn_match == 1))
    if extra > 0:
        # Rest the extra ones to neutral
        ids = np.random.choice(ids, extra, replace=False)
        rpn_match[ids] = 0

    # For positive anchors, compute shift and scale needed to transform them
    # to match the corresponding GT boxes.
    ids = np.where(rpn_match == 1)[0]
    ix = 0  # index into rpn_bbox
    # TODO: use box_refinement() rather than duplicating the code here
    for i, a in zip(ids, anchors[ids]):
        # Closest gt box (it might have IoU < 0.7)
        gt = gt_boxes[anchor_iou_argmax[i]]

        # Convert coordinates to center plus width/height.
        # GT Box
        gt_h = gt[2] - gt[0]
        gt_w = gt[3] - gt[1]
        gt_center_y = gt[0] + 0.5 * gt_h
        gt_center_x = gt[1] + 0.5 * gt_w
        # Anchor
        a_h = a[2] - a[0]
        a_w = a[3] - a[1]
        a_center_y = a[0] + 0.5 * a_h
        a_center_x = a[1] + 0.5 * a_w

        # Compute the bbox refinement that the RPN should predict.
        rpn_bbox[ix] = [
            (gt_center_y - a_center_y) / a_h,
            (gt_center_x - a_center_x) / a_w,
            np.log(gt_h / a_h),
            np.log(gt_w / a_w),
        ]
        # Normalize
        rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV
        ix += 1

    return rpn_match, rpn_bbox


In [15]:
def mold_image(images, config):
    """Expects an RGB image (or array of images) and subtracts
    the mean pixel and converts it to float. Expects image
    colors in RGB order.
    """
    return images.astype(np.float32) - config.MEAN_PIXEL


In [24]:
import logging
a = data_generator(dataset_train, config)
for train_generator_i in a:
    for train_generator_i_j in train_generator_i:
        print('train_generator_i_j type: %s' % type(train_generator_i_j))
        for train_generator_i_j_k in train_generator_i_j:
            print('train_generator_i_j_k: %s' % train_generator_i_j_k)
            print('train_generator_i_j_k type: %s' % type(train_generator_i_j_k))
            print('train_generator_i_j_k shape: %s' % len(train_generator_i_j_k.shape))
    break

train_generator_i_j type: <class 'list'>
train_generator_i_j_k: [[[[-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   ...
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]]

  [[-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   ...
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]]

  [[-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   ...
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]]

  ...

  [[-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   ...
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]]

  [[-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   ...
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]]

  [[-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   ...
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]
   [-25.7 -68.8  -9.9]]]]
train_generator_i_j_k type: <class 'numpy.ndar

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return mask.astype(np.bool), class_ids.astype(np.int32)


In [2]:
class Config(object):
    """Base configuration class. For custom configurations, create a
    sub-class that inherits from this one and override properties
    that need to be changed.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # NUMBER OF GPUs to use. For CPU training, use 1
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2

    # Number of training steps per epoch
    # This doesn't need to match the size of the training set. Tensorboard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this too small to avoid spending
    # a lot of time on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 50

    # Backbone network architecture
    # Supported values are: resnet50, resnet101.
    # You can also provide a callable that should have the signature
    # of model.resnet_graph. If you do so, you need to supply a callable
    # to COMPUTE_BACKBONE_SHAPE as well
    BACKBONE = "resnet101"

    # Only useful if you supply a callable to BACKBONE. Should compute
    # the shape of each layer of the FPN Pyramid.
    # See model.compute_backbone_shapes
    COMPUTE_BACKBONE_SHAPE = None

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Size of the fully-connected layers in the classification graph
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024

    # Size of the top-down layers used to build the feature pyramid
    TOP_DOWN_PYRAMID_SIZE = 256

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more propsals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after non-maximum supression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Generally, use the "square" resizing mode for training and inferencing
    # and it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch.
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
    #         The multiple of 64 is needed to ensure smooth scaling of feature
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
    # crop:   Picks random crops from the image. First, scales the image based
    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
    #         size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
    #         IMAGE_MAX_DIM is not used in this mode.
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further
    # up scaling. For example, if set to 2 then images are scaled up to double
    # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it.
    # Howver, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
    IMAGE_MIN_SCALE = 0

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14

    # Shape of output mask
    # To change this you also need to change the neural network mask branch
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimzer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Loss weights for more precise optimization.
    # Can be used for R-CNN training setup.
    LOSS_WEIGHTS = {
        "rpn_class_loss": 1.,
        "rpn_bbox_loss": 1.,
        "mrcnn_class_loss": 1.,
        "mrcnn_bbox_loss": 1.,
        "mrcnn_mask_loss": 1.
    }

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROI generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    # Train or freeze batch normalization layers
    #     None: Train BN layers. This is the normal mode
    #     False: Freeze BN layers. Good when using a small batch size
    #     True: (don't use). Set layer in training mode even when inferencing
    TRAIN_BN = False  # Defaulting to False since batch size is often small

    # Gradient norm clipping
    GRADIENT_CLIP_NORM = 5.0

    def __init__(self):
        """Set values of computed attributes."""
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size
        if self.IMAGE_RESIZE_MODE == "crop":
            self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, 3])
        else:
            self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])

        # Image meta data length
        # See compose_image_meta() for details
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")


In [4]:
from mrcnn import model as modellib, utils

import os
import sys
import time
import numpy as np
import imgaug  # https://github.com/aleju/imgaug (pip3 install imgaug)

# Download and install the Python COCO tools from https://github.com/waleedka/coco
# That's a fork from the original https://github.com/pdollar/coco with a bug
# fix for Python 3.
# I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
# If the PR is merged then use the original repo.
# Note: Edit PythonAPI/Makefile and replace "python" with "python3".
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils

import zipfile
import urllib.request
import shutil

DEFAULT_DATASET_YEAR = "2014"
coco_path = 'C:/Users/zhhua/OneDrive/Desktop/PythonProgram/CAT/COCO/annotations_trainval2017'
############################################################
#  Configurations
############################################################


class CocoConfig(Config):
    """Configuration for training on MS COCO.
    Derives from the base Config class and overrides values specific
    to the COCO dataset.
    """
    # Give the configuration a recognizable name
    NAME = "coco"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Uncomment to train on 8 GPUs (default is 1)
    # GPU_COUNT = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 80  # COCO has 80 classes


############################################################
#  Dataset
############################################################

class CocoDataset(utils.Dataset):
    def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
                  class_map=None, return_coco=False, auto_download=False):
        """Load a subset of the COCO dataset.
        dataset_dir: The root directory of the COCO dataset.
        subset: What to load (train, val, minival, valminusminival)
        year: What dataset year to load (2014, 2017) as a string, not an integer
        class_ids: If provided, only loads images that have the given classes.
        class_map: TODO: Not implemented yet. Supports maping classes from
            different datasets to the same class ID.
        return_coco: If True, returns the COCO object.
        auto_download: Automatically download and unzip MS-COCO images and annotations
        """

        if auto_download is True:
            self.auto_download(dataset_dir, subset, year)

        coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
        if subset == "minival" or subset == "valminusminival":
            subset = "val"
        image_dir = "{}/{}{}".format(dataset_dir, subset, year)

        # Load all classes or a subset?
        if not class_ids:
            # All classes
            class_ids = sorted(coco.getCatIds())

        # All images or a subset?
        if class_ids:
            image_ids = []
            for id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[id])))
            # Remove duplicates
            image_ids = list(set(image_ids))
        else:
            # All images
            image_ids = list(coco.imgs.keys())

        # Add classes
        for i in class_ids:
            self.add_class("coco", i, coco.loadCats(i)[0]["name"])

        # Add images
        for i in image_ids:
            self.add_image(
                "coco", image_id=i,
                path=os.path.join(image_dir, coco.imgs[i]['file_name']),
                width=coco.imgs[i]["width"],
                height=coco.imgs[i]["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[i], catIds=class_ids, iscrowd=None)))
        if return_coco:
            return coco

    def auto_download(self, dataDir, dataType, dataYear):
        """Download the COCO dataset/annotations if requested.
        dataDir: The root directory of the COCO dataset.
        dataType: What to load (train, val, minival, valminusminival)
        dataYear: What dataset year to load (2014, 2017) as a string, not an integer
        Note:
            For 2014, use "train", "val", "minival", or "valminusminival"
            For 2017, only "train" and "val" annotations are available
        """

        # Setup paths and file names
        if dataType == "minival" or dataType == "valminusminival":
            imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
            imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
            imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear)
        else:
            imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
            imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
            imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear)
        # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)

        # Create main folder if it doesn't exist yet
        if not os.path.exists(dataDir):
            os.makedirs(dataDir)

        # Download images if not available locally
        if not os.path.exists(imgDir):
            os.makedirs(imgDir)
            print("Downloading images to " + imgZipFile + " ...")
            with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
                shutil.copyfileobj(resp, out)
            print("... done downloading.")
            print("Unzipping " + imgZipFile)
            with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
                zip_ref.extractall(dataDir)
            print("... done unzipping")
        print("Will use images in " + imgDir)

        # Setup annotations data paths
        annDir = "{}/annotations".format(dataDir)
        if dataType == "minival":
            annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
            annFile = "{}/instances_minival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
            unZipDir = annDir
        elif dataType == "valminusminival":
            annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
            annFile = "{}/instances_valminusminival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
            unZipDir = annDir
        else:
            annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
            annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
            annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
            unZipDir = dataDir
        # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)

        # Download annotations if not available locally
        if not os.path.exists(annDir):
            os.makedirs(annDir)
        if not os.path.exists(annFile):
            if not os.path.exists(annZipFile):
                print("Downloading zipped annotations to " + annZipFile + " ...")
                with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
                    shutil.copyfileobj(resp, out)
                print("... done downloading.")
            print("Unzipping " + annZipFile)
            with zipfile.ZipFile(annZipFile, "r") as zip_ref:
                zip_ref.extractall(unZipDir)
            print("... done unzipping")
        print("Will use annotations in " + annFile)

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].

        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a COCO image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super(CocoDataset, self).load_mask(image_id)

        instance_masks = []
        class_ids = []
        annotations = self.image_info[image_id]["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['category_id']))
            if class_id:
                m = self.annToMask(annotation, image_info["height"],
                                   image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, resize it.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                instance_masks.append(m)
                class_ids.append(class_id)

        # Pack instance masks into an array
        if class_ids:
            mask = np.stack(instance_masks, axis=2).astype(np.bool)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        else:
            # Call super class to return an empty mask
            return super(CocoDataset, self).load_mask(image_id)

    def image_reference(self, image_id):
        """Return a link to the image in the COCO Website."""
        info = self.image_info[image_id]
        if info["source"] == "coco":
            return "http://cocodataset.org/#explore?id={}".format(info["id"])
        else:
            super(CocoDataset, self).image_reference(image_id)

    # The following two functions are from pycocotools with a few changes.

    def annToRLE(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: binary mask (numpy 2D array)
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        m = maskUtils.decode(rle)
        return m


############################################################
#  COCO Evaluation
############################################################

def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange resutls to match COCO specs in http://cocodataset.org/#format
    """
    # If no results, return an empty list
    if rois is None:
        return []

    results = []
    for image_id in image_ids:
        # Loop through detections
        for i in range(rois.shape[0]):
            class_id = class_ids[i]
            score = scores[i]
            bbox = np.around(rois[i], 1)
            mask = masks[:, :, i]

            result = {
                "image_id": image_id,
                "category_id": dataset.get_source_class_id(class_id, "coco"),
                "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
                "score": score,
                "segmentation": maskUtils.encode(np.asfortranarray(mask))
            }
            results.append(result)
    return results


def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    dataset: A Dataset object with valiadtion data
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    """
    # Pick COCO images from the dataset
    image_ids = image_ids or dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format
        # Cast masks to uint8 because COCO tools errors out on bool
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"],
                                           r["masks"].astype(np.uint8))
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)


############################################################
#  Training
############################################################


# Configurations
config = CocoConfig()
config.display()

# Create model
model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)

# Select weights file to load
model_path = COCO_MODEL_PATH

# Load weights
print("Loading weights ", model_path)
model.load_weights(model_path, by_name=True)

# Train
# Training dataset. Use the training set and 35K from the
# validation set, as as in the Mask RCNN paper.
dataset_train = CocoDataset()
dataset_train.load_coco(coco_path, "train", year='2014')

dataset_train.load_coco(coco_path, "valminusminival", year='2014')
dataset_train.prepare()

# Validation dataset
dataset_val = CocoDataset()
val_type = "val"
dataset_val.load_coco(coco_path, val_type, year='2014')
dataset_val.prepare()

# Image Augmentation
# Right/Left flip 50% of the time
augmentation = imgaug.augmenters.Fliplr(0.5)

# *** This training schedule is an example. Update to your needs ***

# Training - Stage 1
print("Training network heads")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=40,
            layers='heads',
            augmentation=augmentation)

# Training - Stage 2
# Finetune layers from ResNet stage 4 and up
print("Fine tune Resnet stage 4 and up")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=120,
            layers='4+',
            augmentation=augmentation)

# Training - Stage 3
# Fine tune all layers
print("Fine tune all layers")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=160,
            layers='all',
            augmentation=augmentation)


  if os.name is 'nt':



Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     2
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTA

NameError: name 'MODEL_DIR' is not defined