In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%load_ext line_profiler

import tensorflow as tf
slim = tf.contrib.slim
tf.logging.set_verbosity(tf.logging.INFO)
# TensorFlow session options object.
# NOTE(review): `sess_config` is not referenced again in the visible cells -
# confirm it is actually passed to a session somewhere, or drop it.
sess_config = tf.ConfigProto()
# Optional GPU memory limits (disabled). As written they refer to `config`,
# not to `sess_config` defined above, so they would need renaming before use.
#config.gpu_options.allow_growth=True
#config.gpu_options.per_process_gpu_memory_fraction=0.45

import sys
import os
import pickle

ADE20K_DATA = '/gpfs01/bethge/data/ADE20K_2016_07_26'
COCO_DATA = '/gpfs01/bethge/share/mscoco/COCO'
PASCAL_VOC_DATA = '/gpfs01/bethge/data/PascalVOC'
MASK_RCNN_MODEL_PATH = 'Mask_RCNN/'
SLIM_MODELS_PATH = 'slim/'
TRANSFORMER_MODELS_PATH = 'transformer/'
SIAMESE_MASK_RCNN_PATH = '/gpfs01/bethge/home/cmichaelis/projects/2018-03_Siamese_Mask_RCNN/siamese-mask-rcnn/'

if MASK_RCNN_MODEL_PATH not in sys.path:
    sys.path.append(MASK_RCNN_MODEL_PATH)
if SIAMESE_MASK_RCNN_PATH not in sys.path:
    sys.path.append(SIAMESE_MASK_RCNN_PATH)
    
from samples.coco import coco
from mrcnn import utils
from mrcnn import model as modellib
from mrcnn import visualize
    
if SLIM_MODELS_PATH not in sys.path:
    sys.path.append(SLIM_MODELS_PATH)
if TRANSFORMER_MODELS_PATH not in sys.path:
    sys.path.append(TRANSFORMER_MODELS_PATH)
    
import utils as siamese_utils
import model as siamese_model
import siamese_mrcnn_models as model_zoo
    
import time
import random
import numpy as np
import skimage.io
import imgaug
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (12.0, 6.0)

from spatial_transformer import transformer

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))

# Root directory of the project
ROOT_DIR = os.getcwd()

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# COCO category ids (out of 1..80) that this notebook treats as *not*
# overlapping with Pascal VOC.
coco_nopascal_classes = [
    8, 10, 11, 12, 13, 14,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
    60, 62,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
]
# The complementary ids in 1..80, in ascending order.
coco_pascal_classes = np.setdiff1d(np.array(range(1, 81)), coco_nopascal_classes)

In [3]:
# NOTE: pickle.load can execute arbitrary code contained in the file, so
# 'pascal_ade20k.pkl' must come from a trusted source.
with open('pascal_ade20k.pkl', 'rb') as mapping_file:
    pascal_ade20k = pickle.load(mapping_file)

# All ADE20K class ids that appear in any value list of the mapping,
# de-duplicated and sorted ascending.
ade20k_pascal_classes = np.array(sorted(set().union(*pascal_ade20k.values())))
# Every id in 1..3148 that does not appear in the mapping.
ade20k_nopascal_classes = np.setdiff1d(range(1, 3149), ade20k_pascal_classes)

In [4]:
class TrainConfig(coco.CocoConfig):
    """Training configuration for this notebook; overrides coco.CocoConfig defaults."""
    # Effective batch size = GPU_COUNT * IMAGES_PER_GPU, i.e. 6 images per
    # training step with the values below.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 6
    LEARNING_RATE = 0.001
    # 3148 object classes plus one extra slot (presumably the background
    # class - confirm against the dataset classes in siamese_utils).
    NUM_CLASSES = 3148 + 1
    # Size limits for the siamese target (reference) crop.
    # NOTE(review): presumably consumed by siamese_utils.get_one_target - confirm.
    TARGET_MAX_DIM = 96
    TARGET_MIN_DIM = 75
    # Size limits for the scene image.
    IMAGE_MIN_DIM = 400
    IMAGE_MAX_DIM = 512
    #IMAGE_RESIZE_MODE = 'none'
    # Shape of the target input tensor; used as the `input_target` shape when
    # the model is built.
    TARGET_SHAPE = np.array([TARGET_MAX_DIM, TARGET_MAX_DIM, 3])
    TARGET_PADDING = True
    MAX_TARGET_INSTANCES = 10
    # Reduce model size for prototyping
    BACKBONE = 'resnet50'
    FPN_FEATUREMAPS = 128
    RPN_ANCHOR_STRIDE = 2
    RPN_TRAIN_ANCHORS_PER_IMAGE = 64
    POST_NMS_ROIS_TRAINING = 250
    POST_NMS_ROIS_INFERENCE = 250
    TRAIN_ROIS_PER_IMAGE = 50
    DETECTION_MAX_INSTANCES = 15
    DETECTION_NMS_THRESHOLD = 0.5
    # Also the width of the per-image ground-truth arrays built by the data
    # generator in this notebook.
    MAX_GT_INSTANCES = 15
    # Relative weight of each loss term.
    LOSS_WEIGHTS = {'rpn_class_loss': 2.0, 
                    'rpn_bbox_loss': 0.2, 
                    'mrcnn_class_loss': 2.0, 
                    'mrcnn_bbox_loss': 0.5, 
                    'mrcnn_mask_loss': 1.0}

# Instantiate the configuration and print all of its values.
config = TrainConfig()
config.display()


Configurations:
BACKBONE                       resnet50
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     6
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        15
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.5
FPN_CLASSIF_FC_LAYERS_SIZE     1024
FPN_FEATUREMAPS                128
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 6
IMAGE_MAX_DIM                  512
IMAGE_META_SIZE                3161
IMAGE_MIN_DIM                  400
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [512 512   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 2.0, 'rpn_bbox_loss': 0.2, 'mrcnn_class_loss': 2.0, 'mrcnn_bbox_loss': 0.5, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE           

In [5]:
def _load_coco_split(subset):
    """Load and index one COCO 2017 split ("train" or "val").

    Indices are built while only `coco_nopascal_classes` are active;
    afterwards ACTIVE_CLASSES is reset to all ids 1..80.
    """
    split = siamese_utils.IndexedCocoDataset()
    split.set_active_classes(coco_nopascal_classes)
    split.load_coco(COCO_DATA, subset, year="2017")
    split.prepare()
    split.build_indices()
    split.ACTIVE_CLASSES = np.array(range(1,81))
    return split


# COCO/train first, then COCO/val (same order as the annotation loading logs).
coco_train = _load_coco_split("train")
coco_val = _load_coco_split("val")

loading annotations into memory...
Done (t=21.25s)
creating index...
index created!
loading annotations into memory...
Done (t=2.17s)
creating index...
index created!


In [6]:
def _load_ade20k_split(subset):
    """Load and index one ADE20K split ("train" or "val").

    Indices are built while only `ade20k_nopascal_classes` are active;
    afterwards ACTIVE_CLASSES is reset to every class id.
    """
    split = siamese_utils.IndexedADE20KDataset()
    split.set_active_classes(ade20k_nopascal_classes)
    split.load_ade20k(ADE20K_DATA, subset)
    split.prepare()
    split.build_indices()
    # Class ids run 1..3148 (see `range(1, 3149)` used for
    # ade20k_nopascal_classes and NUM_CLASSES = 3148 + 1). The previous
    # `range(1, 3148)` stopped at 3147 and silently excluded the last class.
    split.ACTIVE_CLASSES = np.array(range(1, 3149))
    return split


# Load ADE20K/train dataset
ade20k_train = _load_ade20k_split("train")

# Load ADE20K/val dataset
ade20k_val = _load_ade20k_split("val")

### Multi Dataset Loader

In [7]:
def siamese_data_generator(datasets, config, shuffle=True, augmentation=imgaug.augmenters.Fliplr(0.5), random_rois=0,
                   batch_size=1, detection_targets=False, diverse=0):
    """A generator that returns images, a siamese target crop per image, and
    the corresponding RPN targets, class ids, bounding boxes and masks.

    For every sample one of `datasets` is picked at random, the next image of
    that dataset is loaded, one active category present in the image is chosen
    at random, and a target crop of that category is fetched with
    siamese_utils.get_one_target. Only the ground-truth instances of the
    chosen category are kept.

    datasets: List of Dataset objects to pick data from. Each must expose
        image_ids, image_info and ACTIVE_CLASSES.
    config: The model config object
    shuffle: If True, shuffles a dataset's samples each time its index wraps
        around to the start
    augmentation: Augmentation object passed to modellib.load_image_gt and to
        siamese_utils.get_one_target. Note that the default instance is
        created once, when the function is defined.
    random_rois: If > 0 then generate proposals to be used to train the
                 network classifier and mask heads. Useful if training
                 the Mask RCNN part without the RPN.
    batch_size: How many images to return in each call
    detection_targets: If True, generate detection targets (class IDs, bbox
        deltas, and masks). Only takes effect when random_rois > 0.
    diverse: Currently unused by this implementation; kept for interface
        compatibility.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differ depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, config.IMAGE_META_SIZE]; metas shorter than that
                  are zero padded on the right
    - targets: [batch, ...target crop shape...]
    - rpn_match: [batch, N, 1] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] 1 for each kept instance of the
                    chosen target category, zero padded
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                are those of the image unless config.USE_MINI_MASK is True, in
                which case they are defined in MINI_MASK_SHAPE.
    outputs list: Usually empty in regular training. But if detection_targets
        is True (and random_rois > 0) then the outputs list contains target
        class_ids, bbox deltas, and masks.

    Errors while processing an image are logged and the image is skipped; the
    exception is re-raised once more than 5 errors have occurred in total.
    """
    b = 0  # batch item index
    error_count = 0

    # Every dataset gets its own (shuffled) id list and read cursor.
    number_of_datasets = len(datasets)
    image_ids = [np.copy(dataset.image_ids) for dataset in datasets]
    image_index = [-1] * number_of_datasets

    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    backbone_shapes = modellib.compute_backbone_shapes(config, config.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)

    # Keras requires a generator to run indefinitely.
    while True:
        # Reset so the error handler never reports a stale or unbound image.
        dataset = None
        image_id = None
        try:
            dataset_id = np.random.choice(range(number_of_datasets))
            dataset = datasets[dataset_id]

            # Increment index to pick next image. Shuffle if at the start of an epoch.
            image_index[dataset_id] = (image_index[dataset_id] + 1) % len(image_ids[dataset_id])
            if shuffle and image_index[dataset_id] == 0:
                np.random.shuffle(image_ids[dataset_id])

            # Get GT bounding boxes and masks for image.
            image_id = image_ids[dataset_id][image_index[dataset_id]]
            image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                modellib.load_image_gt(dataset, config, image_id, augmentation=augmentation,
                              use_mini_mask=config.USE_MINI_MASK)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            if not np.any(gt_class_ids > 0):
                continue

            # Use only positive class_ids that are active for this dataset
            categories = np.unique(gt_class_ids)
            categories = categories[categories > 0]
            active_categories = [c for c in categories
                                 if np.any(c == dataset.ACTIVE_CLASSES)]

            # Skip image if it contains no instance of any active class
            if not active_categories:
                continue
            # Randomly select category
            category = np.random.choice(active_categories)

            # Generate siamese target crop
            target = siamese_utils.get_one_target(category, dataset, config, augmentation=augmentation)
            if target is None: # fix until a better ADE20K metadata is built
                print('skip target')
                continue

            # Keep only the instances of the chosen category. Their siamese
            # class id is 1 (the mask is True for every kept instance).
            idx = gt_class_ids == category
            siamese_class_ids = idx.astype('int8')[idx]
            gt_class_ids = gt_class_ids[idx]
            gt_boxes = gt_boxes[idx,:]
            gt_masks = gt_masks[:,:,idx]

            # RPN Targets
            rpn_match, rpn_bbox = modellib.build_rpn_targets(image.shape, anchors,
                                                    gt_class_ids, gt_boxes, config)

            # Mask R-CNN Targets
            if random_rois:
                rpn_rois = modellib.generate_random_rois(
                    image.shape, random_rois, gt_class_ids, gt_boxes)
                if detection_targets:
                    rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask =\
                        modellib.build_detection_targets(
                            rpn_rois, gt_class_ids, gt_boxes, gt_masks, config)

            # Init batch arrays
            if b == 0:
                # Width must match the model's `input_image_meta`, which is
                # declared with config.IMAGE_META_SIZE.
                batch_image_meta = np.zeros(
                    (batch_size, config.IMAGE_META_SIZE), dtype=image_meta.dtype)
                batch_rpn_match = np.zeros(
                    [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype)
                batch_rpn_bbox = np.zeros(
                    [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype)
                batch_images = np.zeros(
                    (batch_size,) + image.shape, dtype=np.float32)
                batch_gt_class_ids = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
                batch_gt_boxes = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
                batch_targets = np.zeros(
                    (batch_size,) + target.shape, dtype=np.float32)
                if config.USE_MINI_MASK:
                    batch_gt_masks = np.zeros((batch_size, config.MINI_MASK_SHAPE[0], config.MINI_MASK_SHAPE[1],
                                               config.MAX_GT_INSTANCES))
                else:
                    batch_gt_masks = np.zeros(
                        (batch_size, image.shape[0], image.shape[1], config.MAX_GT_INSTANCES))
                if random_rois:
                    batch_rpn_rois = np.zeros(
                        (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype)
                    if detection_targets:
                        batch_rois = np.zeros(
                            (batch_size,) + rois.shape, dtype=rois.dtype)
                        batch_mrcnn_class_ids = np.zeros(
                            (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype)
                        batch_mrcnn_bbox = np.zeros(
                            (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype)
                        batch_mrcnn_mask = np.zeros(
                            (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype)

            # If more instances than fits in the array, sub-sample from them.
            if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
                ids = np.random.choice(
                    np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False)
                gt_class_ids = gt_class_ids[ids]
                siamese_class_ids = siamese_class_ids[ids]
                gt_boxes = gt_boxes[ids]
                gt_masks = gt_masks[:, :, ids]

            # Add to batch
            # Datasets with fewer classes produce a shorter meta vector; the
            # remainder of the row stays zero.
            batch_image_meta[b][:image_meta.shape[0]] = image_meta
            batch_rpn_match[b] = rpn_match[:, np.newaxis]
            batch_rpn_bbox[b] = rpn_bbox
            batch_images[b] = modellib.mold_image(image.astype(np.float32), config)
            batch_targets[b] = modellib.mold_image(target.astype(np.float32), config)
            batch_gt_class_ids[b, :siamese_class_ids.shape[0]] = siamese_class_ids
            batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
            batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks
            if random_rois:
                batch_rpn_rois[b] = rpn_rois
                if detection_targets:
                    batch_rois[b] = rois
                    batch_mrcnn_class_ids[b] = mrcnn_class_ids
                    batch_mrcnn_bbox[b] = mrcnn_bbox
                    batch_mrcnn_mask[b] = mrcnn_mask
            b += 1

            # Batch full?
            if b >= batch_size:
                inputs = [batch_images, batch_image_meta, batch_targets, batch_rpn_match, batch_rpn_bbox,
                          batch_gt_class_ids, batch_gt_boxes, batch_gt_masks]
                outputs = []

                if random_rois:
                    inputs.extend([batch_rpn_rois])
                    if detection_targets:
                        inputs.extend([batch_rois])
                        # Keras requires that output and targets have the same number of dimensions
                        batch_mrcnn_class_ids = np.expand_dims(
                            batch_mrcnn_class_ids, -1)
                        outputs.extend(
                            [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask])

                yield inputs, outputs

                # start a new batch
                b = 0
        except Exception:
            # GeneratorExit / KeyboardInterrupt are not Exception subclasses
            # and therefore propagate untouched.
            # Log it and skip the image
            if dataset is not None and image_id is not None:
                failed_image = dataset.image_info[image_id]
            else:
                failed_image = None  # failed before an image was selected
            modellib.logging.exception("Error processing image {}".format(
                failed_image))
            error_count += 1
            if error_count > 5:
                raise

In [8]:
# Replace the generator exposed by the siamese_utils module with the
# multi-dataset version defined in this notebook.
# NOTE(review): presumably model.train() looks the generator up on
# siamese_utils at call time - confirm, otherwise this patch has no effect.
siamese_utils.siamese_data_generator = siamese_data_generator

### New Siamese Mask R-CNN model

In [9]:
import keras
import keras.backend as K
import keras.layers as KL
import keras.initializers as KI
import keras.engine as KE
import keras.models as KM
import multiprocessing

In [10]:
class NewSiameseMaskRCNN(model_zoo.NewSiameseMaskRCNN):
    """Siamese Mask R-CNN variant whose graph is (re)defined in this notebook.

    Only build() is overridden; everything else comes from
    model_zoo.NewSiameseMaskRCNN.
    """

    def build(self, mode, config):
        """Build the siamese Mask R-CNN architecture.

        mode: Either "training" or "inference". The inputs and
            outputs of the model differ accordingly.
        config: Model configuration. IMAGE_SHAPE must be divisible by 2**6 in
            both height and width.

        Returns the Keras model (wrapped in ParallelModel when
        config.GPU_COUNT > 1).
        Raises Exception if the image size is not divisible by 2**6.
        """
        assert mode in ['training', 'inference']

        # Image size must be dividable by 2 multiple times
        h, w = config.IMAGE_SHAPE[:2]
        if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
            raise Exception("Image size must be dividable by 2 at least 6 times "
                            "to avoid fractions when downscaling and upscaling."
                            "For example, use 256, 320, 384, 448, 512, ... etc. ")

        # Inputs
        input_image = KL.Input(
            shape=config.IMAGE_SHAPE.tolist(), name="input_image")
        # CHANGE: add target input
        input_target = KL.Input(
            shape=config.TARGET_SHAPE.tolist(), name="input_target")
        input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE],
                                    name="input_image_meta")
        if mode == "training":
            # RPN GT
            input_rpn_match = KL.Input(
                shape=[None, 1], name="input_rpn_match", dtype=tf.int32)
            input_rpn_bbox = KL.Input(
                shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32)

            # Detection GT (class IDs, bounding boxes, and masks)
            # 1. GT Class IDs (zero padded)
            input_gt_class_ids = KL.Input(
                shape=[None], name="input_gt_class_ids", dtype=tf.int32)
            # 2. GT Boxes in pixels (zero padded)
            # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
            input_gt_boxes = KL.Input(
                shape=[None, 4], name="input_gt_boxes", dtype=tf.float32)
            # Normalize coordinates
            gt_boxes = KL.Lambda(lambda x: modellib.norm_boxes_graph(
                x, K.shape(input_image)[1:3]))(input_gt_boxes)
            # 3. GT Masks (zero padded)
            # [batch, height, width, MAX_GT_INSTANCES]
            if config.USE_MINI_MASK:
                input_gt_masks = KL.Input(
                    shape=[config.MINI_MASK_SHAPE[0],
                           config.MINI_MASK_SHAPE[1], None],
                    name="input_gt_masks", dtype=bool)
            else:
                input_gt_masks = KL.Input(
                    shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],
                    name="input_gt_masks", dtype=bool)
        elif mode == "inference":
            # Anchors in normalized coordinates
            input_anchors = KL.Input(shape=[None, 4], name="input_anchors")

        # Build the shared convolutional layers.
        # CHANGE: Use weightshared FPN model for image and target
        # Create FPN Model
        resnet = siamese_model.build_resnet_model(self.config)
        fpn = model_zoo.build_new_fpn_model(feature_maps=self.config.FPN_FEATUREMAPS)
        siamese_distance = model_zoo.build_siamese_distance_model(feature_maps=self.config.FPN_FEATUREMAPS//32)
        # Create Image FP
        _, IC2, IC3, IC4, IC5 = resnet(input_image)
        IP2, IP3, IP4, IP5, IP6 = fpn([IC2, IC3, IC4, IC5])
        # Create Target FP (same resnet/fpn instances, i.e. shared weights)
        _, TC2, TC3, TC4, TC5 = resnet(input_target)
        TP2, TP3, TP4, TP5, TP6 = fpn([TC2, TC3, TC4, TC5])

        # CHANGE: add siamese distance computation
        # Compare every image pyramid level against all target pyramid levels
        DP2 = siamese_distance([IP2, TP2, TP3, TP4, TP5, TP6])
        DP3 = siamese_distance([IP3, TP2, TP3, TP4, TP5, TP6])
        DP4 = siamese_distance([IP4, TP2, TP3, TP4, TP5, TP6])
        DP5 = siamese_distance([IP5, TP2, TP3, TP4, TP5, TP6])
        DP6 = siamese_distance([IP6, TP2, TP3, TP4, TP5, TP6])

        # CHANGE: combine original and siamese features
        P2 = KL.Concatenate()([IP2, DP2])
        P3 = KL.Concatenate()([IP3, DP3])
        P4 = KL.Concatenate()([IP4, DP4])
        P5 = KL.Concatenate()([IP5, DP5])
        P6 = KL.Concatenate()([IP6, DP6])

        # Note that P6 is used in RPN, but not in the classifier heads.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        mrcnn_feature_maps = [P2, P3, P4, P5]

        # Anchors
        if mode == "training":
            anchors = self.get_anchors(config.IMAGE_SHAPE)
            # Duplicate across the batch dimension because Keras requires it
            # TODO: can this be optimized to avoid duplicating the anchors?
            anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape)
            # A hack to get around Keras's bad support for constants
            anchors = KL.Lambda(lambda x: tf.Variable(anchors), name="anchors")(input_image)
        else:
            anchors = input_anchors

        # RPN Model
        # CHANGE: Set number of filters to 208 [128 original + 5*4*4 L1 + CC]
        rpn = modellib.build_rpn_model(config.RPN_ANCHOR_STRIDE,
                              len(config.RPN_ANCHOR_RATIOS), (self.config.FPN_FEATUREMAPS + 5*4*4))
        # Loop through pyramid layers
        layer_outputs = []  # list of lists
        for p in rpn_feature_maps:
            layer_outputs.append(rpn([p]))
        # Concatenate layer outputs
        # Convert from list of lists of level outputs to list of lists
        # of outputs across levels.
        # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
        output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
        outputs = list(zip(*layer_outputs))
        outputs = [KL.Concatenate(axis=1, name=n)(list(o))
                   for o, n in zip(outputs, output_names)]

        rpn_class_logits, rpn_class, rpn_bbox = outputs

        # Generate proposals
        # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
        # and zero padded.
        proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\
            else config.POST_NMS_ROIS_INFERENCE
        rpn_rois = modellib.ProposalLayer(
            proposal_count=proposal_count,
            nms_threshold=config.RPN_NMS_THRESHOLD,
            name="ROI",
            config=config)([rpn_class, rpn_bbox, anchors])

        if mode == "training":
            # Class ID mask to mark class IDs supported by the dataset the image
            # came from.
            active_class_ids = KL.Lambda(
                lambda x: modellib.parse_image_meta_graph(x)["active_class_ids"]
                )(input_image_meta)

            if not config.USE_RPN_ROIS:
                # Ignore predicted ROIs and use ROIs provided as an input.
                input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
                                      name="input_roi", dtype=np.int32)
                # Normalize coordinates
                # FIX: was `modellig.norm_boxes_graph` (NameError at graph build time)
                target_rois = KL.Lambda(lambda x: modellib.norm_boxes_graph(
                    x, K.shape(input_image)[1:3]))(input_rois)
            else:
                target_rois = rpn_rois

            # Generate detection targets
            # Subsamples proposals and generates target outputs for training
            # Note that proposal class IDs, gt_boxes, and gt_masks are zero
            # padded. Equally, returned rois and targets are zero padded.
            rois, target_class_ids, target_bbox, target_mask =\
                modellib.DetectionTargetLayer(config, name="proposal_targets")([
                    target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])

            # Network Heads
            # TODO: verify that this handles zero padded ROIs
            # CHANGE: reduce number of classes to 2
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
                model_zoo.new_fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta,
                                     config.POOL_SIZE, num_classes=2,
                                     train_bn=config.TRAIN_BN)
            # CHANGE: reduce number of classes to 2
            mrcnn_mask = model_zoo.new_fpn_mask_graph(rois, mrcnn_feature_maps,
                                              input_image_meta,
                                              config.MASK_POOL_SIZE,
                                              num_classes=2,
                                              train_bn=config.TRAIN_BN)

            # TODO: clean up (use tf.identify if necessary)
            output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

            # Losses
            rpn_class_loss = KL.Lambda(lambda x: modellib.rpn_class_loss_graph(*x), name="rpn_class_loss")(
                [input_rpn_match, rpn_class_logits])
            rpn_bbox_loss = KL.Lambda(lambda x: modellib.rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
                [input_rpn_bbox, input_rpn_match, rpn_bbox])
            # CHANGE: use the custom class loss from siamese_model.
            # active_class_ids is still passed to it as the third argument.
            class_loss = KL.Lambda(lambda x: siamese_model.mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")(
                [target_class_ids, mrcnn_class_logits, active_class_ids])
            bbox_loss = KL.Lambda(lambda x: modellib.mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")(
                [target_bbox, target_class_ids, mrcnn_bbox])
            mask_loss = KL.Lambda(lambda x: modellib.mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")(
                [target_mask, target_class_ids, mrcnn_mask])

            # Model
            # CHANGE: Added target to inputs
            inputs = [input_image, input_image_meta, input_target,
                      input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks]
            if not config.USE_RPN_ROIS:
                inputs.append(input_rois)
            outputs = [rpn_class_logits, rpn_class, rpn_bbox,
                       mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask,
                       rpn_rois, output_rois,
                       rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss]
            model = KM.Model(inputs, outputs, name='mask_rcnn')
        else:
            # Network Heads
            # Proposal classifier and BBox regressor heads
            # CHANGE: reduce number of classes to 2
            # FIX: `fpn_classifier_graph` is not defined in this notebook; use
            # the same head builder as the training branch.
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
                model_zoo.new_fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta,
                                     config.POOL_SIZE, num_classes=2,
                                     train_bn=config.TRAIN_BN)

            # Detections
            # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in 
            # normalized coordinates
            detections = modellib.DetectionLayer(config, name="mrcnn_detection")(
                [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

            # Create masks for detections
            detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
            # CHANGE: reduce number of classes to 2
            # FIX: `build_fpn_mask_graph` is not defined in this notebook; use
            # the same mask head builder as the training branch.
            mrcnn_mask = model_zoo.new_fpn_mask_graph(detection_boxes, mrcnn_feature_maps,
                                              input_image_meta,
                                              config.MASK_POOL_SIZE,
                                              num_classes=2,
                                              train_bn=config.TRAIN_BN)

            # CHANGE: Added target to the input
            model = KM.Model([input_image, input_image_meta, input_target, input_anchors],
                             [detections, mrcnn_class, mrcnn_bbox,
                                 mrcnn_mask, rpn_rois, rpn_class, rpn_bbox],
                             name='mask_rcnn')

        # Add multi-GPU support.
        if config.GPU_COUNT > 1:
            from mrcnn.parallel_model import ParallelModel
            model = ParallelModel(model, config.GPU_COUNT)

        return model

In [11]:
# Create model object in training mode.
model = NewSiameseMaskRCNN(mode="training", model_dir=MODEL_DIR, config=config)

In [12]:
# Select the trainable layers, then resume from a saved checkpoint (weights
# are matched to layers by name).
# Alternative starting point (Imagenet pre-training), kept for reference:
# model.load_weights('/gpfs01/bethge/home/cmichaelis/projects/2018-03_Siamese_Mask_RCNN/logs/imagenet20180511T1119/mask_rcnn_imagenet_0810.h5', by_name=True)
# model.set_log_dir()
# Raw string: "\_" is not a valid escape sequence in a normal string literal
# and triggers a deprecation/syntax warning; the pattern value is unchanged.
model.set_trainable(r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)")
# model.load_weights('/gpfs01/bethge/home/cmichaelis/projects/2018-03_Siamese_Mask_RCNN/logs/coco20180614T2205/mask_rcnn_coco_0236.h5', by_name=True)
model.load_weights('logs/coco20180614T2205/mask_rcnn_coco_0240.h5', by_name=True)


Selecting layers to train
In model:  resnet_model
In model:  fpn_model
In model:  fpn_l1_model
In model:  rpn_model
Re-starting from epoch 240


In [None]:
def _train_stage(lr_divisor, last_epoch, layers):
    """Run one training stage on COCO + ADE20K up to epoch `last_epoch`.

    The learning rate is config.LEARNING_RATE divided by `lr_divisor`.
    """
    model.train([coco_train, ade20k_train], [coco_val, ade20k_val],
                learning_rate=config.LEARNING_RATE / lr_divisor,
                epochs=last_epoch, layers=layers)


# Stage 1: heads only.
_train_stage(1, 120, "heads")

# From here on the RPN bbox loss weight is halved.
model.config.LOSS_WEIGHTS = {'rpn_class_loss': 2.0, 'rpn_bbox_loss': 0.2/2, 'mrcnn_class_loss': 2.0, 'mrcnn_bbox_loss': 0.5, 'mrcnn_mask_loss': 1.0}

# Further loss re-weightings that are currently disabled:
#   before the "2+" stage:
#     model.config.LOSS_WEIGHTS = {'rpn_class_loss': 2.0/2, 'rpn_bbox_loss': 0.2/2, 'mrcnn_class_loss': 2.0, 'mrcnn_bbox_loss': 0.5/2, 'mrcnn_mask_loss': 1.0/2}
#   before the first "all" stage:
#     model.config.LOSS_WEIGHTS = {'rpn_class_loss': 2.0/4, 'rpn_bbox_loss': 0.2/4, 'mrcnn_class_loss': 2.0, 'mrcnn_bbox_loss': 0.5/2, 'mrcnn_mask_loss': 1.0/2}

# Remaining stages: (learning-rate divisor, final epoch, layers to train).
remaining_stages = [
    (1, 240, "4+"),
    (1, 480, "2+"),
    (1, 960, "all"),
    (10, 1020, "all"),
    (100, 1080, "all"),
]
for lr_divisor, last_epoch, layers in remaining_stages:
    _train_stage(lr_divisor, last_epoch, layers)


Starting at epoch 240. LR=0.001

Checkpoint Path: /gpfs01/bethge/home/iustyuzh/siamese-mask-rcnn/logs/coco20180614T2205/mask_rcnn_coco_{epoch:04d}.h5
Selecting layers to train
In model:  resnet_model
In model:  fpn_model
In model:  fpn_l1_model
In model:  rpn_model

Starting at epoch 240. LR=0.001

Checkpoint Path: /gpfs01/bethge/home/iustyuzh/siamese-mask-rcnn/logs/coco20180614T2205/mask_rcnn_coco_{epoch:04d}.h5
Selecting layers to train
In model:  resnet_model
In model:  fpn_model
In model:  fpn_l1_model
In model:  rpn_model

Starting at epoch 240. LR=0.001

Checkpoint Path: /gpfs01/bethge/home/iustyuzh/siamese-mask-rcnn/logs/coco20180614T2205/mask_rcnn_coco_{epoch:04d}.h5
Selecting layers to train
In model:  resnet_model
In model:  fpn_model
In model:  fpn_l1_model
In model:  rpn_model
Epoch 241/480
Epoch 242/480
Epoch 243/480
 141/1000 [===>..........................] - ETA: 19:18 - loss: 0.9791 - rpn_class_loss: 0.1042 - rpn_bbox_loss: 0.0890 - mrcnn_class_loss: 0.2606 - mrcnn_bb

ERROR:root:Error processing image {'id': 7336, 'source': 'ade20k', 'path': '/gpfs01/bethge/data/ADE20K_2016_07_26/images/training/d/doorway/indoor/ADE_train_00007337.jpg', 'width': 2953, 'height': 2586, 'annotations': {'class_index': array([ 236,  530,  677,  774,  976,  978, 1180, 1247, 1395, 1910, 2420,
       2676, 2684, 2850, 2855, 2932, 2978, 2982, 3055])}}
Traceback (most recent call last):
  File "<ipython-input-7-a1b8c9b77578>", line 101, in siamese_data_generator
    target = siamese_utils.get_one_target(category, dataset, config, augmentation=augmentation)
  File "/gpfs01/bethge/home/iustyuzh/siamese-mask-rcnn/utils.py", line 48, in get_one_target
    box_ind = np.random.choice(np.where(target_class_ids == category)[0])
  File "mtrand.pyx", line 1126, in mtrand.RandomState.choice
ValueError: a must be non-empty


Epoch 244/480

ERROR:root:Error processing image {'id': 373476, 'source': 'coco', 'path': '/gpfs01/bethge/share/mscoco/COCO/train2017/000000373476.jpg', 'width': 500, 'height': 358, 'annotations': [{'segmentation': [[470.04, 251.64, 468.73, 238.86, 470.19, 228.84, 472.07, 225.65, 472.36, 217.22, 472.66, 213.74, 482.82, 214.17, 489.07, 214.46, 487.91, 220.42, 488.34, 225.06, 490.52, 232.62, 491.54, 237.7, 490.81, 244.67, 490.23, 250.63, 490.37, 251.64, 486.74, 253.82, 479.05, 254.4, 472.36, 253.82, 470.48, 253.1]], 'area': 771.8444499999994, 'iscrowd': 0, 'image_id': 373476, 'bbox': [468.73, 213.74, 22.81, 40.66], 'category_id': 44, 'id': 85973}, {'segmentation': [[118.27, 256.07, 121.33, 253.74, 127.12, 252.41, 134.17, 251.75, 144.67, 251.75, 153.58, 251.81, 159.43, 252.01, 163.96, 254.14, 163.29, 257.13, 162.09, 262.79, 159.63, 269.97, 155.84, 273.76, 151.72, 276.62, 143.61, 278.15, 133.03, 278.15, 126.45, 274.82, 121.86, 270.17, 119.87, 263.45, 118.27, 256.67]], 'area': 982.8483999999996, 'iscrowd'

Epoch 245/480
Epoch 246/480
 230/1000 [=====>........................] - ETA: 17:37 - loss: 0.9530 - rpn_class_loss: 0.0978 - rpn_bbox_loss: 0.0867 - mrcnn_class_loss: 0.2395 - mrcnn_bbox_loss: 0.1519 - mrcnn_mask_loss: 0.3765

ERROR:root:Error processing image {'id': 477655, 'source': 'coco', 'path': '/gpfs01/bethge/share/mscoco/COCO/train2017/000000477655.jpg', 'width': 640, 'height': 427, 'annotations': [{'segmentation': [[244.43, 400.16, 237.53, 392.35, 235.89, 381.07, 236.43, 371.98, 242.8, 362.53, 247.52, 356.35, 257.34, 351.44, 264.25, 350.17, 274.25, 349.8, 281.7, 352.53, 289.88, 357.99, 293.52, 363.26, 292.61, 369.44, 291.88, 376.53, 295.16, 377.98, 298.25, 377.98, 298.79, 381.26, 295.16, 392.16, 289.7, 402.16, 287.16, 402.71, 267.34, 401.8, 254.43, 400.89]], 'area': 2644.0787500000006, 'iscrowd': 0, 'image_id': 477655, 'bbox': [235.89, 349.8, 62.9, 52.91], 'category_id': 37, 'id': 303598}]}
Traceback (most recent call last):
  File "<ipython-input-7-a1b8c9b77578>", line 101, in siamese_data_generator
    target = siamese_utils.get_one_target(category, dataset, config, augmentation=augmentation)
  File "/gpfs01/bethge/home/iustyuzh/siamese-mask-rcnn/utils.py", line 48, in get_one_target
    box_ind =

Epoch 247/480
Epoch 248/480
Epoch 249/480

ERROR:root:Error processing image {'id': 188445, 'source': 'coco', 'path': '/gpfs01/bethge/share/mscoco/COCO/train2017/000000188445.jpg', 'width': 640, 'height': 427, 'annotations': [{'segmentation': [[138.73, 323.08, 139.74, 322.12, 141.18, 321.68, 142.19, 321.78, 143.58, 322.5, 144.11, 324.28, 143.78, 325.82, 142.91, 326.44, 141.61, 326.68, 139.31, 326.3, 138.73, 325.24, 138.54, 324.18, 138.83, 323.08]], 'area': 22.0708000000001, 'iscrowd': 0, 'image_id': 188445, 'bbox': [138.54, 321.68, 5.57, 5.0], 'category_id': 37, 'id': 302130}, {'segmentation': [[184.56, 308.77, 194.86, 290.66, 207.66, 272.24, 206.1, 269.12, 201.1, 273.8, 182.68, 308.14]], 'area': 130.29170000000045, 'iscrowd': 0, 'image_id': 188445, 'bbox': [182.68, 269.12, 24.98, 39.65], 'category_id': 39, 'id': 629524}]}
Traceback (most recent call last):
  File "<ipython-input-7-a1b8c9b77578>", line 101, in siamese_data_generator
    target = siamese_utils.get_one_target(category, dataset, config, augmentation=augmentation)


Epoch 250/480