# Mask R-CNN - Squirrel Monkey Segmentation

In [1]:
# Silence Python-level warnings (a HOST of deprecation warnings otherwise
# shows up when using Matterport's Mask R-CNN). The filter is installed
# before the TensorFlow import below so it is already active when that
# import runs.
import warnings
warnings.filterwarnings("ignore")

import tensorflow as tf 

# Also limit TensorFlow's own logger to messages of level ERROR.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [2]:
import os
import sys
import random
import numpy as np
import matplotlib.pyplot as plt
import skimage
from termcolor import colored

# Root directory of the project
ROOT_DIR = os.path.abspath("./Mask_RCNN")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to the COCO-trained weights file.
# The location can be overridden without editing the notebook by setting the
# COCO_MODEL_PATH environment variable; when it is not set, the original
# machine-specific path is used unchanged.
# Alternative kept from the original notebook:
#   COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
COCO_MODEL_PATH = os.environ.get(
    "COCO_MODEL_PATH",
    "C:\\Users\\addis\\Documents\\mask_rcnn_coco.h5")

# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

Using TensorFlow backend.


In [3]:
# Check tf version, then print whether TensorFlow reports an available GPU
print(tf.__version__)
print(tf.test.is_gpu_available())

1.15.3
True


## Configurations


In [4]:
class MonkeysConfig(Config):
    """Configuration for training Mask R-CNN on the monkey dataset.

    Only the attributes listed here are overridden; every other setting is
    inherited from the base ``Config`` class.
    """
    # Give the configuration a recognizable name
    NAME = "monkeys"

    # Train on 1 GPU with 1 image per GPU, so the batch size is 1
    # (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # background + 1 monkey

    # Use large image sizes for greater accuracy
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512

    # The settings below come from the shapes example, which had image size
    # 128x128. They are intentionally left disabled for the 512x512 images
    # used here and are kept only for reference.
    #
    # # Use smaller anchors because our image and objects are small
    # RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels
    #
    # # Reduce training ROIs per image because the images are small and have
    # # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    # TRAIN_ROIS_PER_IMAGE = 32

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5

# Instantiate the training configuration and print its values
# (the listing appears in the cell output).
config = MonkeysConfig()
config.display()

class InferenceConfig(MonkeysConfig):
    """Configuration used when the model is rebuilt in inference mode.

    Explicitly pins GPU_COUNT and IMAGES_PER_GPU to 1 (the same values
    MonkeysConfig already sets), i.e. a batch size of 1.
    """
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  512
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  512
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [512 512   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE             

## Dataset

Handles loading images and masks for the custom dataset


In [5]:
import json

# skimage.io.imread and skimage.draw.polygon are used below. A bare
# `import skimage` is not guaranteed to load these submodules, so import
# them explicitly.
import skimage.draw
import skimage.io

MONKEY_CLASS_ID_STR = "monkey"


class MonkeysDataset(utils.Dataset):
    """Monkey images annotated with polygons in the VGG Image Annotator (VIA)."""

    def load_monkeys(self, dataset_dir):
        """Register the monkey class and all annotated images of a dataset.

        dataset_dir: directory that contains ``via_region_data.json`` and an
            ``images`` subfolder holding the image files named in that JSON.
        """
        # Add classes
        self.add_class(MONKEY_CLASS_ID_STR, 1, MONKEY_CLASS_ID_STR)

        # Load annotations
        # VGG Image Annotator (up to version 1.6) saves each image in the form:
        # { 'filename': '28503151_5b5b7ec140_b.jpg',
        #   'regions': {
        #       '0': {
        #           'region_attributes': {},
        #           'shape_attributes': {
        #               'all_points_x': [...],
        #               'all_points_y': [...],
        #               'name': 'polygon'}},
        #       ... more regions ...
        #   },
        #   'size': 100202
        # }
        # We mostly care about the x and y coordinates of each region
        # Note: In VIA 2.0, regions was changed from a dict to a list.
        annotations_path = os.path.join(dataset_dir, "via_region_data.json")
        # Use a context manager so the file handle is closed deterministically.
        with open(annotations_path) as annotations_file:
            annotations = json.load(annotations_file)
        annotations = list(annotations.values())  # don't need the dict keys

        # The VIA tool saves images in the JSON even if they don't have any
        # annotations. Skip unannotated images.
        annotations = [a for a in annotations if a['regions']]

        # Add images
        for num_images_added, a in enumerate(annotations, start=1):
            # Get the x, y coordinates of points of the polygons that make up
            # the outline of each object instance. These are stored in the
            # shape_attributes (see json format above)
            # The if condition is needed to support VIA versions 1.x and 2.x.
            if isinstance(a['regions'], dict):
                polygons = [r['shape_attributes']
                            for r in a['regions'].values()]
            else:
                polygons = [r['shape_attributes'] for r in a['regions']]

            # load_mask() needs the image size to convert polygons to masks.
            # Unfortunately, VIA doesn't include it in JSON, so we must read
            # the image. This is only manageable since the dataset is tiny.
            image_path = os.path.join(dataset_dir, "images", a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]

            self.add_image(
                MONKEY_CLASS_ID_STR,
                image_id=a['filename'],  # use file name as a unique image id
                path=image_path,
                width=width, height=height,
                polygons=polygons)

            # Progress line, overwritten in place thanks to end='\r'.
            print(colored(f"Loading images {num_images_added}/{len(annotations)}"), end='\r')

    def load_mask(self, image_id):
        """Generate instance masks for an image.

        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a monkey dataset image, delegate to parent class.
        # Zero-argument super() is used instead of super(self.__class__, self),
        # which would recurse forever if this class were ever subclassed.
        info = self.image_info[image_id]
        if info["source"] != MONKEY_CLASS_ID_STR:
            return super().load_mask(image_id)

        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        height, width = info["height"], info["width"]
        mask = np.zeros([height, width, len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1.
            # Passing shape= clips the result to the image, so annotation
            # points lying on or outside the border cannot index out of range.
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'],
                                          shape=(height, width))
            mask[rr, cc, i] = 1

        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID only, we return an array of 1s.
        # The builtin bool replaces the deprecated np.bool alias.
        return mask.astype(bool), np.ones([mask.shape[-1]], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == MONKEY_CLASS_ID_STR:
            return info["path"]
        # Propagate the parent's answer (the original dropped the return value).
        return super().image_reference(image_id)


# Loading Dataset

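Ensure the dataset is in the following form. Note that `load_monkeys`, as written in this notebook, reads `via_region_data.json` from the directory it is given and looks for the image files in an `images` subfolder of that directory; the loading cell below points it at a single directory, and the training cell splits that data into cross-validation folds itself. Adjust the layout (or the loader) accordingly: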

dirName  
├── train  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── a.jpg  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── b.jpg  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── c.jpg  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── via_region_data.json  
└── val  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── c.jpg  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── d.jpg  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── e.jpg  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;└── via_region_data.json  

In [6]:
# Dataset.
# Annotated using: https://www.robots.ox.ac.uk/~vgg/software/via/

# Machine-specific location of the annotated data; change this to wherever
# the dataset lives on your machine.
DATASET_DIR = "D:\\Datasets\\Monkey_Frames\\VGG\\cv set (240)"
# Load every annotated image from that directory into one dataset;
# the training cell later splits it into train/validation folds.
dataset = MonkeysDataset()
dataset.load_monkeys(DATASET_DIR)
dataset.prepare()

Loading images 240/240[0m

## Training

Train in two stages:

1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.


In [None]:
def _subset_dataset(source, indices):
    """Build a prepared MonkeysDataset containing only some images of `source`.

    source: an already loaded dataset whose image_info entries carry the
        'source', 'id', 'path', 'width', 'height' and 'polygons' keys.
    indices: iterable of positions into source.image_info to copy.
    Returns: a new MonkeysDataset on which prepare() has been called.
    """
    subset = MonkeysDataset()
    subset.add_class(MONKEY_CLASS_ID_STR, 1, MONKEY_CLASS_ID_STR)
    for index in indices:
        info = source.image_info[index]
        subset.add_image(
            source=info['source'],
            image_id=info['id'],
            path=info['path'],
            width=info['width'],
            height=info['height'],
            polygons=info['polygons'])
    subset.prepare()
    return subset


cv_fold = 3
NUM_EPOCHS = 10  # epochs spent training only the head layers
FT_EPOCHS = 10   # additional epochs spent fine-tuning all layers

split_size = dataset.image_ids.size // cv_fold
if split_size == 0:
    # Fewer images than folds would give empty validation sets.
    raise ValueError(
        "Need at least {} images for {}-fold cross-validation, got {}".format(
            cv_fold, cv_fold, dataset.image_ids.size))

ids = dataset.image_ids.copy()
np.random.shuffle(ids) # shuffled indexes
print(ids)

# mAP of every fold, so an overall figure can be reported at the end.
fold_mAPs = []

for n in range(cv_fold):
    start = n * split_size #start of validation set depends on iteration

    #chunk for validation set
    dataset_val = _subset_dataset(dataset, ids[start:start + split_size])

    #remaining data for training set
    # When the image count is not a multiple of cv_fold, the images past
    # cv_fold * split_size are never validated on and always end up here.
    remaining = np.delete(ids, list(range(start, start+split_size)))
    dataset_train = _subset_dataset(dataset, remaining)

    # NOTE(review): a new model is built for every fold and nothing from the
    # previous fold is explicitly released; if memory grows across folds,
    # consider clearing the backend session between folds - confirm against
    # the Keras backend in use.

    # Create model in training mode
    model = modellib.MaskRCNN(mode="training", config=config,
                              model_dir=MODEL_DIR)

    # Start from the COCO weights, skipping the layers listed in `exclude`.
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])

    # Train the head branches
    # Passing layers="heads" freezes all layers except the head
    # layers. You can also pass a regular expression to select
    # which layers to train by name pattern.
    model.train(dataset_train, dataset_val,
                learning_rate=config.LEARNING_RATE,
                epochs=NUM_EPOCHS,
                layers='heads')

    # Fine tune all layers
    # Passing layers="all" trains all layers. You can also
    # pass a regular expression to select which layers to
    # train by name pattern.
    model.train(dataset_train, dataset_val,
                learning_rate=config.LEARNING_RATE / 10,
                epochs=NUM_EPOCHS+FT_EPOCHS,
                layers="all")

    inference_config = InferenceConfig()

    # Recreate the model in inference mode
    model = modellib.MaskRCNN(mode="inference",
                              config=inference_config,
                              model_dir=MODEL_DIR)

    # Get path to saved weights
    # Either set a specific path or find last trained weights
    # model_path = os.path.join(ROOT_DIR, ".h5 file name here")
    model_path = model.find_last()

    # Load trained weights
    print("Loading weights from ", model_path)
    model.load_weights(model_path, by_name=True)

    # TODO: marked "NEED TO CHANGE" in the original notebook.

    # Compute VOC-Style mAP @ IoU=0.5 over the whole validation fold.
    # To evaluate on a random sample instead:
    # image_ids = np.random.choice(dataset_val.image_ids, 10)
    image_ids = dataset_val.image_ids

    APs = []
    for image_id in image_ids:
        # Load image and ground truth data
        image, image_meta, gt_class_id, gt_bbox, gt_mask =\
            modellib.load_image_gt(dataset_val, inference_config,
                                   image_id, use_mini_mask=False)
        # Run object detection
        results = model.detect([image], verbose=0)
        r = results[0]
        # Compute AP
        AP, precisions, recalls, overlaps =\
            utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                             r["rois"], r["class_ids"], r["scores"], r['masks'])
        APs.append(AP)

    fold_mAP = np.mean(APs)
    fold_mAPs.append(fold_mAP)
    print("mAP: ", fold_mAP)

# Cross-validated score: average of the per-fold mAP values.
print("Mean mAP over {} folds: ".format(cv_fold), np.mean(fold_mAPs))


[125 226 234 128 117  40  86 175 144 113 108 172 187 181 109  27  66 238
  98  43 138  21 195 106 162  70  33  38 114 156 174 214 136 223 105  76
 132 124 190  78 200 126 189  54 157  11 155  13 148  49  60 147  80 111
  72  25  50  91  32  16  95  68  24   0  48 134 188 198  82 150 182 169
  23 121  73 143  46 179 151 203 202  79 152 133 215  19 158 183  56 178
 210  44 161 199  74  22 192  88  93  29   5 164  14 193 180  64  34  47
  42 184 222 204 101 176 206  12   8 207  77 225  18   1  55   4  94  62
 194  89 120 197   3 227 165  83  65 219 201 170 205  37 216 177 139  10
 233  52 141 129 127 218  92 167 212 237 153  35  15 110 211 220 217 171
 208 104 196  69 235 231 119 232  36  81 185 103   6  63 229  20 123 209
 140 166  30 135 115 154 145  39  97 137  57  31 168 191 173 236 149   9
  85  99  45 186 107 116 221  61 213   7 224  51 112  90 122  41  75  87
  53 131  17  96 146 100 118 230  26  84  67 163   2 102  28 160  71 142
  58  59 130 159 239 228]

Starting at epoch 0. LR=