# Mask R-CNN - Train on Shapes Dataset


This notebook shows how to train Mask R-CNN on your own dataset. To keep things simple we use a synthetic dataset of shapes (squares, triangles, and circles), which enables fast training. You'd still need a GPU, though, because the network backbone is a ResNet-101, which would be too slow to train on a CPU. On a GPU, you can start to get okay-ish results in a few minutes, and good results in less than an hour.

The code of the *Shapes* dataset is included below. It generates images on the fly, so it doesn't require downloading any data. And it can generate images of any size, so we pick a small image size to train faster. 

In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import glob

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
# Expose the local copy of the library to the imports below. The membership
# test keeps sys.path free of duplicate entries when this cell is re-run.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline 

# Logs and trained models are saved under this directory
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Where the COCO pre-trained weights file is expected on disk
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Fetch the weights through the library helper only when the file is absent
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

## Configurations

In [None]:
class RoomsConfig(Config):
    """Training configuration for the four-class rooms dataset.

    Only the settings below are overridden; every other value keeps the
    default inherited from ``Config``.
    """
    # Name given to this configuration
    NAME = "objects"

    # Number of images handled per GPU
    IMAGES_PER_GPU = 2

    # Class count INCLUDING the background: the dataset registers four
    # object classes (floor, structure, prop, furniture) with ids 1-4, and
    # the library keeps id 0 for the background class.
    NUM_CLASSES = 1 + 4

    # Bounds applied when input images are resized
    IMAGE_MIN_DIM = 480
    IMAGE_MAX_DIM = 640

    # Training steps per epoch
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5

config = RoomsConfig()
config.display()

## Notebook Preferences

In [None]:
def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib axes used by the notebook's visualizations.

    Routing every plot through this helper keeps figure sizing in one
    place: the figure is ``size * cols`` wide and ``size * rows`` tall.
    Change the default ``size`` to control how large rendered images are.

    Returns the axes value produced by ``plt.subplots`` (a single Axes or
    an array of them, depending on ``rows`` and ``cols``).
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

## Dataset

Create a synthetic dataset

Extend the Dataset class and add a method to load the rooms dataset, `load_rooms()`, and override the following methods:

* load_image()
* load_mask()
* image_reference()

In [None]:
    # Scratch check: build the recursive glob pattern for one subset
    # directory and echo it as the cell output.
    name = "test"
    DIR = os.path.join(ROOT_DIR, "NYU_DIRECTORY/"+name, "**/*colors.png")
    DIR

In [None]:
class RoomsDataset(utils.Dataset):
    """Room images with colour-coded segmentation ground truth.

    Every image yields one mask channel per class (floor, structure, prop,
    furniture). A channel is set wherever the ground-truth PNG holds that
    class's colour.
    """

    # Single source name under which all classes and images are registered.
    # It must be a string: the library uses it as a dictionary key.
    _SOURCE = "objects"

    # (class id, class name, ground-truth colour). Colours are written in
    # BGR order because cv2.imread returns BGR pixels; the matching RGB
    # values are noted on the right.
    _CLASSES = (
        (1, "floor", (66, 54, 7)),          # RGB (7, 54, 66)
        (2, "structure", (161, 161, 147)),  # RGB (147, 161, 161)
        (3, "prop", (0, 137, 181)),         # RGB (181, 137, 0)
        (4, "furniture", (22, 75, 203)),    # RGB (203, 75, 22)
    )

    def load_rooms(self, name, height, width):
        """Register the classes and every ``*colors.png`` image of a subset.

        Args:
            name: Sub-directory of ``ROOT_DIR/NYU_DIRECTORY`` that is
                searched recursively for images.
            height: Height in pixels recorded for every image.
            width: Width in pixels recorded for every image.
        """
        for class_id, class_name, _ in self._CLASSES:
            self.add_class(self._SOURCE, class_id, class_name)

        pattern = os.path.join(ROOT_DIR, "NYU_DIRECTORY/" + name, "**/*colors.png")
        print(pattern)
        # Sorted so that image ids are assigned in the same order on every run.
        for image_path in sorted(glob.glob(pattern, recursive=True)):
            file_name = os.path.basename(image_path)
            # The first five characters of the file name identify the image
            # and are reused to locate its ground-truth file in load_mask().
            self.add_image(source=self._SOURCE, image_id=file_name[:5],
                           path=image_path, height=height, width=width)

    def image_reference(self, image_id):
        """Return the file path of the image so it can be traced on disk."""
        info = self.image_info[image_id]
        if info["source"] == self._SOURCE:
            return info["path"]
        return super().image_reference(image_id)

    def load_mask(self, image_id):
        """Build the per-class masks of an image from its ground-truth PNG.

        Returns:
            masks: bool array of shape [height, width, 4], one channel per
                class in the order floor, structure, prop, furniture.
            class_ids: int32 array holding the class id of each channel.

        Raises:
            FileNotFoundError: no ``<id>_ground_truth.png`` file was found.
            IOError: the ground-truth file could not be decoded.
            ValueError: the ground truth is smaller than the registered
                image size.
        """
        info = self.image_info[image_id]
        mask_name = "{}{}".format(info['id'], "_ground_truth.png")

        # NOTE(review): assumes the ground truth normally sits beside the
        # colour image - confirm against the dataset layout. When it does
        # not, fall back to walking the whole project tree.
        sibling = os.path.join(os.path.dirname(info["path"]), mask_name)
        mask_source = sibling if os.path.isfile(sibling) else find(mask_name, ROOT_DIR)
        if mask_source is None:
            raise FileNotFoundError(
                "Ground truth {!r} not found under {!r}".format(mask_name, ROOT_DIR))

        mask_image = cv2.imread(mask_source)  # BGR; None when unreadable
        if mask_image is None:
            raise IOError("Could not read ground truth {!r}".format(mask_source))

        height, width = info["height"], info["width"]
        if mask_image.shape[0] < height or mask_image.shape[1] < width:
            raise ValueError(
                "Ground truth {!r} is {}x{}, smaller than the expected {}x{}".format(
                    mask_source, mask_image.shape[0], mask_image.shape[1],
                    height, width))
        region = mask_image[:height, :width]

        # One boolean channel per class: True where all three colour
        # components equal the class colour.
        mask = np.stack(
            [np.all(region == np.array(bgr, dtype=region.dtype), axis=-1)
             for _, _, bgr in self._CLASSES],
            axis=-1)

        # class_names is filled in by prepare(); its index is the class id.
        class_ids = np.array(
            [self.class_names.index(class_name) for _, class_name, _ in self._CLASSES],
            dtype=np.int32)
        return mask, class_ids

def find(name, path):
    """Return the path of the first file called ``name`` found under ``path``.

    The tree rooted at ``path`` is traversed with ``os.walk``; the first
    directory that contains a file named exactly ``name`` wins.

    Args:
        name: File name to look for (no directory part).
        path: Directory at which the search starts.

    Returns:
        The joined path of the match, or None when no such file exists.
    """
    for root, _dirs, files in os.walk(path):
        if name in files:
            return os.path.join(root, name)
    return None


In [None]:
# Build and prepare one dataset per subset directory, training first.
# Every image is registered with a height of 480 and a width of 640.
image_size = (480, 640)
datasets = {}
for subset in ("training", "testing"):
    subset_dataset = RoomsDataset()
    subset_dataset.load_rooms(subset, *image_size)
    subset_dataset.prepare()
    datasets[subset] = subset_dataset

dataset_train = datasets["training"]
dataset_val = datasets["testing"]

In [None]:
# Preview the training image at index 4 to confirm the files load
preview_image = dataset_train.load_image(4)
plt.imshow(preview_image)

In [None]:
# Inspect only the first few registered validation images; dumping the
# whole list floods the cell output.
dataset_val.image_info[:5]

In [None]:
# Display four randomly chosen training images together with their masks.
# np.random.choice draws with replacement, so an image can appear twice.
sample_ids = np.random.choice(dataset_train.image_ids, 4)
for sample_id in sample_ids:
    sample_image = dataset_train.load_image(sample_id)
    sample_masks, sample_class_ids = dataset_train.load_mask(sample_id)
    visualize.display_top_masks(
        sample_image, sample_masks, sample_class_ids,
        dataset_train.class_names)

## Create Model

In [None]:
# Build the network in training mode, using MODEL_DIR as its model directory
model = modellib.MaskRCNN(
    mode="training",
    config=config,
    model_dir=MODEL_DIR,
)

In [None]:
# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)
else:
    # Fail loudly on a typo instead of continuing with no weights loaded
    raise ValueError(
        "init_with must be 'imagenet', 'coco' or 'last', got {!r}".format(init_with))

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.

In [None]:
# Stage 1: train the head branches only.
# With layers="heads" every layer except the heads stays frozen. A regular
# expression can be passed instead to pick layers by name pattern.
model.train(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE,
    epochs=1,
    layers='heads',
)

In [None]:
# Stage 2: fine-tune the whole network at a tenth of the base learning rate.
# layers="all" trains every layer. A regular expression can be passed
# instead to pick layers by name pattern.
model.train(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE / 10,
    epochs=2,
    layers="all",
)

In [None]:
# Save weights
# Typically not needed because callbacks save after every epoch
# Uncomment to save manually
# model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes.h5")
# model.keras_model.save_weights(model_path)

## Detection

In [None]:
class InferenceConfig(RoomsConfig):
    """Training configuration adjusted to process one image at a time."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
# Test on a random image
image_id = random.choice(dataset_val.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(dataset_val, inference_config, 
                           image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

# The image comes from dataset_val, so its class names are taken from the
# same dataset.
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
                            dataset_val.class_names, figsize=(8, 8))

In [None]:
# Run detection on the image loaded above and draw the predictions
results = model.detect([original_image], verbose=1)

# A single image was passed in, so the first entry holds its detections
r = results[0]
visualize.display_instances(
    original_image,
    r['rois'], r['masks'], r['class_ids'],
    dataset_val.class_names,
    r['scores'],
    ax=get_ax(),
)

## Evaluation

In [None]:
# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
# np.random.choice draws with replacement, so an image may be picked twice.
image_ids = np.random.choice(dataset_val.image_ids, 10)
APs = []
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    # Run object detection; model.detect is given the loaded image directly
    results = model.detect([image], verbose=0)
    r = results[0]
    # Compute AP
    AP, precisions, recalls, overlaps =\
        utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                         r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)
    
print("mAP: ", np.mean(APs))