# Mask R-CNN - Train pods dataset


This notebook shows how to train a Mask R-CNN model, pre-trained on COCO, on your own dataset. I trained the model to segment pod objects in an image. You'll need a GPU, because the network uses a ResNet backbone (ResNet-50 in the configuration below), which would be too slow to train on a CPU. The code runs on a Google Colaboratory GPU. On Google Colab you can start to get okay-ish results in a few minutes, and good results in less than an hour.

In [None]:
# Start from a clean workspace: remove any previous dataset checkout, the
# sample_data/ folder and the logs/ folder.
# NOTE(review): logs/ is also where MODEL_DIR points later in this notebook,
# so anything saved there by an earlier run is deleted as well.
!rm -rf pods_dataset/
!rm -rf sample_data/
!rm -rf logs/

In [None]:
# Install the libraries used by this notebook. Every package is pinned to an
# exact version except mrcnn-colab (presumably the provider of the `mrcnn`
# package imported below - confirm), which installs whatever is latest.
!pip install tensorflow==1.13.1
!pip install keras==2.2.5
!pip install imgaug==0.4.0
!pip install scikit-image==0.16.2
!pip install h5py==2.10.0
!pip install numpy==1.18.5
!pip install mrcnn-colab

In [None]:
!git clone https://github.com/barbaraport/pods_dataset.git

In [None]:
!python --version

In [None]:
!pip show keras

In [None]:
!pip show tensorflow

In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import json
import pandas as pd
import skimage

from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize

from mrcnn.config import Config
import mrcnn.utils as utils
import mrcnn.model as modellib
import mrcnn.visualize as visualize
from mrcnn.model import log

import imgaug.augmenters as iaa

# Data Path
# Training and test splits; both paths are relative to the working directory
# and point inside the pods_dataset checkout.
TRAIN_PATH = 'pods_dataset/trainData/stage2_train/'
TEST_PATH = 'pods_dataset/trainData/stage2_test/'

# Get train and test IDs
# next(os.walk(p))[1] is the list of sub-directory names directly under p.
# NOTE(review): neither list is referenced again in the visible notebook.
train_ids = next(os.walk(TRAIN_PATH))[1]
test_ids = next(os.walk(TEST_PATH))[1]

%matplotlib inline 

# Root directory of the project
# (the working directory at the time this cell runs)
ROOT_DIR = os.getcwd()

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
# (skipped when a file already exists at COCO_MODEL_PATH)
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

## Configurations

In [None]:
class ShapesConfig(Config):
    """Configuration for training on the dataset.

    Derives from the base Config class and overrides values specific
    to the dataset. Attributes not listed here keep the base Config values.
    """
    # Backbone network name handed to the base Config
    BACKBONE = "resnet50"

    # Give the configuration a recognizable name
    NAME = "shapes"

    # Train on 1 GPU and 1 images per GPU. We can put multiple images on each
    # GPU. Batch size is (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # NOTE(review): upstream Matterport Config.__init__ recomputes BATCH_SIZE
    # as GPU_COUNT * IMAGES_PER_GPU, so this override is presumably ignored
    # (effective batch size 1) - confirm against the installed mrcnn package.
    BATCH_SIZE = 32

    # Number of classes (including background)
    # i.e. background + the single "pod" class registered by ShapesDataset
    NUM_CLASSES = 2

    # Steps per epoch
    STEPS_PER_EPOCH = 500

    # Image resize mode
    # No changes to the image
    # (ShapesDataset.load_image already resizes every image to
    # config.IMAGE_SHAPE before the model sees it)
    IMAGE_RESIZE_MODE = "none"
    IMAGE_MAX_DIM = 1024
    IMAGE_MIN_DIM = 1024

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.6

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.1

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.1

    
# Module-level configuration instance; ShapesDataset.load_image and the
# training cells read it as the global `config`.
config = ShapesConfig()
# display() is expected to print the resulting settings for inspection
config.display()

## Notebook Preferences

In [None]:
def get_ax(rows=1, cols=1, size=8):
    """Create a grid of Matplotlib axes for the notebook's visualizations.

    Every plot in the notebook goes through this helper so figure sizes can
    be tuned in one place: each grid cell is ``size`` x ``size`` inches.

    Args:
        rows: Number of subplot rows.
        cols: Number of subplot columns.
        size: Edge length, in inches, of each subplot cell.

    Returns:
        Whatever ``plt.subplots`` returns as its axes (a single Axes for a
        1x1 grid, otherwise an array of Axes).
    """
    figure_size = (size * cols, size * rows)
    # plt.subplots returns (figure, axes); only the axes are handed back.
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

## Dataset

Load the annotated pods dataset (images plus polygon annotations stored as JSON).

Extend the Dataset class and add a method to load the shapes dataset, `load_shapes()`, and override the following methods:

* load_image()
* load_mask()
* image_reference()

In [None]:
class ShapesDataset(utils.Dataset):
    """Pod images paired with polygon annotations stored as JSON files.

    Each image ``<name>.<ext>`` under ``<split>/images/`` is expected to have
    a matching ``<split>/annotations/<name>.json`` containing the keys
    ``shapes`` (a list of ``{"label", "points"}`` entries), ``imageWidth``
    and ``imageHeight`` (presumably labelme exports - confirm).

    Images and masks are both produced at the spatial size given by the
    module-level ``config.IMAGE_SHAPE`` so that they always line up.
    """

    def load_shapes(self, mode, is_train=True):
        """Register every image of one split together with its polygons.

        Args:
            mode: Unused; kept so existing call sites keep working.
            is_train: When truthy, read from ``TRAIN_PATH``; otherwise from
                ``TEST_PATH``.
        """
        self.add_class("shapes", 1, "pod")

        base_dir = TRAIN_PATH if is_train else TEST_PATH
        images_dir = base_dir + "images/"
        annotations_dir = base_dir + "annotations/"

        for image_id, filename in enumerate(os.listdir(images_dir)):
            image_path = images_dir + filename
            # splitext instead of filename[:-4], so extensions that are not
            # exactly three characters long (e.g. ".jpeg") still resolve.
            annotation_path = annotations_dir + os.path.splitext(filename)[0] + '.json'

            # Context manager so the file handle is closed right away.
            with open(annotation_path) as annotation_file:
                annotation = json.load(annotation_file)

            shapes = []
            class_ids = []
            # Class ids are assigned per image, in order of first appearance
            # of each label (first label seen -> 1).
            # NOTE(review): with more than one distinct label the ids would
            # differ between images and exceed the single registered class;
            # fine as long as every annotation only uses one label.
            labels_list = []
            for shape in annotation["shapes"]:
                label = shape["label"]
                if label not in labels_list:
                    labels_list.append(label)
                class_ids.append(labels_list.index(label) + 1)
                shapes.append(shape["points"])

            self.add_image(
                'shapes',
                image_id=image_id,
                path=image_path,
                annotation=annotation_path,
                width=annotation["imageWidth"],
                height=annotation["imageHeight"],
                shapes=shapes,
                class_ids=class_ids,
            )

    def load_image(self, image_id):
        """Read an image and resize it to ``config.IMAGE_SHAPE``.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            The resized image with three channels. ``preserve_range=True``
            keeps the original value range instead of rescaling to [0, 1].
        """
        info = self.image_info[image_id]
        img = imread(info.get("path"))

        if img.ndim == 2:
            # Grayscale file: replicate the single channel so the slice
            # below (and the model) always see three channels.
            img = np.stack([img] * 3, axis=-1)
        # Drop an alpha channel if present.
        img = img[:, :, :3]

        return resize(img, (config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1]),
                      mode='constant', preserve_range=True)

    def image_reference(self, image_id):
        """Return the polygons of an image from this dataset's source.

        For any other source the call is delegated to the base class.
        """
        info = self.image_info[image_id]
        if info["source"] == "shapes":
            return info["shapes"]
        # The original used super(self.__class__) with a single argument,
        # which yields an unbound super object without the method, and it
        # dropped the return value.
        return super(ShapesDataset, self).image_reference(image_id)

    def load_mask(self, image_id):
        """Rasterize the polygons of one image into instance masks.

        The mask is drawn at the same spatial size ``load_image`` resizes to
        (``config.IMAGE_SHAPE``); polygon coordinates are scaled from the
        annotated ``imageWidth``/``imageHeight`` accordingly. When the
        annotated size already equals that size the scale factors are 1 and
        the result matches an unscaled rasterization.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            Tuple ``(masks, class_ids)``: a boolean array of shape
            ``[rows, cols, instance_count]`` and an int32 array with one
            class id per instance.
        """
        # Imported here because `import skimage` alone does not guarantee
        # that the `draw` submodule has been loaded.
        import skimage.draw

        info = self.image_info[image_id]
        out_rows = int(config.IMAGE_SHAPE[0])
        out_cols = int(config.IMAGE_SHAPE[1])
        row_scale = out_rows / float(info["height"])
        col_scale = out_cols / float(info["width"])

        mask = np.zeros([out_rows, out_cols, len(info["shapes"])], dtype=np.uint8)

        for idx, points in enumerate(info["shapes"]):
            # Each point's first coordinate is used as the column and the
            # second as the row, as in the original rasterization.
            xs, ys = zip(*points)
            rows = np.asarray(ys, dtype=float) * row_scale
            cols = np.asarray(xs, dtype=float) * col_scale
            # shape= clips the polygon to the canvas, so vertices lying
            # outside the image neither raise nor wrap around.
            rr, cc = skimage.draw.polygon(rows, cols, shape=mask.shape[:2])
            mask[rr, cc, idx] = 1

        # Builtin bool: the np.bool alias is gone in recent NumPy releases.
        masks_np = mask.astype(bool)
        classids_np = np.array(info["class_ids"]).astype(np.int32)

        return masks_np, classids_np

In [None]:
# Training dataset
# The first argument fills load_shapes' `mode` parameter, which the method
# never reads; is_train=True selects TRAIN_PATH.
dataset_train = ShapesDataset()
dataset_train.load_shapes('shapes', is_train=True)
dataset_train.prepare()
print('Train: %d' % len(dataset_train.image_ids))

# Validation dataset
# is_train=False makes load_shapes read from TEST_PATH, i.e. the test split
# doubles as the validation set.
dataset_val = ShapesDataset()
dataset_val.load_shapes('shapes', is_train=False)
dataset_val.prepare()
print('Validation: %d' % len(dataset_val.image_ids))

In [None]:
# Load and display random samples
# np.random.choice samples with replacement by default, so the same image
# can show up more than once among the 4 picks.
image_ids = np.random.choice(dataset_train.image_ids, 4)
for image_id in image_ids:
    # Image and masks come from the dataset's own loaders
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

## Create Model

In [None]:
# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,model_dir=MODEL_DIR)

In [None]:
# Which weights to start with?
init_with = "last"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load from COCO_MODEL_PATH, the exact location the setup cell
    # downloaded the weights to, instead of a bare relative file name.
    # The listed head layers are excluded from loading (see the "Training"
    # notes: they are the layers trained from random initialization).
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    # NOTE(review): hard-coded checkpoint name, resolved against the
    # working directory; it must exist before this cell is run.
    model.load_weights("mask_rcnn_shapes_0006.h5", by_name=True)
else:
    # A misspelled option used to fall through silently and leave the model
    # without any loaded weights.
    raise ValueError(
        "init_with must be 'imagenet', 'coco' or 'last', got %r" % (init_with,))

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.

In [None]:
# One randomly chosen augmenter is applied per training image.
augmentation = iaa.OneOf([
    # Crop 0-16 px from each side. Written as CropAndPad with negative px
    # (the equivalent of iaa.Crop(px=(0, 16))) on purpose: upstream Mask
    # R-CNN replays only whitelisted augmenter class names on the masks, and
    # that list contains "CropAndPad" but not "Crop" - with imgaug 0.4.0's
    # Crop class the image would be cropped while its masks stay untouched.
    # NOTE(review): confirm the installed mrcnn package keeps that whitelist.
    iaa.CropAndPad(px=(-16, 0)),
    # Probability 1.0: when picked, the flip is always performed.
    iaa.Fliplr(1.0),
    iaa.Flipud(1.0),
    iaa.Affine(
        # Range written low-to-high; same interval as the former (45, -45).
        rotate=(-45, 45),
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}),
])

In [None]:
# Stage 1: train with layers='heads' (per the "Training" notes above, the
# backbone stays frozen) at a tenth of the configured learning rate, using
# the augmentation pipeline defined in the previous cell.
model.train(dataset_train, dataset_val,
    learning_rate=config.LEARNING_RATE/ 10,
    epochs=6,
    layers='heads',
    augmentation=augmentation
)

In [None]:
# Stage 2: fine-tune with layers='all', same learning rate and augmentation
# as stage 1.
# NOTE(review): in upstream Matterport Mask R-CNN `epochs` is the epoch
# number to train up to, not a count of additional epochs, so this would add
# 4 epochs on top of stage 1's 6 - confirm for the installed package.
model.train(dataset_train, dataset_val,
    learning_rate=config.LEARNING_RATE / 10,
    epochs=10,
    layers='all',
    augmentation=augmentation
)

In [None]:
%reload_ext tensorboard.notebook
%load_ext tensorboard.notebook
%tensorboard --logdir logs

## Detection

In [None]:
class InferenceConfig(ShapesConfig):
    """Configuration used for detection.

    Inherits every setting from ShapesConfig and only overrides NAME.
    """
    NAME = "eS_inference"

# Instance used by the inference model and by calculate_mAP below;
# display() is expected to print the resulting settings.
inference_config = InferenceConfig()
inference_config.display()

In [None]:
# Recreate the model in inference mode
# (this rebinds the global `model`, replacing the training-mode instance)
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
# model_path = model.find_last()[1]
# NOTE(review): hard-coded checkpoint, resolved against the working
# directory; it is a different file (0004) than the one the weight
# initialisation cell loads (0006).
model_path = "mask_rcnn_shapes_0004.h5"

# Load trained weights (fill in path to trained weights here)
assert model_path != "", "Provide path to trained weights"
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
# Either the literal string "random" (pick a validation image together with
# its ground truth) or the path of an image file to run detection on.
test_image = "pods_dataset/trainData/stage2_train/images/1fd56af5-befa-4bdd-871e-15edb7523cad_1.jpg" # Choose random or provide an image path

if test_image == "random":
    # Test on a random image
    image_id = random.choice(dataset_val.image_ids)
    original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)

    log("original_image", original_image)
    log("image_meta", image_meta)
    log("gt_class_id", gt_class_id)
    log("gt_bbox", gt_bbox)
    log("gt_mask", gt_mask)

    # Show the ground-truth instances of the picked image
    visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, dataset_train.class_names, figsize=(8, 8))
else:
    # Explicit checks instead of asserts: asserts vanish under `python -O`.
    if test_image == "":
        raise ValueError("Choose the type of test")

    original_image = cv2.imread(test_image)
    if original_image is None:
        # cv2.imread signals an unreadable or missing file by returning None
        # rather than raising; fail here instead of deep inside detect().
        raise IOError("Could not read image: %s" % test_image)

    # OpenCV decodes to BGR channel order, whereas the training images were
    # read with skimage's imread (RGB). Convert so the detector and the
    # visualization see the same channel order as during training.
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)


In [None]:
# Run detection on the single image selected in the previous cell
results = model.detect([original_image], verbose=1)

# One result entry per input image; only one image was passed in
r = results[0]
# Draw the predicted boxes, masks, class names and scores on the image
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], 
                            dataset_val.class_names, r['scores'], ax=get_ax())

## Evaluation

In [None]:
# Compute VOC-Style mAP
# Increase images quantity for better accuracy.
def calculate_mAP(dataset, image_ids, iou_threshold=0.1):
    """Compute the average precision (AP) of every requested image.

    Despite the name, the function returns the individual per-image APs;
    the caller averages them to obtain the mAP. Detection uses the
    module-level ``model`` and ``inference_config``.

    Args:
        dataset: Dataset to load images and ground truth from.
        image_ids: Iterable of image ids within ``dataset``.
        iou_threshold: IoU threshold forwarded to ``utils.compute_ap``.
            Defaults to 0.1, the value that was previously hard-coded.

    Returns:
        List with one AP value per image id, in iteration order (empty when
        ``image_ids`` is empty).
    """
    APs = []

    for image_id in image_ids:
        # Load image and ground truth data
        image, image_meta, gt_class_id, gt_bbox, gt_mask =\
            modellib.load_image_gt(dataset, inference_config,
                                   image_id, use_mini_mask=False)
        # Run object detection (a single image in, a single result out).
        # The former per-image mold_image() call was dropped: its result
        # was never used.
        r = model.detect([image], verbose=0)[0]
        # Compute AP; only the AP itself is kept
        AP, precisions, recalls, overlaps =\
            utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                             r["rois"], r["class_ids"], r["scores"], r['masks'],
                             iou_threshold=iou_threshold)
        APs.append(AP)
    return APs

# Per-image APs over the whole validation set ...
image_val_ids = dataset_val.image_ids
image_val_APs = calculate_mAP(dataset_val, image_val_ids)

# ... and over the whole training set
image_train_ids = dataset_train.image_ids
image_train_APs = calculate_mAP(dataset_train, image_train_ids)

# NOTE(review): the reported figure averages training and validation images
# together, so it is not a held-out score on its own.
APs = np.concatenate((image_train_APs, image_val_APs))

# Mean of the per-image APs
mAP = np.mean(APs)

In [None]:
print("mAP: ", mAP)