<a href="https://colab.research.google.com/github/WoradeeKongthong/object_detection_with_MaskRCNN/blob/master/object_detection_on_video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mask R-CNN Object Detection on Video 

## Clone the Mask R-CNN model for TensorFlow 2

In [1]:
import os
# Clone the Mask-RCNN-TF2 repository only when no local checkout directory exists,
# so re-running this cell does not attempt a second clone.
if not os.path.isdir("./Mask-RCNN-TF2") :
  !git clone https://github.com/ahmedfgad/Mask-RCNN-TF2.git

## Install packages

In [2]:
!pip install -r Mask-RCNN-TF2/requirements.txt



## Import additional packages

In [3]:
import random
import sys
import cv2
from google.colab import files
import numpy as np

## Import mrcnn

In [5]:
# Directory of the cloned Mask-RCNN-TF2 repository (holds the mrcnn package)
ROOT_DIR = 'Mask-RCNN-TF2'

# Make the mrcnn package importable. The membership check keeps sys.path free
# of duplicate entries when this cell is re-run.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# import mrcnn
import mrcnn
import mrcnn.config
import mrcnn.model
import mrcnn.visualize
from mrcnn import utils

Using TensorFlow backend.


## Get class names of COCO dataset

In [6]:
# Load the class label names from disk, one label per line.
# The context manager closes the file handle as soon as the labels are read.
with open(ROOT_DIR+"/samples/coco_labels.txt") as labels_file:
    CLASS_NAMES = labels_file.read().strip().split("\n")

## Create pre-trained Mask R-CNN (trained on COCO Dataset)

### Create config

In [7]:
class SimpleConfig(mrcnn.config.Config):
    """Configuration for the inference model; overrides selected attributes of mrcnn.config.Config."""
    # Give the configuration a recognizable name
    NAME = "coco_inference"
    
    # set the number of GPUs to use along with the number of images per GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes, intended to include the background class (named BG).
    # NOTE(review): no +1 is added here, so the labels file presumably already
    # lists BG as one of its entries - confirm against coco_labels.txt.
    NUM_CLASSES = len(CLASS_NAMES)


### Create model architecture

In [8]:
# Initialize the Mask R-CNN model in inference mode with SimpleConfig.
# This step only builds the Keras model architecture; the trained weights
# are loaded in a separate step afterwards.
# model_dir is set to the current working directory.
model = mrcnn.model.MaskRCNN(mode="inference", 
                             config=SimpleConfig(),
                             model_dir=os.getcwd())

### Load trained weights to the model

In [9]:
# Local path of the trained weights file (in the current working directory)
COCO_MODEL_PATH = os.path.join('.', "mask_rcnn_coco.h5")

# Download COCO trained weights from Releases if needed;
# skipped when the file already exists, so re-runs do not download again
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

# Load the weights into the model.
# by_name=True presumably matches weights to layers by layer name - confirm
# against the mrcnn load_weights documentation.
model.load_weights(filepath=COCO_MODEL_PATH, 
                   by_name=True)


Downloading pretrained model to ./mask_rcnn_coco.h5 ...
... done downloading pretrained model!


# Run object detection on video

## Create helper functions for drawing visualizations on a frame

In [20]:
def random_colors(N):
    """Return N reproducible pseudo-random colors.

    Args:
        N: number of colors to generate.

    Returns:
        A list of N tuples, each holding three floats in [0, 255).
    """
    # A private generator seeded with 1 yields the same sequence as seeding
    # the global NumPy generator with 1, but leaves the global random state
    # untouched for the rest of the notebook.
    rng = np.random.RandomState(1)
    return [tuple(row) for row in 255 * rng.rand(N, 3)]

# One color per class label, looked up by label name when drawing
colors = random_colors(len(CLASS_NAMES))
class_dict = dict(zip(CLASS_NAMES, colors))

In [None]:
def apply_mask(image, mask, color, alpha=0.5):
    """Blend `color` into `image` wherever `mask` equals 1.

    The image is modified in place, one channel per entry of `color`, and the
    same array is returned. Pixels outside the mask keep their value.
    """
    selected = mask == 1
    keep = 1 - alpha
    for channel, value in enumerate(color):
        plane = image[:, :, channel]
        blended = plane * keep + alpha * value
        image[:, :, channel] = np.where(selected, blended, plane)
    return image

In [None]:
def display_instances(image, boxes, masks, ids, names, scores):
    """Draw the mask, bounding box and caption of every detection onto `image`.

    Args:
        image: frame to draw on; it is modified by the drawing calls.
        boxes: array with one row per instance, unpacked as (y1, x1, y2, x2).
        masks: array whose last axis indexes instances (`masks[:, :, i]`).
        ids: per-instance class indices into `names`.
        names: sequence of class label names.
        scores: per-instance scores, or None to caption with the label only.

    Returns:
        The annotated image.

    Raises:
        AssertionError: if boxes, masks and ids disagree on the instance count.
    """
    n_instances = boxes.shape[0]
    if not n_instances:
        # Nothing detected: hand the frame back untouched
        return image

    assert boxes.shape[0] == masks.shape[-1] == ids.shape[0]

    for i in range(n_instances):
        # An all-zero box carries no detection
        if not np.any(boxes[i]):
            continue

        # Plain Python ints are accepted by every OpenCV build for pixel coordinates
        y1, x1, y2, x2 = (int(v) for v in boxes[i])
        label = names[ids[i]]
        color = class_dict[label]
        score = scores[i] if scores is not None else None
        # Compare against None so that a score of exactly 0.0 is still shown
        if score is None:
            caption = label
        else:
            caption = '{} {:.2f}'.format(label, score)

        image = apply_mask(image, masks[:, :, i], color)
        image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        image = cv2.putText(
            image, caption, (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.7, color, 2
        )

    return image

### Specify input and output paths

In [23]:
# Absolute paths of the source video to read and the annotated video to write.
# NOTE(review): /content looks like the Colab working directory - adjust when
# running elsewhere.
input_file_path = "/content/japanese-seaside-walk.mp4"
output_file_path = "/content/japanese-seaside-walk.avi"

### Create video writer object

In [24]:
# Output frame size in pixels; every frame is resized to this before writing
frameWidth, frameHeight = 640, 480
# Motion-JPEG codec, written at a fixed 10 frames per second
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
result = cv2.VideoWriter(
    output_file_path, fourcc, 10.0, (frameWidth, frameHeight)
)

### Run object detection and write to output video

In [None]:
# Read the input video frame by frame, run detection on each frame, draw the
# results and append the annotated frame to the output video.
cap = cv2.VideoCapture(input_file_path)
try:
    if not cap.isOpened():
        # Fail loudly instead of silently producing an empty output video
        raise IOError("Could not open input video: {}".format(input_file_path))

    while cap.isOpened():
        check, frame = cap.read()
        if not check:
            # No more frames (or a read error): stop processing
            break

        frame = cv2.resize(frame, (frameWidth, frameHeight))
        # OpenCV decodes frames in BGR channel order; the detector is given an
        # RGB copy, while drawing and writing keep using the BGR frame.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = model.detect([rgb_frame])
        r = results[0]
        img = display_instances(frame, r['rois'],
                                r['masks'], r['class_ids'],
                                CLASS_NAMES, r['scores'])
        result.write(img)
finally:
    # Release both handles so the output file is finalized and fully flushed
    # to disk before it is downloaded.
    cap.release()
    result.release()


### Download result


In [None]:
# Send the annotated video to the user via the google.colab files helper
files.download(output_file_path)