In [1]:
import numpy as np
import cv2 as cv

In [2]:
# Tunable detection parameters
confThreshold = 0.5  # Minimum confidence; boxes scoring below this are dropped from further processing
maskThreshold = 0.3  # Cut-off applied to the grey mask image; a lower value yields a bigger mask

In [3]:
# Load names of classes
"""
Model assets used by this cell.

mscoco_labels.names lists, one per line, the objects for which the model was
trained; the class names are read from it first. colors.txt is read next and
holds the colors used to mask objects of the various classes.

The network is then loaded from two files:

    frozen_inference_graph.pb : the pre-trained weights.
    mask_rcnn_inception_v2_coco_2018_01_28.pbtxt : the text graph file, tuned by
    OpenCV's DNN support group so that the network can be loaded with OpenCV
    (this notebook reads it from mask_rcnn/mask_rcnn.pbtxt).

The DNN backend is set to OpenCV and the target to the CPU.
cv.dnn.DNN_TARGET_OPENCL can be tried as the preferable target to run on a GPU,
but keep in mind that the DNN module of the OpenCV version current at the time
of writing was tested only with Intel GPUs.
"""

root = "mask_rcnn/"  # directory prefix shared by every model asset path
classesFile = "mscoco_labels.names"
classes = None
with open(root + classesFile, 'rt') as labels_file:
    # Drop the trailing newline(s), then keep one class name per line.
    classes = labels_file.read().rstrip('\n').split('\n')
 
# Load the colors
# Each non-blank line of the colors file holds three numeric channel values
# separated by whitespace; every line becomes one float numpy array in `colors`.
colorsFile = "colors.txt"
with open(root + colorsFile, 'rt') as f:
    colorsStr = f.read().rstrip('\n').split('\n')

colors = []
for line_no, line in enumerate(colorsStr, start=1):
    # split() with no argument tolerates tabs, repeated spaces and stray
    # leading/trailing whitespace, which split(' ') turns into empty fields.
    channels = line.split()
    if not channels:
        continue  # skip blank lines instead of failing on them
    if len(channels) < 3:
        raise ValueError(
            "%s%s line %d: expected 3 color values, got %r"
            % (root, colorsFile, line_no, line)
        )
    colors.append(np.array([float(value) for value in channels[:3]]))
 
# Paths of the text graph and of the weights for the model
textGraph = root + "mask_rcnn.pbtxt"
modelWeights = root + "frozen_inference_graph.pb"

# Build the network from the TensorFlow weights + text graph, then ask for
# OpenCV's own DNN backend with the CPU as the preferred target.
net = cv.dnn.readNetFromTensorflow(modelWeights, textGraph)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

In [4]:
"""Step 4 : Read the input

In this step we read the image, video stream or the webcam. 
In addition, we also open the video writer to save the frames with detected output bounding boxes."""
import argparse

outputFile = "mask_rcnn_out_py.jpg"

image_path = "path/to/your/image.jpg"

ap = argparse.ArgumentParser()
ap.add_argument('--image', help='Path to input image')
ap.add_argument('--video', help='Path to input image')
args = ap.parse_args()

if (args.image):
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    outputFile = args.image[:-4]+'_mask_rcnn_out_py.jpg'
elif (args.video):
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = args.video[:-4]+'_mask_rcnn_out_py.avi'
else:
    # Webcam input
    cap = cv.VideoCapture(0)
 
# Get the video writer initialized to save the output video
if (not args.image):
    vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M','J','P','G'), 28, (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

usage: ipykernel_launcher.py [-h] [--image IMAGE] [--video VIDEO]
ipykernel_launcher.py: error: unrecognized arguments: -f C:\Users\alexw\AppData\Roaming\jupyter\runtime\kernel-fcaf36c4-9eb2-4a85-a968-12c5f22c7097.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [1]:
import os
import random

# Generates the colors file: one "R G B" row of integers in 0..255 per class.
num_classes = 90  # Number of MS COCO classes
colors_seed = 42  # fixed seed so that re-running the cell yields the same colors
# Write where the loading cell looks for the file ("mask_rcnn/" + "colors.txt"),
# not into the current working directory.
colors_path = os.path.join("mask_rcnn", "colors.txt")

os.makedirs(os.path.dirname(colors_path), exist_ok=True)
rng = random.Random(colors_seed)  # private generator; global random state stays untouched
with open(colors_path, "w") as f:
    for _ in range(num_classes):
        color = [rng.randint(0, 255) for _ in range(3)]
        f.write(" ".join(map(str, color)) + "\n")