In [1]:
import cv2
import os
import sys
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import time
# Root directory of the project: two levels above the current working
# directory, resolved to an absolute path.
ROOT_DIR = os.path.abspath("../../")

# `desk` is imported before ROOT_DIR is added to sys.path, so it has to be
# resolvable from the default search path (presumably a module that sits next
# to this notebook -- TODO confirm).
import desk
# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
from samples.balloon import balloon

import cv_visualize

%matplotlib inline 

# Directory to save logs and trained model; it is handed to the Mask R-CNN
# model as `model_dir` in a later cell.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Input video that a later cell opens with cv2.VideoCapture and runs
# detection on frame by frame.
VIDEO_PATH = os.path.join(ROOT_DIR,'samples/my/MAH03248.MP4')

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# Start from the project's desk configuration, then specialise it below.
config = desk.DeskConfig()


class InferenceConfig(type(config)):
    """Desk configuration overridden to run detection on one image at a time."""
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1


# Rebind `config` to the inference variant and print its settings.
config = InferenceConfig()
config.display()

# TensorFlow device string under which the model is constructed below.
DEVICE = "/cpu:0"  # /cpu:0 or /gpu:0
# Mode label for this run; same value as the mode the Mask R-CNN model is built with.
TEST_MODE = "inference"

def get_ax(rows=1, cols=1, size=16):
    """Create a grid of Matplotlib axes for the notebook's visualizations.

    Acts as the single place where figure sizes are controlled.

    rows, cols: shape of the subplot grid.
    size: edge length given to each grid cell; the figure is created with
        figsize (size * cols, size * rows).

    Returns the axes object(s) produced by plt.subplots; the figure itself
    is not returned.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

# Build the Mask R-CNN model on the selected device. The mode comes from
# TEST_MODE so the constant and the constructed model cannot drift apart.
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode=TEST_MODE, model_dir=MODEL_DIR,
                              config=config)

# Ask the model for the weights file to load (presumably the newest
# checkpoint under MODEL_DIR -- confirm against mrcnn.model.MaskRCNN.find_last).
weights_path = model.find_last()
# Alternative: pin a specific checkpoint instead of the latest one.
#weights_path = os.path.join(ROOT_DIR,"logs/desks20200307T0307/mask_rcnn_desks_0050.h5")

print("Loading weights ", weights_path)
model.load_weights(weights_path, by_name=True)

# Label names handed to the visualizer together with the detected class ids
# (presumably indexed by class id, with 'BG' as background -- TODO confirm).
class_names = ['BG','desk']


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  256
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  256
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [256 256   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE             

In [3]:
# Open the input video and fail loudly if it cannot be read; otherwise the
# properties below come back as zeros and the processing loop silently does nothing.
video = cv2.VideoCapture(VIDEO_PATH)
if not video.isOpened():
    raise IOError(f'Cannot open input video: {VIDEO_PATH}')

# Stream properties reused for the output file and for progress reporting.
fps = video.get(cv2.CAP_PROP_FPS)
frameCount = video.get(cv2.CAP_PROP_FRAME_COUNT)
size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))

# NOTE(review): the 'MJPG' fourcc is usually paired with an .avi container;
# confirm this OpenCV build accepts it for .mp4 or switch codec/container.
videoWriter = cv2.VideoWriter('trans.mp4', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, size)
if not videoWriter.isOpened():
    # Do not leave the capture handle open when the writer is unusable.
    video.release()
    raise IOError('Cannot open output video for writing: trans.mp4')

print(f'fps:{fps} frameCount:{frameCount} size:{size}')

fps:25.0 frameCount:1452.0 size:(1440, 1080)


In [5]:
# Run detection on every frame of `video`, draw the detections, and append the
# annotated frame to `videoWriter`, printing a single-line progress/ETA report.

# Read the first frame up front so the loop condition has something to test.
success, image = video.read()
frame_now = 1          # 1-based index of the frame currently being processed
percent = 0            # share of frames processed so far, in percent
time_last = time.time()
time_now = time_last
time_spend = 0         # total seconds spent so far
time_left = 0          # estimated seconds remaining
time_avr = 0           # average seconds per processed frame
try:
    while success:
        # NOTE(review): cv2 normally yields BGR frames -- confirm the model and
        # cv_visualize.display_instances expect that channel order.
        results = model.detect([image])
        r = results[0]
        image = cv_visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                               class_names, r['scores'])
        videoWriter.write(image)

        # Sample the clock after the frame is finished so that the elapsed time
        # (and therefore the average and the ETA) includes the current frame.
        time_now = time.time()
        time_spend += time_now - time_last
        time_last = time_now

        time_avr = time_spend / frame_now
        # Clamp at zero in case the container under-reports its frame count.
        time_left = max(frameCount - frame_now, 0) * time_avr
        # Guard against a reported frame count of 0 to avoid ZeroDivisionError.
        percent = round(frame_now / frameCount * 100, 3) if frameCount else 0
        print('\rframe:{:<4d}/{}\t\t{}%\tETA {:<2d}min {:<2d}s\tspend {:<2d}min {:<2d}s'.format(frame_now,
                                                                                                int(frameCount),
                                                                                                percent,
                                                                                                int(time_left/60),
                                                                                                int(time_left%60),
                                                                                                int(time_spend/60),
                                                                                                int(time_spend%60)
                                                                                               ),
              end='')

        success, image = video.read()
        frame_now += 1
finally:
    # Always release both handles, even if detection or drawing fails midway,
    # so the output file is finalized and the input is closed.
    video.release()
    videoWriter.release()

frame:1383/1452		95.248%	ETA 1 min 40s	spend 33min 33s