In [1]:
import os
from timeit import time
import warnings
import sys
import cv2
import numpy as np
from PIL import Image
from yolo import YOLO

from deep_sort import preprocessing, nn_matching
from deep_sort.tracker import Tracker
from deep_sort.detection import Detection as ddet

from tools import generate_detections_hassan as gdet

warnings.filterwarnings('ignore')


Using TensorFlow backend.


In [2]:
# Load the class labels that go with the Mask R-CNN model, one label per line.
# A context manager is used so the file handle is closed deterministically
# instead of being left to the garbage collector.
labelsPath = os.path.join('mask-rcnn-coco', "object_detection_classes_coco.txt")
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# Derive the paths to the Mask R-CNN weights and model configuration.
weightsPath = os.path.join('mask-rcnn-coco', "frozen_inference_graph.pb")
configPath = os.path.join('mask-rcnn-coco', "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt")

# Load the Mask R-CNN network from disk through OpenCV's DNN module.
# (Per the original note, the model was trained on COCO with 90 classes.)
print("[INFO] loading Mask R-CNN from disk...")
net = cv2.dnn.readNetFromTensorflow(weightsPath, configPath)

[INFO] loading Mask R-CNN from disk...


In [3]:
def mask_rcnn(net, image, conf_threshold=0.3, mask_threshold=0.3, target_label='person'):
    """Detect instances of ``target_label`` in ``image`` and return masked crops.

    Parameters
    ----------
    net : OpenCV DNN network
        Must expose the output layers ``"detection_out_final"`` and
        ``"detection_masks"``.
    image : numpy.ndarray
        Frame to analyse; its first two dimensions are read as (height, width).
    conf_threshold : float, optional
        A detection is kept only if its confidence is strictly greater than
        this value (default 0.3).
    mask_threshold : float, optional
        Mask values strictly greater than this are treated as foreground
        (default 0.3).
    target_label : str, optional
        Only detections whose ``LABELS[classID]`` equals this are kept
        (default ``'person'``).

    Returns
    -------
    list of (box, instance)
        ``box`` is a float array ``[x, y, w, h]`` obtained from the scaled,
        unclipped network box. ``instance`` is the visible part of the box with
        pixels outside the mask zeroed, resized to 64 (wide) x 128 (high).
    """
    (H, W) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, swapRB=True, crop=False)
    net.setInput(blob)
    boxes, masks = net.forward(["detection_out_final", "detection_masks"])

    output = []
    # Loop over the detected objects.
    for i in range(boxes.shape[2]):
        # Class ID and confidence (i.e. probability) of this detection.
        classID = int(boxes[0, 0, i, 1])
        confidence = boxes[0, 0, i, 2]

        # Drop weak predictions.
        if not confidence > conf_threshold:
            continue
        # Guard the lookup: an ID outside the label list would raise or, when
        # negative, silently wrap around to the wrong label.
        if not 0 <= classID < len(LABELS) or LABELS[classID] != target_label:
            continue

        # Scale the bounding box back to image coordinates.
        box = boxes[0, 0, i, 3:7] * np.array([W, H, W, H])
        startX, startY, endX, endY = (int(v) for v in box.astype("int"))
        boxW = endX - startX
        boxH = endY - startY
        if boxW <= 0 or boxH <= 0:
            # A degenerate box cannot be resized or cropped.
            continue

        # Part of the box that actually lies inside the frame.
        visX0, visY0 = max(startX, 0), max(startY, 0)
        visX1, visY1 = min(endX, W), min(endY, H)
        if visX1 <= visX0 or visY1 <= visY0:
            continue

        # Resize the mask to the full box, then keep only the rows/columns
        # that correspond to the visible region so that the mask and the ROI
        # always have identical shapes (required by cv2.bitwise_and).
        mask = masks[i, classID]
        mask = cv2.resize(mask, (boxW, boxH), interpolation=cv2.INTER_NEAREST)
        mask = mask[visY0 - startY:visY1 - startY, visX0 - startX:visX1 - startX]
        visMask = ((mask > mask_threshold) * 255).astype("uint8")

        # bitwise_and writes into a new array, so the input frame is untouched.
        roi = image[visY0:visY1, visX0:visX1]
        instance = cv2.bitwise_and(roi, roi, mask=visMask)
        instance = cv2.resize(instance, (64, 128))

        # Convert (x1, y1, x2, y2) to (x, y, w, h) in place.
        box[2:] -= box[0:2]
        output.append((box, instance))

    return output

In [4]:
# deep_sort part (instantiation)
################################
model_filename = 'model_data/mars-small128.pb'
# Feature encoder built from the frozen model above.
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
max_cosine_distance = 0.3  # threshold handed to the cosine metric
nn_budget = None
metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
tracker = Tracker(metric, max_iou_distance=0.7, max_age=300, n_init=30)

# video part
################################
writeVideo_flag = True
video_capture = cv2.VideoCapture("code_challenge_faimdata.mp4")
# Fail loudly on a missing/unreadable input; otherwise the processing loop
# would end immediately and leave an empty output file behind.
if not video_capture.isOpened():
    raise IOError("Could not open input video: code_challenge_faimdata.mp4")
if writeVideo_flag:
    # Define the codec and create the VideoWriter object.
    w = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    # Write at the source frame rate so playback speed matches the input;
    # fall back to 30 when the container does not report a usable rate.
    source_fps = video_capture.get(cv2.CAP_PROP_FPS)
    out_fps = source_fps if source_fps > 0 else 30
    out = cv2.VideoWriter('labeled_code_challenge_faimdata_RCNN.avi', fourcc, out_fps, (w, h))

# Detections whose box area (w * h, in pixels) reaches this value are dropped
# before tracking.
MAX_BOX_AREA = 100000
# Overlap threshold handed to non-maxima suppression (loop-invariant).
nms_max_overlap = 1.0

fps = 0.0
try:
    while True:
        # Read the video frame by frame until no frame is returned.
        ret, frame = video_capture.read()
        if not ret:
            break

        t1 = time.time()

        # Boxes and masked crops from Mask R-CNN, minus oversized boxes.
        boxs_masks = [
            box_mask for box_mask in mask_rcnn(net, frame)
            if box_mask[0][2] * box_mask[0][3] < MAX_BOX_AREA
        ]
        boxs = [box_mask[0] for box_mask in boxs_masks]

        # Extract one appearance feature per detection.
        features = encoder(frame, boxs_masks)

        # Every detection is given a fixed confidence score of 1.0 here.
        detections = [ddet(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Advance the tracker by one frame.
        tracker.predict()
        tracker.update(detections)

        # Draw only tracks that are confirmed and were updated recently.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[2]), int(bbox[3]))
            cv2.rectangle(frame, top_left, bottom_right, (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), top_left, 0, 5e-3 * 200, (0, 255, 0), 2)

        if writeVideo_flag:
            out.write(frame)

        # Running average of the processing rate; skip the update when the
        # timer shows no elapsed time to avoid a division by zero.
        elapsed = time.time() - t1
        if elapsed > 0:
            fps = (fps + (1. / elapsed)) / 2
        print("fps= %f"%(fps))
finally:
    # Release the capture and the writer even if processing fails part-way,
    # so the output file is finalised and the input handle is freed.
    video_capture.release()
    if writeVideo_flag:
        out.release()

print("done !!")





fps= 0.148391
fps= 0.451133
fps= 0.595292
fps= 0.669007
fps= 0.697783
fps= 0.718337
fps= 0.721929
fps= 0.732323
fps= 0.736974
fps= 0.729422
fps= 0.727998
fps= 0.728087
fps= 0.719098
fps= 0.712845
fps= 0.716061
fps= 0.714094
fps= 0.712856
fps= 0.719189
fps= 0.718463
fps= 0.718879
fps= 0.708553
fps= 0.708579
fps= 0.709596
fps= 0.709107
fps= 0.708848
fps= 0.709730
fps= 0.707422
fps= 0.705773
fps= 0.703236
fps= 0.707169
fps= 0.708899
fps= 0.711782
fps= 0.714494
fps= 0.713059
fps= 0.708106
fps= 0.706107
fps= 0.705611
fps= 0.703146
fps= 0.711987
fps= 0.710547
fps= 0.709083
fps= 0.701240
fps= 0.705667
fps= 0.711708
fps= 0.699943
fps= 0.703524
fps= 0.702839
fps= 0.707230
fps= 0.694661
fps= 0.690948
fps= 0.706928
fps= 0.708519
fps= 0.709063
fps= 0.708584
fps= 0.703413
fps= 0.704761
fps= 0.697416
fps= 0.698316
fps= 0.699499
fps= 0.703053
fps= 0.705328
fps= 0.706722
fps= 0.695338
fps= 0.705256
fps= 0.702480
fps= 0.701826
fps= 0.702236
fps= 0.703428
fps= 0.707525
fps= 0.706321
fps= 0.702768
fp

fps= 0.699662
fps= 0.695819
fps= 0.686651
fps= 0.690993
fps= 0.692924
fps= 0.701775
fps= 0.698315
fps= 0.693483
fps= 0.695134
fps= 0.695477
fps= 0.699801
fps= 0.704203
fps= 0.700011
fps= 0.694331
fps= 0.687055
fps= 0.688801
fps= 0.694252
fps= 0.691215
fps= 0.692314
fps= 0.690010
fps= 0.683518
fps= 0.712023
fps= 0.701761
fps= 0.699032
fps= 0.695034
fps= 0.695185
fps= 0.691209
fps= 0.691832
fps= 0.689769
fps= 0.697691
fps= 0.694595
fps= 0.696419
fps= 0.695396
fps= 0.691315
fps= 0.685317
fps= 0.681400
fps= 0.691531
fps= 0.695617
fps= 0.694755
fps= 0.691703
fps= 0.692318
fps= 0.692387
fps= 0.693861
fps= 0.694358
fps= 0.687299
fps= 0.687980
fps= 0.687607
fps= 0.694880
fps= 0.692002
fps= 0.692468
fps= 0.692461
fps= 0.696327
fps= 0.697284
fps= 0.690618
fps= 0.692978
fps= 0.691287
fps= 0.691394
fps= 0.690496
fps= 0.696324
fps= 0.692724
fps= 0.680587
fps= 0.693100
fps= 0.686901
fps= 0.692571
fps= 0.698844
fps= 0.698786
fps= 0.699979
fps= 0.694078
fps= 0.692313
fps= 0.693825
fps= 0.694099
fps= 0