In [None]:
import sys

# Make the two vendored projects importable. Order matters: later cells read
# sys.path[-1] as the base directory for weights and videos, so 'pictor-ppe'
# has to be the last entry added here.
sys.path.extend(['deep_sort', 'pictor-ppe'])

In [None]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input

import numpy as np
import pandas as pd
import cv2

import matplotlib.pyplot as plt
import matplotlib as mpl

from IPython.display import display, Math
from time import time

# import sys
# sys.path.append('../')

from src.utils.image import letterbox_image, draw_detection
from src.yolo3.model import yolo_body

# Star import kept as-is; in the cells shown here the only name taken from it is fix_tf_gpu.
from src.utils.fixes import *
# NOTE(review): presumably applies TensorFlow GPU configuration workarounds
# (guess from the name) — confirm against src/utils/fixes.py.
fix_tf_gpu()

In [None]:
class_names = ['H', 'V']

# Three series of anchors, one per output layer (the separate output layers are
# there to improve detection of objects at different scales).
# Every anchor is [w, h]; the anchors are sorted by height.
# Each series is divided by its own factor (32, 16, 8) so that sizes are expressed
# relative to the grid cell width and height of that output layer.
#
# Earlier variant kept for reference (different grouping, divisors 64 / 32 / 16):
#   output-1: [[53, 198], [117, 294], [293, 334]] / 64
#   output-2: [[73, 63], [30, 97], [153, 119]]    / 32
#   output-3: [[8, 16], [38, 27], [19, 48]]       / 16
anchor_boxes = np.array(
    [
        [[153, 119], [117, 294], [293, 334]],  # output-1 anchor boxes
        [[73, 63], [30, 97], [53, 198]],       # output-2 anchor boxes
        [[8, 16], [38, 27], [19, 48]],         # output-3 anchor boxes
    ],
    dtype='float64',
) / np.array([32, 16, 8], dtype='float64').reshape(3, 1, 1)

# Size of the square image fed to the network.
input_shape  = (1248, 1248)

In [None]:
# Release whatever Keras state a previous run of this cell left behind.
K.clear_session()

# Class and anchor counts.
# The class count is pinned to 3 instead of len(class_names) (which is 2).
# NOTE(review): presumably this has to match the checkpoint loaded below — confirm.
num_classes = 3
num_anchors = anchor_boxes.shape[0] * anchor_boxes.shape[1]

# Network input: an RGB image of the configured size.
input_tensor = Input(shape=(*input_shape, 3))

# Channels per output layer: 3 anchors, each with 5 box parameters plus the class scores
# (the same value as (num_anchors // 3) * (5 + num_classes) for the 3x3 anchor table).
num_out_filters = 3 * (num_classes + 5)

# Assemble the YOLO network.
model = yolo_body(input_tensor, num_out_filters)

In [None]:
# Location of the weights file, built by appending to the last sys.path entry.
# NOTE(review): 'path_to_model_weights' looks like a placeholder, and it is joined to
# sys.path[-1] without any path separator — put the real checkpoint path here before running.
weight_path = sys.path[-1] + 'path_to_model_weights'

# Load the stored weights into the network built in the previous cell.
model.load_weights( weight_path )

In [None]:
def detection(
    prediction,
    anchor_boxes,
    num_classes,
    image_shape,
    input_shape,
    max_boxes = 20,
    score_threshold=0.3,
    iou_threshold=0.45,
    classes_can_overlap=True,
    class_ids=(1,),
):
    '''
    Decode raw YOLO output layers into boxes in original-image pixels and apply NMS.

    INPUT:
        prediction          : sequence of output-layer arrays/tensors, paired one-to-one
                              with `anchor_boxes`; each one is reshaped to
                              [batch, grid_h, grid_w, len(anchors), num_classes + 5]
        anchor_boxes        : per-layer anchor sizes as [w, h] in grid-cell units
        num_classes         : number of class scores stored after the 5 box parameters
        image_shape         : (height, width) of the original frame
        input_shape         : (height, width) of the network input
        max_boxes           : `max_output_size` of every NMS call (so it applies per
                              class when `classes_can_overlap` is True)
        score_threshold     : minimum confidence a box needs to survive NMS
        iou_threshold       : IoU above which NMS suppresses the lower-scored box
        classes_can_overlap : True  -> sigmoid class scores, NMS run separately per class id
                              False -> softmax class scores, a single NMS over all boxes
        class_ids           : class ids kept when `classes_can_overlap` is True. The default
                              (1,) reproduces the previously hard-coded behaviour; None
                              means every id in range(num_classes).
    OUTPUT:
        list with one tensor per image of the batch; every row is
        [x1, y1, x2, y2, confidence, label]
    '''

    all_boxes  = []

    # ---- decode each output layer ----
    for output, anchors in zip( prediction, anchor_boxes ):

        # -- preprocessing --
        batch_size     = output.shape[0]
        grid_h, grid_w = output.shape[1:3]

        # reshape to [batch_size, grid_height, grid_width, num_anchors, box_params]
        output = tf.reshape( output, [ -1, grid_h, grid_w, len(anchors), num_classes+5 ] )

        # create a tensor for the anchor boxes
        anchors_tensor = tf.constant(anchors, dtype=output.dtype)

        # -- scaling factors (all of these are in (height, width) order) --
        image_shape_tensor = tf.cast( image_shape,       output.dtype ) # actual image's shape
        grids_shape_tensor = tf.cast( output.shape[1:3], output.dtype ) # grid_height, grid_width @ output layer
        input_shape_tensor = tf.cast( input_shape,       output.dtype ) # yolo input image's shape

        image_shape_tensor = tf.reshape( image_shape_tensor, [-1, 1, 1, 1, 2] )
        grids_shape_tensor = tf.reshape( grids_shape_tensor, [-1, 1, 1, 1, 2] )
        input_shape_tensor = tf.reshape( input_shape_tensor, [-1, 1, 1, 1, 2] )

        # size of the image after the aspect-preserving resize into the network input
        sized_shape_tensor = tf.round( image_shape_tensor * tf.reshape( tf.reduce_min( input_shape_tensor / image_shape_tensor, axis=-1 ), [-1,1,1,1,1] ) )
        # to scale the boxes from grid's unit to actual image's pixel unit
        box_scaling = input_shape_tensor * image_shape_tensor / sized_shape_tensor / grids_shape_tensor
        # to offset the boxes
        # NOTE(review): this form looks like it matches the letterbox padding only for a
        # square network input — confirm before using non-square inputs.
        box_offsets = (tf.expand_dims(tf.reduce_max(image_shape_tensor, axis=-1), axis=-1) - image_shape_tensor) / 2.

        # Boxes below are stored as (x, y) / (w, h), i.e. width first, so both factors
        # have to be flipped from (height, width) order before they are applied.
        # Previously only the offsets were flipped and the scale was applied per the wrong axis.
        box_scaling_xy = box_scaling[..., ::-1]
        box_offsets_xy = box_offsets[..., ::-1]

        # -- box geometric properties --
        # row index of every grid cell
        grid_i = tf.reshape( np.arange(grid_h), [-1, 1, 1, 1] )
        grid_i = tf.tile( grid_i, [1, grid_w, 1, 1] )

        # column index of every grid cell
        grid_j = tf.reshape( np.arange(grid_w), [1, -1, 1, 1] )
        grid_j = tf.tile( grid_j, [grid_h, 1, 1, 1] )

        # (column, row) == (x, y) position of every cell
        grid_ji = tf.concat( [grid_j, grid_i], axis=-1 )
        grid_ji = tf.cast( grid_ji, output.dtype )

        # Box centers
        box_xy  = output[..., 0:2]
        box_xy  = tf.sigmoid( box_xy ) + grid_ji

        # Box sizes
        box_wh  = output[..., 2:4]
        box_wh  = tf.exp( box_wh ) * anchors_tensor

        # scale to actual pixel unit
        box_xy  = box_xy * box_scaling_xy - box_offsets_xy
        box_wh  = box_wh * box_scaling_xy

        # calculate top-left corner (x1, y1) and bottom-right corner (x2, y2) of the boxes
        box_x1_y1 = box_xy - box_wh / 2
        box_x2_y2 = box_xy + box_wh / 2

        # top-left corner cannot be negative
        box_x1_y1 = tf.maximum(0, box_x1_y1)
        # bottom-right corner cannot be more than actual image size
        box_x2_y2 = tf.minimum(box_x2_y2, image_shape_tensor[..., ::-1])

        # -- box labels and confidences --
        # class probabilities = objectness score * conditional class probabilities
        if classes_can_overlap:
            # use sigmoid for the conditional class probabilities
            classs_probs = tf.sigmoid( output[..., 4:5] ) * tf.sigmoid( output[..., 5:] )
        else:
            # use softmax for the conditional class probabilities
            classs_probs = tf.sigmoid( output[..., 4:5] ) * tf.nn.softmax( output[..., 5:] )

        box_cl = tf.argmax( classs_probs, axis=-1 )     # final classes
        box_sc = tf.reduce_max( classs_probs, axis=-1 ) # confidence scores

        # -- organize --
        # take care of dtype and dimensions
        box_cl = tf.cast( box_cl, output.dtype )
        box_cl = tf.expand_dims(box_cl, axis=-1)
        box_sc = tf.expand_dims(box_sc, axis=-1)

        # store all information as: [ left(x1), top(y1), right(x2), bottom(y2),  confidence, label ]
        boxes  = tf.reshape( tf.concat( [ box_x1_y1, box_x2_y2, box_sc, box_cl ], axis=-1 ),
                              [batch_size, -1, 6] )

        all_boxes.append( boxes )

    # Merge across all output layers
    all_boxes  = tf.concat( all_boxes,  axis=1 )

    # To store all the final results of all images in the batch
    all_final_boxes = []

    # ---- non-max suppression, one image of the batch at a time ----
    for _boxes_ in all_boxes:

        if classes_can_overlap:
            # Perform NMS for each requested class individually
            kept_ids = range(num_classes) if class_ids is None else class_ids

            # to store the final results of this image
            final_boxes = []

            for class_id in kept_ids:

                # Get the boxes whose winning label is this class
                class_boxes  = _boxes_[ _boxes_[...,-1] == class_id ]

                # Boxes are passed as (x1,y1,x2,y2); the IoU used by NMS is the same
                # as for (y1,x1,y2,x2) because both axes are swapped consistently.
                selected_idc = tf.image.non_max_suppression(
                    class_boxes[...,:4], # boxes' (x1,y1,x2,y2)
                    class_boxes[...,-2], # boxes' scores
                    max_output_size = max_boxes,
                    iou_threshold = iou_threshold,
                    score_threshold = score_threshold
                )

                # boxes selected by nms
                class_boxes = tf.gather( class_boxes,  selected_idc )
                final_boxes.append( class_boxes )

            if final_boxes:
                # concatenate boxes for each class in the image
                final_boxes = tf.concat( final_boxes,  axis=0 )
            else:
                # no class requested: return an empty result instead of failing in tf.concat
                final_boxes = tf.zeros( [0, 6], dtype=_boxes_.dtype )

        else:
            # Perform one NMS over the boxes of all classes
            selected_idc = tf.image.non_max_suppression(
                _boxes_[...,:4], # boxes' (x1,y1,x2,y2)
                _boxes_[...,-2], # boxes' scores
                max_output_size = max_boxes,
                iou_threshold = iou_threshold,
                score_threshold = score_threshold
            )

            # boxes selected by nms
            final_boxes = tf.gather( _boxes_,  selected_idc )

        # append final boxes for each image in the batch
        all_final_boxes.append( final_boxes )

    return all_final_boxes

In [None]:
import cv2
import numpy as np
# from deep_sort_realtime.deepsort_tracker import DeepSort

# DeepSORT -> Importing DeepSORT.
from application_util import preprocessing
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet


video_frames = []

# Input clip and its basic properties.
cap = cv2.VideoCapture(sys.path[-1] + '/../ConstructionSiteOne.mp4')
fps = cap.get(cv2.CAP_PROP_FPS)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Annotated output video, written with the input's frame rate and frame size.
out = cv2.VideoWriter(
    sys.path[-1] + '/../outputOne.mp4',
    cv2.VideoWriter_fourcc(*'MP4V'),
    fps,
    (frame_width, frame_height)
)

# Feature encoder whose output is attached to every DeepSORT Detection below.
model_filename = sys.path[-1] + '/../mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)

# DeepSORT tracker with a cosine appearance metric.
max_cosine_distance = 0.4
nn_budget = 100
metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
tracker = Tracker(
    metric,
    max_iou_distance=0.7,
    max_age = int(10 * fps)  # ten seconds' worth of frames
)

# Report (but do not abort) when the input could not be opened; the loop below
# is simply skipped in that case.
if not cap.isOpened():
  print("Error opening video stream or file")

# Save original image shape as (height, width)
image_shape = np.array( [frame_height, frame_width] )

# Number of frames sent through the detector in one model.predict call.
BATCH_SIZE = 10

counter = 1
batch_frames = []
batch_frames_resized = []
# The first frame has no predecessor, so its difference image is all zeros.
diff_frames = [np.zeros(shape=image_shape)]

t0 = time() # set a timer
ress = []
tracks_arr = []

prev_frame = None

try:
  while cap.isOpened():
    ret, frame = cap.read()

    if ret:
      batch_frames.append(frame)
      # Letterbox to the size the model was built with and scale pixels to [0, 1].
      batch_frames_resized.append(letterbox_image(frame, input_shape)/255.)

      # Smoothing (to remove unnecessary noise) and grayscaling
      blured_frame  = cv2.GaussianBlur(frame, (5,5), 0)
      buff_frame = cv2.cvtColor(blured_frame, cv2.COLOR_BGR2GRAY)
      if counter > 1:
        # cv2.absdiff instead of abs(a - b): subtracting uint8 arrays wraps around
        # before abs() is applied, which turns small negative changes into values near 255.
        diff_frame = cv2.absdiff(buff_frame, prev_frame)
        diff_frames.append(diff_frame)
      prev_frame = buff_frame

    # Run the detector on a full batch, and also on whatever is left when the
    # stream ends, so the trailing frames are not silently dropped.
    if batch_frames and (not ret or len(batch_frames) == BATCH_SIZE):
      image_data  = np.array( batch_frames_resized )

      prediction = model.predict(image_data)

      boxes = detection(
          prediction,
          anchor_boxes,
          num_classes,
          image_shape,
          input_shape = input_shape,
          max_boxes = 10,
          score_threshold=0.5,
          iou_threshold=0.45)

      # Per image: [[x, y, w, h], score] for every detected box
      # (corner format converted to top-left + size).
      dets = [[[[box[0],  box[1], box[2] - box[0], box[3] - box[1]], box[4]]
      for box in image_boxes.numpy()]
      for image_boxes in boxes]

      for detection_batch, fframe, dif_frame in zip(dets, batch_frames, diff_frames):
        detts = [x[0] for x in detection_batch]
        scores = [x[1] for x in detection_batch]
        features = encoder(fframe, detts)
        detections = [Detection(bbox, score, feature) for bbox, score, feature in zip(detts, scores, features)]
        tracker.predict()
        tracker.update(detections)
        tracks = tracker.tracks
        res_img = draw_detection(fframe, tracks, class_names, dif_frame)
        out.write(res_img)

      batch_frames.clear()
      batch_frames_resized.clear()
      diff_frames.clear()

    if not ret:
      break

    counter += 1

finally:
  # Always close the reader and finalize the output file, even if a frame fails.
  cap.release()
  out.release()

print('time taken to process : {:.2f} ms'.format( (time()-t0)*1000 ))


time taken to process : 272722.66 ms


In [None]:
import cv2
import numpy as np
# from deep_sort_realtime.deepsort_tracker import DeepSort

# DeepSORT -> Importing DeepSORT.
from application_util import preprocessing
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet


video_frames = []

# Input clip and its basic properties.
cap = cv2.VideoCapture(sys.path[-1] + 'path_to_input_video')
fps = cap.get(cv2.CAP_PROP_FPS)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Output video holding the thresholded frame differences.
out = cv2.VideoWriter(
    sys.path[-1] + 'path_to_output_video',
    cv2.VideoWriter_fourcc(*'MP4V'),
    fps,
    (frame_width, frame_height)
)

# Report (but do not abort) when the input could not be opened; the loop below
# is simply skipped in that case.
if not cap.isOpened():
  print("Error opening video stream or file")

# Save original image shape as (height, width)
image_shape = np.array( [frame_height, frame_width] )

# Differences below this value are zeroed out.
# NOTE(review): 200 was chosen while the difference was computed with wrapping uint8
# subtraction; with a true absolute difference it keeps only very large changes —
# revisit the value.
DIFF_THRESHOLD = 200

counter = 1
# The first frame has no predecessor: write an all-black image for it. Sized from the
# video itself (previously hard-coded to 720x1280) so it always matches the writer.
diff_frame = np.zeros(shape=(frame_height, frame_width), dtype='uint8')

t0 = time() # set a timer

prev_frame = None

try:
  while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
      break

    # Smooth, then convert to grayscale.
    frame  = cv2.GaussianBlur(frame, (9,9), 5)
    buff_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    if counter > 1:
      # cv2.absdiff instead of abs(a - b): subtracting uint8 arrays wraps around
      # before abs() is applied, so darkening pixels showed up as values near 255.
      diff_frame = cv2.absdiff(buff_frame, prev_frame)
      diff_frame[diff_frame < DIFF_THRESHOLD] = 0
    prev_frame = buff_frame

    # The writer was opened for 3-channel frames: replicate the gray image.
    out.write(np.repeat(np.expand_dims(diff_frame, axis=-1), 3, axis=-1))
    counter += 1

finally:
  # Always close the reader and finalize the output file, even if a frame fails.
  cap.release()
  out.release()

print('time taken to process : {:.2f} ms'.format( (time()-t0)*1000 ))


time taken to process : 38991.49 ms
