In [0]:
# Mount Google Drive at /content/drive so that files stored there (our input
# video and pre-trained models) can be read by the cells below.
# Running this cell prompts interactively for an authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
cd drive/My\ Drive/human-tracker

/content/drive/My Drive/human-tracker


In [0]:

'''
Code modified to run on Google Colab.

Edmond Tsoi, Steven Huang, Nicholas for EECS 442 Final Project, Winter 2020
Human Recognition, Path Detection, and Classification using Faster RCNN

Dependencies: TensorFlow v2.2.0, Python3.7, OpenCV 4.1 (Google Colab Pro with GPU enabled)
'''

# Standard library
import os
import pickle
import time

# Suppress TensorFlow's native (C++) info/warning log messages.
# This has to happen BEFORE TensorFlow is imported, otherwise messages emitted
# while the library loads are still printed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Third-party
import cv2
import numpy as np
import tensorflow as tf
from google.colab.patches import cv2_imshow

# Ensure TensorFlow and OpenCV versions are compatible with our code.
print("TensorFlow version: {}".format(tf.__version__))
print("OpenCV version: {}".format(cv2.__version__))

'''
DetectorAPI class adapted from Tensorflow Object Detection Framework
Reference URL: https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb
'''
class DetectorAPI:
    """Run a frozen TensorFlow object-detection graph on individual frames.

    The frozen graph is loaded once in the constructor and kept in a
    ``tf.compat.v1.Session``; call :meth:`processFrame` for every image and
    :meth:`close` when done to release the session.
    """

    def __init__(self, path_to_ckpt):
        """Load the frozen graph and look up its input/output tensors.

        Args:
            path_to_ckpt: Path to the serialized ``GraphDef`` file
                (e.g. ``frozen_inference_graph.pb``).
        """
        self.path_to_ckpt = path_to_ckpt

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.compat.v1.GraphDef()
            with tf.io.gfile.GFile(self.path_to_ckpt, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
            # Import with an empty name scope so tensors keep their original names.
            tf.import_graph_def(od_graph_def, name='')

        # Retained so existing code that reads this attribute keeps working.
        # It is a context manager that is never entered and owns no resources.
        self.default_graph = self.detection_graph.as_default()
        self.sess = tf.compat.v1.Session(graph=self.detection_graph)

        # Define input and output tensors for detection_graph.
        self.image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        self.detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for the corresponding object.
        self.detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
        self.detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
        self.num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')

    def processFrame(self, image, verbose=True):
        """Run detection on one image.

        Args:
            image: Array of shape (height, width, channels); the model expects
                a batch shaped [1, None, None, 3], so a batch axis is added.
            verbose: When true (the default), print the inference time of
                this frame. Pass ``False`` to avoid flooding notebook output.

        Returns:
            A tuple ``(boxes, scores, classes, num)`` where ``boxes`` is a list
            of 4-tuples of ints in pixel units (elements 0 and 2 are scaled by
            the image height, elements 1 and 3 by the image width), ``scores``
            is a list of floats, ``classes`` a list of ints, and ``num`` the
            detection count as an int.
        """
        # Expand dimensions since the trained model expects a batch axis.
        image_np_expanded = np.expand_dims(image, axis=0)

        # Actual detection.
        start_time = time.time()
        (boxes, scores, classes, num) = self.sess.run(
            [self.detection_boxes, self.detection_scores, self.detection_classes, self.num_detections],
            feed_dict={self.image_tensor: image_np_expanded})
        end_time = time.time()

        if verbose:
            print("Frame Time:", end_time-start_time)

        im_height, im_width, _ = image.shape
        # Convert the normalized box coordinates of the (single) batch entry
        # to integer pixel coordinates.
        boxes_list = [
            (int(c0 * im_height), int(c1 * im_width), int(c2 * im_height), int(c3 * im_width))
            for c0, c1, c2, c3 in boxes[0]
        ]

        return boxes_list, scores[0].tolist(), [int(x) for x in classes[0].tolist()], int(num[0])

    def close(self):
        """Release the TensorFlow session.

        Only the session needs closing: the object stored in
        ``self.default_graph`` is a context manager without a ``close()``
        method, so calling ``close()`` on it raises ``AttributeError``.
        """
        self.sess.close()

'''
Simple algorithm to detect falling
'''
def falling(w, h, ratio_threshold=0.80):
    """Heuristically decide whether a bounding box looks like a fallen person.

    A box is considered "falling" when its width-to-height ratio is at least
    ``ratio_threshold``.

    Args:
        w: Box width.
        h: Box height.
        ratio_threshold: Minimum width/height ratio that counts as falling.

    Returns:
        True if ``w / h >= ratio_threshold``; False otherwise, including for
        degenerate boxes whose height is zero or negative.
    """
    # Boxes can collapse to zero height after integer truncation; treat them
    # as "not falling" instead of dividing by zero.
    if h <= 0:
        return False
    return float(w) / h >= ratio_threshold

if __name__ == "__main__":
    # COCO class ids drawn by this script.
    PERSON_CLASS = 1
    CAR_CLASS = 3

    # model_path = '/content/drive/My Drive/human-tracker/ssd_inception_v2_coco_11_06_2017/frozen_inference_graph.pb'
    model_path = '/content/drive/My Drive/human-tracker/faster_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb'

    # Hyperparameters: score thresholds, first for humans (class 1), second for cars (class 3).
    # Reference to labels: https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    thresholds = [0.4, 0.8]
    # The list has exactly two entries; indexing it with 2 raises IndexError.
    person_threshold, car_threshold = thresholds[0], thresholds[1]

    # Walking video footage adapted from: https://www.youtube.com/watch?v=QyUi149TcPQ
    # Modify the input video path (You can choose any video as you wish.)
    input_path = '/content/drive/My Drive/human-tracker/vids/me-falling.mp4'
    # input_path = '/content/drive/My Drive/human-tracker/walking.mp4'
    output_path = '/content/drive/My Drive/human-tracker/output/output-falling.mp4'

    odapi = DetectorAPI(path_to_ckpt=model_path)
    cap = cv2.VideoCapture(input_path)
    out = None
    pts = []        # path points accumulated over all frames
    outer_box = []  # every accepted person box, pickled at the end

    try:
        if not cap.isOpened():
            # Fail loudly instead of silently writing an empty video.
            raise IOError("Could not open input video: {}".format(input_path))

        vw, vh = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Adapt for google colab way of displaying video.
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        # Make sure resolution matches frame resolution.
        out = cv2.VideoWriter(output_path, fourcc, 20.0, (vw, vh))

        while True:
            r, img = cap.read()
            if not r:
                break
            img = cv2.resize(img, (vw, vh))
            boxes, scores, classes, num = odapi.processFrame(img)

            # Visualization of the results of a detection.
            for i in range(len(boxes)):
                box = boxes[i]

                # Class 1 represents human.
                if classes[i] == PERSON_CLASS and scores[i] > person_threshold:
                    outer_box.append(box)

                    w, h = box[3] - box[1], box[2] - box[0]
                    rect_color = (0, 0, 255) if falling(w, h) else (255, 0, 0)
                    cv2.putText(img, "Person", (box[1], box[0]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
                    cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), rect_color, 2)

                    # Path point: horizontal midpoint of the box, 5 px above
                    # its lower edge.
                    center_coordinates = ((box[1] + box[3]) // 2, max(box[0], box[2]) - 5)
                    pts.append(center_coordinates)

                    # Draw the path accumulated so far with cv2.circle.
                    for pt in pts:
                        cv2.circle(img, pt, 2, (0, 0, 255), 1)

                # Class 3 represents car.
                if classes[i] == CAR_CLASS and scores[i] > car_threshold:
                    cv2.rectangle(img, (box[1], box[0]), (box[3], box[2]), (0, 128, 0), 2)

            # Save our output video in mp4 format (using cv2.VideoWriter)
            out.write(img)
    finally:
        # Always release the capture, the writer and the TensorFlow session,
        # even if processing fails part-way through.
        cap.release()
        if out is not None:
            out.release()
        odapi.sess.close()

    # Dump the collected person boxes into a pickle file.
    with open('important', 'wb') as file:
        pickle.dump(outer_box, file)


TensorFlow version: 2.2.0-rc3
OpenCV version: 4.1.2
Frame Time: 10.379324674606323
Frame Time: 0.09568190574645996
Frame Time: 0.09384465217590332
Frame Time: 0.09581780433654785
Frame Time: 0.09496712684631348
Frame Time: 0.09359025955200195
Frame Time: 0.09392380714416504
Frame Time: 0.09363365173339844
Frame Time: 0.09561824798583984
Frame Time: 0.09414362907409668
Frame Time: 0.10135674476623535
Frame Time: 0.09482312202453613
Frame Time: 0.09474444389343262
Frame Time: 0.09731459617614746
Frame Time: 0.10110068321228027
Frame Time: 0.09650135040283203
Frame Time: 0.1007084846496582
Frame Time: 0.0928652286529541
Frame Time: 0.09488987922668457
Frame Time: 0.09468507766723633
Frame Time: 0.09508943557739258
Frame Time: 0.09749817848205566
Frame Time: 0.09397459030151367
Frame Time: 0.09586429595947266
Frame Time: 0.09151291847229004
Frame Time: 0.09355616569519043
Frame Time: 0.0951223373413086
Frame Time: 0.09502410888671875
Frame Time: 0.09710812568664551
Frame Time: 0.1000821590