### Setup

In [1]:
# Mount Google Drive into the Colab filesystem; the project directory used
# below lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
# Work from the project root on Drive so the relative paths used later
# (notebooks/requirements.txt, demo_videos/...) resolve.
os.chdir("/content/drive/MyDrive/Final_Project")

In [3]:
# Install batch-face (provides the RetinaFace detector imported below); -q keeps pip quiet.
!pip install batch-face -q

In [4]:
# Install the project's remaining dependencies from notebooks/requirements.txt.
!pip install -r notebooks/requirements.txt

Collecting filterpy==1.4.5
  Downloading filterpy-1.4.5.zip (177 kB)
[K     |████████████████████████████████| 177 kB 5.3 MB/s 
[?25hCollecting lap==0.4.0
  Downloading lap-0.4.0.tar.gz (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 39.7 MB/s 
Building wheels for collected packages: filterpy, lap
  Building wheel for filterpy (setup.py) ... [?25l[?25hdone
  Created wheel for filterpy: filename=filterpy-1.4.5-py3-none-any.whl size=110474 sha256=78c9eb5a2079ab8dbaf407e2fc25bd8010e82465a437c6d4bdb55019c085bc5d
  Stored in directory: /root/.cache/pip/wheels/ce/e0/ee/a2b3c5caab3418c1ccd8c4de573d4cbe13315d7e8b0a55fbc2
  Building wheel for lap (setup.py) ... [?25l[?25hdone
  Created wheel for lap: filename=lap-0.4.0-cp37-cp37m-linux_x86_64.whl size=1590160 sha256=bf5058ff5fb947af0878164f1bee5b0d4ae101bfe90990db11df5004cab762d3
  Stored in directory: /root/.cache/pip/wheels/b1/0b/e3/ef9daf1b5547b56389e42c80c3100f1e6479bf5fd00fd9d6ba
Successfully built filterpy lap
Installing 

In [5]:
import cv2
import glob
import time

from tqdm.notebook import tqdm
from batch_face import RetinaFace

from google.colab.patches import cv2_imshow
from notebooks.sort import *
from notebooks.face_models import MobileFaceNet

In [6]:
# Demo video (path relative to the project root) that is passed to read_video below.
demo_video_path = "demo_videos/Product Marketing Team Meeting (Weekly) 2021-03-22.mp4"

In [7]:
# Face detector from batch-face, called by detect_and_track on each frame.
# NOTE(review): the positional 0 is presumably the GPU id — confirm against the batch-face docs.
detector = RetinaFace(0)

Downloading: "https://github.com/elliottzheng/face-detection/releases/download/0.0.1/mobilenet0.25_Final.pth" to /root/.cache/torch/hub/checkpoints/mobilenet0.25_Final.pth


  0%|          | 0.00/1.71M [00:00<?, ?B/s]

In [8]:
# Module-level tracker built with default arguments; detect_and_track calls
# update() on this single instance, so it is reused across frames.
# NOTE(review): Sort presumably comes from the `notebooks.sort` star import — confirm.
tracker = Sort()

In [9]:
def parse_out(resp, score_thr=0.95, area_thr=1000):
  """Filter raw detector output down to large, confident faces.

  Args:
    resp: iterable with one entry per frame; each entry is an iterable of
      (box, landmarks, score) detections, where box is (x1, y1, x2, y2).
    score_thr: a detection is kept only if its score is strictly greater.
    area_thr: a detection is kept only if its box area (width * height,
      computed after int truncation) is strictly greater.

  Returns:
    A list with one list per frame, each holding the surviving
    (box, landmarks, score) tuples. Box coordinates are converted to int;
    landmarks and score are passed through untouched.
  """
  outs = []
  for faces in resp:
    kept = []
    # Landmarks are only forwarded, never inspected, so no minimum number
    # of landmark points is required here.
    for box, landmarks, score in faces:
      box = [int(p) for p in box]
      area = (box[2] - box[0]) * (box[3] - box[1])
      if area > area_thr and score > score_thr:
        kept.append((box, landmarks, score))
    outs.append(kept)
  return outs

In [10]:
def add_margin(h, w, box, margin=0):
  """Grow a box by `margin` on every side, clamped to the image bounds.

  Args:
    h: image height; upper limit for the bottom edge.
    w: image width; upper limit for the right edge.
    box: (x1, y1, x2, y2) corner coordinates.
    margin: amount added on each side (default 0).

  Returns:
    [x1, y1, x2, y2] with the top-left corner floored at 0 and the
    bottom-right corner capped at (w, h).
  """
  left, top, right, bottom = box
  return [
    max(0, left - margin),
    max(0, top - margin),
    min(right + margin, w),
    min(bottom + margin, h),
  ]

In [23]:
def detect_and_track(frame):
  """Detect faces in a frame, step the tracker and draw the tracked boxes.

  Uses the module-level `detector` and `tracker`. Prints the detector's
  wall-clock latency (seconds) for the frame.

  Args:
    frame: image array passed to the detector; rectangles are drawn onto it
      in place. Its first two shape entries are used as (height, width).

  Returns:
    The same frame object, annotated with one rectangle per tracked box.
  """
  start = time.time()
  detected = detector([frame])
  print(time.time() - start)
  for out in parse_out(detected):
    # SORT documents its input rows as [x1, y1, x2, y2, score].
    detections = [list(box) + [score] for box, _, score in out]
    # The tracker must be stepped once per frame even when nothing was
    # detected (SORT documents np.empty((0, 5)) for that case); skipping
    # the call would leave stale tracks un-aged.
    dets = np.array(detections) if detections else np.empty((0, 5))
    tracked_objects = tracker.update(dets)
    for track in tracked_objects:
      # The last column is dropped before drawing (per SORT it is the
      # track id); the remaining four values are the box corners.
      coords = [int(b) for b in track[:-1]]
      x1, y1, x2, y2 = add_margin(*frame.shape[:2], coords, 20)
      cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
  return frame

In [25]:
import time

def read_video(path, length=None):
  """Run detect_and_track over the frames of a video file.

  Args:
    path: path of the video handed to cv2.VideoCapture.
    length: stop after this many frames have been processed; with the
      default None the video is read until no more frames are returned.

  Returns:
    None. Prints an error message if the video cannot be opened.
  """
  f_counter = 0
  cap = cv2.VideoCapture(path)
  try:
    if not cap.isOpened():
      print("Error opening video stream or file")
      return
    while cap.isOpened():
      ret, frame = cap.read()
      if not ret:
        # No frame returned: end of stream or read failure.
        break
      f_counter += 1
      detect_and_track(frame)
      # cv2_imshow(frame)
      if length == f_counter:
        break
  finally:
    # Release the capture even if detection/tracking raised mid-stream.
    cap.release()
    cv2.destroyAllWindows()

In [26]:
# Run detection + tracking on the demo video, stopping after 40 processed frames.
read_video(demo_video_path, 40)

(1440, 2560, 3)
0.38736486434936523
(1440, 2560, 3)
0.36090588569641113
(1440, 2560, 3)
0.3517894744873047
(1440, 2560, 3)
0.3932321071624756
(1440, 2560, 3)
0.3970341682434082
(1440, 2560, 3)
0.3982224464416504
(1440, 2560, 3)
0.4010629653930664
(1440, 2560, 3)
0.4031689167022705
(1440, 2560, 3)
0.3906073570251465
(1440, 2560, 3)
0.4045600891113281
(1440, 2560, 3)
0.40103983879089355
(1440, 2560, 3)
0.40354299545288086
(1440, 2560, 3)
0.40894389152526855
(1440, 2560, 3)
0.39344167709350586
(1440, 2560, 3)
0.3956878185272217
(1440, 2560, 3)
0.3766663074493408
(1440, 2560, 3)
0.41408419609069824
(1440, 2560, 3)
0.4001457691192627
(1440, 2560, 3)
0.40108704566955566
(1440, 2560, 3)
0.4132199287414551
(1440, 2560, 3)
0.40770888328552246
(1440, 2560, 3)
0.40412116050720215
(1440, 2560, 3)
0.4051666259765625
(1440, 2560, 3)
0.4021322727203369
(1440, 2560, 3)
0.40538787841796875
(1440, 2560, 3)
0.40000152587890625
(1440, 2560, 3)
0.398576021194458
(1440, 2560, 3)
0.3707590103149414
(1440, 25