# Social Distance Tool with depth

This tool combines two algorithms to accurately detect people who are violating the social distancing protocol:
- Facebook/Detectron2 (Faster RCNN implementation) `https://github.com/facebookresearch/detectron2`
- "Digging into Self-Supervised Monocular Depth Prediction" `https://github.com/nianticlabs/monodepth2`

**Input:**
- A video sequence

**Output:**
- bounding boxes on all persons detected in the video
- highlighting people who are in close proximity
- depth map for accurate calculations 
***

## Code
**Import libraries for Detectron2**

In [1]:
# !python -m detectron2.utils.collect_env # to check if Detectron2 is working fine
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
import matplotlib.pyplot as plt

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

**Import libraries and files for MonoDepth2 algorithm**

In [2]:
# Libraries for monodepth2
from __future__ import absolute_import, division, print_function
%matplotlib inline

import os
import PIL.Image as pil

import torch
from torchvision import transforms

import networks
from utils import download_model_if_doesnt_exist

**Convert Video to PNG Frames**

In [3]:
%%time
!rm -r frames/*
!mkdir frames/

# Path to the input video
video = "sample.mp4"

# Open the video; cnt numbers the PNG files written below
cap = cv2.VideoCapture(video)
cnt = 0
# Frame rate of the source, read once here so it is available after the capture is released
FPS = cap.get(cv2.CAP_PROP_FPS)

# Check if video file is opened successfully
if not cap.isOpened():
  print("Error opening video stream or file")

# NOTE(review): this consumes the first frame without saving it, so the PNGs
# below start at the second frame of the video - confirm this is intended.
ret, first_frame = cap.read()

try:
  # Read until the video is exhausted or 150 frames have been written
  while cap.isOpened():

    # Capture frame-by-frame
    ret, frame = cap.read()

    # A failed read ends the loop
    if not ret:
      break

    # Save each frame as frames/0000.png, frames/0001.png, ...
    cv2.imwrite('frames/'+'{:04d}'.format(cnt)+'.png', frame)
    cnt += 1
    if cnt == 150:
      break
finally:
  # Always free the capture handle, even if reading or writing fails
  cap.release()
    

rm: cannot remove 'frames/*': No such file or directory
CPU times: user 8.19 s, sys: 144 ms, total: 8.33 s
Wall time: 8.17 s


**Download a pretrained model from Detectron2 Model Zoo**

In [4]:
# Build the configuration object that the predictor below is constructed from
cfg = get_cfg()

# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
# Merge in the model-zoo config file for "COCO-Detection/faster_rcnn_R_50_C4_3x.yaml"
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_C4_3x.yaml"))
# Test-time score threshold for the ROI heads, set to 0.9
# (presumably detections scoring below it are dropped - see the detectron2 config reference)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9  # set threshold for this model

# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
# The weights URL is looked up with the same config name that was merged above
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_C4_3x.yaml")
# `predictor` is called on one image at a time by find_closest_people
predictor = DefaultPredictor(cfg)

## Define all the key functions

In [36]:
# define a function which returns the bottom center of every bbox
def mid_point(img,person,idx):
  """Draw the box of person[idx] on img and return its bottom-center point.

  Args:
    img: image that is drawn on in place via cv2 drawing calls.
    person: indexable collection of boxes; person[idx] unpacks to
      (x1, y1, x2, y2).
    idx: index of the box to draw; also rendered as a text label.

  Returns:
    Tuple (x_mid, y_mid) of ints: the horizontal centre of the box and its
    y2 edge.
  """
  #get the coordinates
  x1, y1, x2, y2 = person[idx]

  #compute bottom center of bbox (from the raw values, before any truncation)
  x_mid = int((x1+x2)/2)
  y_mid = int(y2)
  mid = (x_mid, y_mid)

  # OpenCV drawing functions expect integer pixel coordinates, and the boxes
  # may hold floats, so cast the corners explicitly before drawing.
  top_left = (int(x1), int(y1))
  bottom_right = (int(x2), int(y2))
  cv2.rectangle(img, top_left, bottom_right, (0,0,255), 2)

  # Mark the bottom-center point and label it with the box index
  cv2.circle(img, mid, 5, (0, 0, 255), -1)
  cv2.putText(img, str(idx), mid, cv2.FONT_HERSHEY_SIMPLEX,1, (255, 255, 255), 2, cv2.LINE_AA)

  return mid

# define a function which computes euclidean distance between every pair of midpoints
from scipy.spatial import distance
def compute_distance(midpoints,num):
  """Return a num x num matrix of pairwise Euclidean distances.

  Only the strict upper triangle (row index < column index) is filled; the
  diagonal and the lower triangle stay 0.

  Args:
    midpoints: indexable collection of points; at least `num` entries.
    num: number of points to compare.

  Returns:
    numpy array of shape (num, num).
  """
  dist = np.zeros((num,num))
  for i in range(num):
    # j starts after i, so each unordered pair is visited once and i == j
    # never occurs (no separate inequality check is needed).
    for j in range(i+1,num):
      dist[i, j] = distance.euclidean(midpoints[i], midpoints[j])
  return dist


# Finds pairs of people who are close together
def find_closest(dist,num,thresh):
  """Collect every pair (i, j) with i < j whose distance is at most thresh.

  Args:
    dist: 2-D indexable of distances; only entries dist[i][j] with i < j are read.
    num: number of people (rows/columns of dist to scan).
    thresh: inclusive distance threshold.

  Returns:
    Three parallel lists (p1, p2, d): first index, second index and the
    distance of each close pair, in row-major scan order.
  """
  p1=[]
  p2=[]
  d=[]
  for i in range(num):
    # Start at i+1: a person is never paired with themselves
    for j in range(i+1,num):
      if dist[i][j] <= thresh:
        p1.append(i)
        p2.append(j)
        d.append(dist[i][j])
  return p1,p2,d


# Given pairs of people who are close, highlight them
def change_2_red(img,person,p1,p2):
  """Connect each close pair with a line and re-draw their boxes in (255,0,0).

  Args:
    img: image that is drawn on in place.
    person: indexable collection of boxes, each unpacking to (x1, y1, x2, y2).
    p1: indices of the first person of each pair.
    p2: indices of the second person of each pair (parallel to p1).

  Returns:
    The same img object, after drawing.
  """
  # mid_point also re-draws the box, dot and label of each person it is called for
  mid1 = [mid_point(img,person,p) for p in p1]
  mid2 = [mid_point(img,person,pp) for pp in p2]
  for start, end in zip(mid1, mid2):
    cv2.line(img, start, end, (0,255,0), thickness=2, lineType=8, shift=0)

  # Every person that appears in at least one pair, in ascending index order
  risky = sorted(set(p1) | set(p2))
  for i in risky:
    x1, y1, x2, y2 = person[i]
    # OpenCV drawing functions expect integer pixel coordinates.
    # NOTE(review): (255,0,0) is blue if img is in BGR order (as returned by
    # cv2.imread); it only shows as red after a BGR->RGB swap - confirm intent.
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (255,0,0), 2)
  return img


# Main function to find closest people
def find_closest_people(name,thresh,savedir):
  """Annotate one frame with detected boxes and close-pair highlights.

  Reads 'frames/<name>', runs the module-level `predictor` on it, keeps the
  detections whose predicted class is 0, marks pairs whose bottom-center
  points are at most `thresh` apart, and writes the annotated image to
  '<savedir>/<name>'.

  Args:
    name: file name of the frame inside 'frames/'.
    thresh: inclusive distance threshold passed to find_closest.
    savedir: output directory; created if it does not exist.

  Returns:
    0, always.

  Raises:
    FileNotFoundError: if the frame cannot be read.
  """
  import os  # local import so the function does not rely on another cell's imports

  src_path = 'frames/'+name
  img = cv2.imread(src_path)
  # cv2.imread returns None instead of raising when the file is missing or unreadable
  if img is None:
    raise FileNotFoundError("Could not read image: " + src_path)

  outputs = predictor(img)
  instances = outputs['instances']
  classes = instances.pred_classes.cpu().numpy()
  bbox = instances.pred_boxes.tensor.cpu().numpy()

  # Keep only detections of class 0 (presumably the "person" class - verify
  # against the model's metadata)
  person = bbox[classes == 0]

  midpoints = [mid_point(img,person,i) for i in range(len(person))]
  num = len(midpoints)
  dist = compute_distance(midpoints,num)
  p1, p2, _ = find_closest(dist,num,thresh)
  img = change_2_red(img,person,p1,p2)

  # cv2.imwrite does not create missing directories, so make sure it exists
  if savedir:
    os.makedirs(savedir, exist_ok=True)
  cv2.imwrite(savedir+'/'+name,img)
  return 0

**Fetch all the frames of the video sequence**

In [37]:
# Names of all PNG files in frames/, in ascending (lexicographic) order
frames = sorted(
    entry for entry in os.listdir("frames/") if entry.endswith(".png")
)

**Main loop to process all the frames**

In [35]:
from tqdm import tqdm
# Distance threshold between bottom-center points, in the same units as the
# box coordinates, at or below which two people are flagged
thresh=100
# Annotate every frame and write the result to frames2/; a plain loop is used
# because only the side effects matter, not the return values
for frame_name in tqdm(frames):
    find_closest_people(frame_name, thresh, 'frames2')


  0%|          | 0/150 [00:00<?, ?it/s][A
  1%|          | 1/150 [00:00<01:23,  1.79it/s][A
  1%|▏         | 2/150 [00:00<01:15,  1.97it/s][A
  2%|▏         | 3/150 [00:01<01:09,  2.12it/s][A
  3%|▎         | 4/150 [00:01<01:05,  2.24it/s][A
  3%|▎         | 5/150 [00:02<01:02,  2.33it/s][A
  4%|▍         | 6/150 [00:02<01:00,  2.40it/s][A
  5%|▍         | 7/150 [00:02<00:58,  2.45it/s][A
  5%|▌         | 8/150 [00:03<00:57,  2.49it/s][A
  6%|▌         | 9/150 [00:03<00:56,  2.51it/s][A
  7%|▋         | 10/150 [00:04<00:55,  2.52it/s][A
  7%|▋         | 11/150 [00:04<00:54,  2.54it/s][A
  8%|▊         | 12/150 [00:04<00:54,  2.55it/s][A
  9%|▊         | 13/150 [00:05<00:53,  2.56it/s][A
  9%|▉         | 14/150 [00:05<00:53,  2.56it/s][A
 10%|█         | 15/150 [00:06<00:52,  2.56it/s][A
 11%|█         | 16/150 [00:06<00:52,  2.57it/s][A
 11%|█▏        | 17/150 [00:06<00:51,  2.57it/s][A
 12%|█▏        | 18/150 [00:07<00:51,  2.57it/s][A
 13%|█▎        | 19/150 [00:0

## Save results

In [39]:
%%time
# Names of all annotated PNG frames in frames2/, in ascending order
frames = sorted(
    entry for entry in os.listdir("frames2/") if entry.endswith(".png")
)

# The writer is created lazily from the first readable frame, so the frame
# size is always defined before it is used and frames are streamed to disk
# instead of being buffered in memory.
out = None
for frame_name in frames:

    #reading each file
    img = cv2.imread('frames2/'+frame_name)
    if img is None:
        # cv2.imread returns None for unreadable files; skip them
        continue
    # NOTE(review): cv2.VideoWriter.write expects BGR frames, so this swap
    # changes the colours in the output video. It is kept because the drawing
    # colours used upstream appear to rely on it - confirm.
    img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)

    if out is None:
        height, width, layers = img.shape
        size = (width,height)
        out = cv2.VideoWriter('sample_output2.mp4',cv2.VideoWriter_fourcc(*'DIVX'), FPS, size)

    # append the frame to the output video
    out.write(img)

if out is None:
    print("No readable frames found in frames2/; no video written")
else:
    out.release()

CPU times: user 13.5 s, sys: 1.17 s, total: 14.7 s
Wall time: 4.38 s
