# Evaluation of Generalized Bounding Box Approach

Files for analyses of bounding boxes can be found in `bounding_box.py`. To see how these functions work, please refer to `sims.ipynb`.


## General Definition


In [1]:
import numpy as np
from matplotlib import pyplot as plt
import cv2

import pickle

import bounding_box as box

In [2]:
# define video to be analyzed
vid_id = 'VIRAT_S_000201_08_001652_001838'

## YOLO Detection

### Import Libraries


In [3]:
!pip install -U mxnet-cu101==1.7.0
!pip install gluoncv

Requirement already up-to-date: mxnet-cu101==1.7.0 in /usr/local/lib/python3.6/dist-packages (1.7.0)


In [4]:
import numpy as np
from matplotlib import pyplot as plt

import cv2
import mxnet as mx
from gluoncv import model_zoo, data, utils

import pickle

### Run Detection and Evaluate Social Distancing

In [5]:
ctx = mx.gpu(0)
net = model_zoo.get_model('yolo3_darknet53_voc', pretrained=True, ctx=ctx)
person_ind = [i for i, cls in enumerate(net.classes) if cls == 'person'][0]

In [6]:
print(mx.context.gpu())

gpu(0)


In [7]:
# read in image
vidcap = cv2.VideoCapture(vid_id+'.mp4')
success, image = vidcap.read()
count = 0

# create object for writing out image
fps = vidcap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
out = cv2.VideoWriter(vid_id + '-YOLO_output.mp4', fourcc, fps, (image.shape[1], image.shape[0]))

# create list for storing results
num_people = []
num_violations = []

while (success):

  # copy image for display
  img_copy = image.copy()

  # convert image for computation
  short_val = 512
  img = mx.nd.array(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  x, img = data.transforms.presets.yolo.transform_test(img, short=short_val)

  # run the network
  x = x.as_in_context(ctx)
  class_IDs, scores, bounding_boxs = net(x)

  # get results
  ids_arr = class_IDs[0].asnumpy()
  scores_arr = scores[0].asnumpy()
  boxes_arr = bounding_boxs[0].asnumpy()
  inds = np.where(ids_arr[:,0] == person_ind)[0]

  # extract only the people
  class_IDs = mx.nd.array(ids_arr[inds])
  scores = mx.nd.array(scores_arr[inds])
  bounding_boxs = mx.nd.array(boxes_arr[inds])

  # rescale bounding boxes
  bounding_boxs = bounding_boxs.asnumpy()
  bounding_boxs = bounding_boxs * (min(image.shape[1], image.shape[0])/short_val)

  # run bounding_box.count_undistanced
  num_pairs, pair_ids, _ = box.count_undistanced(bounding_boxs)
  
  # plot bounding boxes of people in violation of social distancing
  undistanced_ids = np.unique(pair_ids)
  for id in range(len(bounding_boxs)):
    pt1 = (bounding_boxs[id][0], bounding_boxs[id][1])
    pt2 = (bounding_boxs[id][2], bounding_boxs[id][3])
    if id in undistanced_ids:
      img_copy = cv2.rectangle(img_copy, pt1, pt2, (0,0,255), 2)
    else:
      img_copy = cv2.rectangle(img_copy, pt1, pt2, (0,255,0), 2)

  # append results to lists
  num_violations.append(num_pairs)
  num_people.append(len(bounding_boxs))

  # write image out
  out.write(img_copy)

  # cycle to next frame
  success,image = vidcap.read()
  count += 1

# release in and out video files
vidcap.release()
out.release()
cv2.destroyAllWindows()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[[522.81775 230.78001 530.85596 253.19951]]
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this image!
[]
No detections in this im

### Export Data

In [11]:
# send results to pickle file for later retrieval
with open(vid_id + '-YOLO_data.pkl', 'wb') as f:
  pickle.dump([num_violations, num_people], f)

In [8]:
# IF RUNNING ON COLAB: download files
from google.colab import files
files.download(vid_id + '-YOLO_data.pkl')
files.download(vid_id + '-YOLO_output.mp4')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## DETR Detection

### Import Libraries

In [13]:
!pip install sk-video



In [14]:
import math

from PIL import Image
import requests

import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'

import skvideo.io

import ipywidgets as widgets
from IPython.display import display, clear_output

import torch
from torch import nn
from torchvision.models import resnet50
import torchvision.transforms as T
torch.set_grad_enabled(False);

### Define Helper Functions/Variables

In [15]:
# COCO classes
CLASSES = [
    'N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack',
    'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
    'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush'
]

# colors for visualization
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]

In [16]:
# standard PyTorch mean-std input image normalization
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)

def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32).cuda()
    return b

### Run Detection and Evaluate Social Distancing

In [17]:
model = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=True)
model.eval();
model = model.cuda()

Using cache found in /root/.cache/torch/hub/facebookresearch_detr_master


In [37]:
# read in image
vidcap = cv2.VideoCapture(vid_id+'.mp4')
success,image = vidcap.read()
count = 0

# create object for writing out image
fps = vidcap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
out = cv2.VideoWriter(vid_id + '-DETR_output.mp4', fourcc, fps, (image.shape[1], image.shape[0]))

# create list for storing results
num_people = []
num_violations = []

while success:

  # copy image for display
  img_copy = image.copy()

  # convert image to tensor for computations
  img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  img = transform(img).unsqueeze(0).cuda()

  # propagate through the model
  outputs = model(img)

  # keep only predictions with certain level of confidence
  probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
  keep = probas.max(-1).values > 0.9

  # get scaled bounding boxes
  bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], (image.shape[1], image.shape[0]))

  # only get those entries that are people
  person_bboxes = []
  for p, bbox in zip(probas, bboxes_scaled):
    cl = p.argmax()
    if cl == 1:
      person_bboxes.append(bbox.cpu().numpy())
  person_bboxes = np.array(person_bboxes)
  
  # run bounding_box.count_undistanced
  num_pairs, pair_ids, pairwise_distance_array = box.count_undistanced(person_bboxes)
  
  # plot bounding boxes of people in violation of social distancing
  undistanced_ids = np.unique(pair_ids)
  for id in range(len(person_bboxes)):
    pt1 = (person_bboxes[id][0], person_bboxes[id][1])
    pt2 = (person_bboxes[id][2], person_bboxes[id][3])
    if id in undistanced_ids:
      img_copy = cv2.rectangle(img_copy, pt1, pt2, (0,0,255), 2)
    else:
      img_copy = cv2.rectangle(img_copy, pt1, pt2, (0,255,0), 2)

  # append results to lists
  num_violations.append(num_pairs)
  num_people.append(len(person_bboxes))

  # write image out
  out.write(img_copy)

  # cycle to next frame
  success,image = vidcap.read()
  count += 1

# release in and out video files
vidcap.release()
out.release()
cv2.destroyAllWindows()

### Export Data

In [40]:
# send results to pickle file for later retrieval
with open(vid_id + '-DETR_data.pkl', 'wb') as f:
  pickle.dump([num_violations, num_people], f)

In [38]:
# IF RUNNING ON COLAB: download files
from google.colab import files
files.download(vid_id + '-DETR_data.pkl')
files.download(vid_id + '-DETR_output.mp4')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Compare Results

In [63]:
def count_dict(count_violations):

  for idx, entry in enumerate(count_violations):
    if entry == None:
      count_violations[idx] = np.nan
  count_violations = np.array(count_violations)

  index_dict = {}
  for val in np.unique(count_violations[~np.isnan(count_violations)]):
    index_dict[val] = np.where(count_violations==val)[0]
  index_dict[np.nan] = np.where(np.isnan(count_violations))[0]
  
  return index_dict

In [42]:
# unpickle files
yolo_data = open(vid_id + '-YOLO_data.pkl', 'rb')
yolo_num_violations, yolo_num_people = pickle.load(yolo_data)
detr_data = open(vid_id + '-DETR_data.pkl', 'rb')
detr_num_violations, detr_num_people = pickle.load(detr_data)

In [64]:
count_dict(yolo_num_violations)

{0.0: array([   0,    1,    2, ..., 5577, 5578, 5579]),
 1.0: array([  13,   14,   15,   16,   17,   18,   20,   29,   32,   33,   34,
          35,   36,   49,   50,   51,  207,  209,  224,  228,  229,  261,
         262,  263,  264,  265,  272,  273,  278,  279,  280,  281,  282,
         283,  284,  285,  286,  287,  289,  290,  291,  292,  293,  294,
         305,  306,  309,  314,  315,  332,  333,  334,  338,  351,  377,
         378,  460,  463,  508,  509,  537,  550,  553,  569,  598,  600,
         608,  628,  629,  688,  814,  815,  816,  817,  818,  822,  846,
         847,  851,  852, 1478, 1481, 1497, 1935, 1956, 1958, 1959, 1962,
        1979, 1980, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1994,
        1995, 1996, 1998, 2003, 2095, 2097, 2204, 2205, 2206, 2207, 2208,
        2209, 2210, 2211, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2219,
        2220, 2221, 2222, 2223, 2224, 2225, 2226, 2227, 2228, 2229, 2230,
        2284, 2502, 2503, 2505, 2510, 2551, 2555, 2

In [49]:
count_dict(detr_num_violations)

[15 16 15 ... 12 25 25]


{0: array([1617, 3923, 3924, 3927, 3928, 3934, 3938, 3944, 3945, 3949, 3950,
        3974, 3981, 3995, 3998, 5017, 5054, 5122, 5124, 5161, 5163, 5176,
        5184, 5196]),
 1: array([1561, 1577, 1579, 1613, 2113, 2118, 2119, 2121, 2124, 2125, 2127,
        2141, 2165, 2169, 2172, 2174, 2176, 2177, 2190, 2206, 3115, 3116,
        3117, 3878, 3910, 3919, 3925, 3929, 3935, 3941, 3962, 3964, 3989,
        3990, 3992, 3993, 3997, 4012, 4200, 4260, 4509, 4535, 4578, 4635,
        4700, 4705, 4731, 4816, 4820, 4826, 4830, 4831, 4832, 4940, 5008,
        5009, 5013, 5014, 5019, 5022, 5023, 5031, 5033, 5038, 5043, 5044,
        5045, 5090, 5093, 5100, 5101, 5106, 5107, 5125, 5126, 5137, 5139,
        5143, 5146, 5151, 5155, 5156, 5158, 5159, 5162, 5164, 5165, 5167,
        5179, 5182, 5187, 5198, 5211, 5216, 5220, 5221, 5224, 5236]),
 2: array([ 121,  122,  138,  906,  929, 1232, 1277, 1312, 1325, 1567, 1585,
        1601, 1833, 1854, 2145, 2146, 2150, 2151, 2155, 2161, 2164, 2188,
        237