# In [10]:
# Importing required libraries
import cv2
import numpy as np
import random
import os
from PIL import Image
import time

# Locations of the YOLOv3 weights, the network configuration and the input
# video. They are relative paths, so they resolve against the current working
# directory.
YOLO_WEIGHTS_PATH = "yolov3.weights"
YOLO_CONFIG_PATH = "yolov3.cfg"
VIDEO_PATH = 'data/people1.webm'

# Load the YOLO object detector (per the original author, trained on the COCO
# dataset) from the weights/config pair.
net = cv2.dnn.readNet(YOLO_WEIGHTS_PATH, YOLO_CONFIG_PATH)
# Request GPU inference: CUDA as both the preferable backend and target.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# Two people whose box centres are at most this many pixels apart are
# reported as violating the distancing rule.
distance_thres = 50

# Instantiate the VideoCapture object used to read frames from the video file.
cap = cv2.VideoCapture(VIDEO_PATH)
# Fail fast with a clear message; otherwise the problem only surfaces later as
# an AttributeError when the first (None) frame is inspected.
if not cap.isOpened():
    raise OSError("Could not open input video: " + VIDEO_PATH)

# A simple distance function which calculates the distance between two coordinates on the plane.
def dist(x, y):
    """Return the Euclidean distance between two points on the plane.

    Args:
        x: First point; only ``x[0]`` and ``x[1]`` are read.
        y: Second point; only ``y[0]`` and ``y[1]`` are read.

    Returns:
        The distance as a float, or ``None`` when the arguments are not
        usable as 2-D points (not indexable, too short, or holding values
        that cannot be subtracted/squared). Callers must handle ``None``
        before comparing the result with a threshold.
    """
    try:
        return ((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2) ** 0.5
    # Only the errors that malformed points can raise are swallowed; a bare
    # ``except`` would also hide KeyboardInterrupt and SystemExit.
    except (TypeError, IndexError, KeyError, OverflowError):
        return None

# Work out the names of the network's output layers, which are the only
# layers whose results are requested from the forward pass.
# getLayerNames(): names of all layers of the network.
# getUnconnectedOutLayers(): 1-based ids of the output layers.
layer_names = net.getLayerNames()
# Depending on the OpenCV version the ids come back either as a flat array or
# as an Nx1 array; flattening makes the lookup work with both shapes.
output_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
output_layers = [layer_names[int(layer_id) - 1] for layer_id in output_layer_ids]
print('Output layers',output_layers)

# Read one frame up front: its shape provides the output video's dimensions.
grabbed, frame = cap.read()
if not grabbed:
    # Without this check a failed read leaves frame as None and the shape
    # lookup below dies with an unhelpful AttributeError.
    raise OSError("Could not read a frame from the input video")

# Rewind so the frame consumed above is still seen by the processing loop
# instead of being dropped from the output. Best effort: cap.set() reports an
# unsupported request through its return value rather than raising.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# Results are also saved to a video file: MJPG-encoded 'output.avi' at 30 fps,
# in colour, with the same frame size as the input.
frame_height, frame_width = frame.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
writer = cv2.VideoWriter('output.avi', fourcc, 30, (frame_width, frame_height), True)


# Detection settings used by the processing loop below.
PERSON_CLASS_ID = 0          # class index the script treats as "person"
MIN_CONFIDENCE = 0.3         # minimum class score for a box to reach NMS
# NOTE(review): the NMS score threshold is stricter than MIN_CONFIDENCE, so
# boxes scoring between 0.3 and 0.5 are collected and then discarded by NMS.
# The original values are kept; confirm which cut-off is actually intended.
NMS_SCORE_THRESHOLD = 0.5
NMS_OVERLAP_THRESHOLD = 0.4
ESC_KEY = 27

# Main loop: detect people in every frame, flag pairs that stand too close,
# draw the result, append it to the output video and show it on screen.
while True:
    # Read the next frame; ret is False once the video is exhausted.
    ret, img = cap.read()
    if not ret:
        break

    height, width = img.shape[:2]

    # Build a 416x416 blob (pixel values scaled by 0.00392, R and B swapped,
    # no cropping) and run a forward pass through the YOLO output layers.
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    confidences = []
    boxes = []
    for out in outs:
        # Each row is one detection: columns 0-3 are the box centre and size
        # as fractions of the frame, columns 5 onwards are per-class scores
        # (column 4 is not used here).
        class_scores = out[:, 5:]
        best_class = class_scores.argmax(axis=1)
        best_score = class_scores[np.arange(len(out)), best_class]
        # Keep only rows whose top class is "person" with enough confidence.
        # Filtering in NumPy avoids a Python-level pass over every row.
        keep = (best_class == PERSON_CLASS_ID) & (best_score > MIN_CONFIDENCE)

        for detection, confidence in zip(out[keep], best_score[keep]):
            # Scale the relative centre/size back to pixel coordinates.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Convert the centre to the box's top-left corner.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))

    # Non-maximum suppression drops weak and overlapping boxes and returns
    # the indexes of the boxes to keep.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, NMS_SCORE_THRESHOLD, NMS_OVERLAP_THRESHOLD)
    # The result may be empty, flat or Nx1 depending on the OpenCV version;
    # flatten it and sort so that persons keep their original box order.
    kept = sorted(int(i) for i in np.array(indexes).flatten())

    persons = [boxes[i] for i in kept]
    person_centres = [[x + w // 2, y + h // 2] for (x, y, w, h) in persons]

    # Compare every pair of centres once and remember, by position in
    # persons, everyone who is within distance_thres pixels of someone else.
    violate = set()
    for i in range(len(persons)):
        for j in range(i + 1, len(persons)):
            if dist(person_centres[i], person_centres[j]) <= distance_thres:
                violate.add(i)
                violate.add(j)

    # Red box for people violating the distance rule, green for the others;
    # every box also gets a small red dot at its centre.
    for idx, (x, y, w, h) in enumerate(persons):
        color = (0, 0, 255) if idx in violate else (0, 255, 0)
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.circle(img, (x + w // 2, y + h // 2), 2, (0, 0, 255), 2)

    # Show the number of violating people near the bottom-left of the frame.
    v = len(violate)
    cv2.putText(img, 'No of Violations : ' + str(v), (15, height - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 126, 255), 2)
    # Save the annotated frame to the output video and display it.
    writer.write(img)
    cv2.imshow("Image", img)

    # Stop when ESC is pressed. Masking with 0xFF keeps only the low byte of
    # the key code, so the comparison ignores any higher bits in the result.
    if (cv2.waitKey(1) & 0xFF) == ESC_KEY:
        break

# Release the VideoCapture object, close the VideoWriter so the output video
# is not left open, and destroy all open windows.
cap.release()
writer.release()
cv2.destroyAllWindows()

# Cell output: Output layers ['yolo_82', 'yolo_94', 'yolo_106']
