In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Insight to What I have done :- 
# My first response was to try a motion detector . I then realised that there are way too many people and also other 
# stochasticities like moving cars and such. I tried HOG as well, but it gave worse results . Finally I used the yolov3 model
# for predictions. Since I have only one camera video,there is no way for me to obectively determine safe distance because 
# a) images do not offer depth perception
# b) Objects closer to camera will show more pixel to pixel distance than they actually have. So I have currently set a threshold based on a rough estimate based on trying different values.

# Kindly unzip the file in the link . You will find 2 videos, 1 for motion detection and other using YOLO. My main submission in the YOLO one.
# The main video(named output.mp4) carries the results. I did not include it in the notebook because it did not make much sense without the corresponding images.
# The video is very fast so you can either pause the video and look at the values or run it at 0.5 or 0.75x.
# In the main video,you will be able to see real time (w.r.t to video) number of people and no of people [among those the code detected] at risk who will also be marked in red. 
# The motion detection video is included just as it is a baseline. Do note that the  number of people detected can be increased at the risk of some incorrect detections simultaneously.
# Currently I have tried to keep number of incorrects at minimum instead of maximizing no of correct.

# NOTE :- If you want to run the code yourself, you will have to put the .cfg,.weights and .names fill in the same directory (Can be found in the zip file) . Also,the videos should be there.

# To understand the code, you can follow the comments :-

In [2]:
capture = cv2.VideoCapture('Video.mp4')
ret,frame1 = capture.read()
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = None
while(capture.isOpened()):
    ret,frame2 = capture.read()
    if not ret:
        print(0)
        break
    if(writer == None):
        writer = cv2.VideoWriter("output_motion_detector.mp4", fourcc, 30,(frame1.shape[1], frame1.shape[0]), True)
    
    Difference = cv2.absdiff(frame1,frame2)
    gray = cv2.cvtColor(Difference,cv2.COLOR_BGR2GRAY)      
    blur = cv2.GaussianBlur(gray,(5,5),1)
    
    _,thresh = cv2.threshold(blur,20,255,cv2.THRESH_BINARY)
    
    dilated_image = cv2.dilate(thresh,(5,5),iterations=3)
    
    contours,_ = cv2.findContours(dilated_image,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) 
    #cv2.drawContours(frame1,contours,-1,(0,255,0),2)
    for contour in contours:
        (x,y,w,h) = cv2.boundingRect(contour)
        if(7000>=cv2.contourArea(contour) >= 2000):
            cv2.rectangle(frame1,(x,y),(x+w,y+h),(0,255,0),2)
    
    
    
    
    writer.write(frame1)
    cv2.imshow('Image',frame1)
    frame1 = frame2
    if cv2.waitKey(50) == ord('q'):
        break
    
capture.release()
writer.release()
cv2.destroyAllWindows()

AttributeError: 'NoneType' object has no attribute 'release'

In [3]:
def FindCloserThanThreshold(points,threshold=100):
    num = 0
    result = []
    for (x,y) in points:
        check = False
        for(a,b) in points:
            distance = np.sqrt((x-a)**2 + (b-y)**2)
            if(0<distance < threshold):
                check = True
                num+=1
                result.append((x,y))
                break
        
        
    return (num,result)
    

In [6]:
# Implementing YOLOv3
capture = cv2.VideoCapture('Video.mp4')
weights_path = "yolov3_custom_last.weights"
cfg_path = "yolov3_custom.cfg"
labels_path = "obj.names"
labels = open(labels_path).read().strip().split("\n")
print(labels)

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = None


net = cv2.dnn.readNetFromDarknet(cfg_path, weights_path)
ln = net.getLayerNames()
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

while True:
    (ret, frame) = capture.read()
    if(writer == None):
        writer = cv2.VideoWriter("output.mp4", fourcc, 30,(frame.shape[1], frame.shape[0]), True)

    if not ret:
        break

    # if the frame dimensions are empty, grab them
    
    (H, W) = frame.shape[:2]


    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),swapRB=True, crop=False)
    net.setInput(blob)
    #start = time.time()
    layerOutputs = net.forward(ln)
    #end = time.time()
    print('1')
    boxes = []
    confidences = []
    classIDs = []
    
    for output in layerOutputs:
        points = [] 
        for detection in output:
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]
            # filter out weak predictions by ensuring the detected
            # probability is greater than the minimum probability
            if confidence > 0.2 : 

                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")
                

                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))


                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)


    idxs = cv2.dnn.NMSBoxes(boxes, confidences,0.4,0.5)  # IOU intersection over union

    
    
    if len(idxs) > 0:
 
        for i in idxs.flatten():
           
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            points.append((x+w//2,y+h//2))
            color = 120 
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(labels[classIDs[i]],
            confidences[i])
            cv2.putText(frame, text, (x, y - 5),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    #no_of_violators,result = FindCloserThanThreshold(points,threshold = 50)        
    
    
    
    
    #cv2.putText(frame,"No of people "+str(no_of_people),(10,20),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),3)
    #cv2.putText(frame,"No of Violators "+str(no_of_violators),(10,70),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),3)
    #cv2.imshow('Image',frame)
    writer.write(frame)
    if cv2.waitKey(1) == ord('q'):
        break

capture.release()
writer.release()
cv2.destroyAllWindows()

['Helmet', 'Person', 'Fire', 'Safetyvest']
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
