In [1]:
import cv2
import os
import numpy as np
import time
import imutils
import glob
from sendgrid.helpers.mail import *
from sendgrid import SendGridAPIClient

In [2]:
# Switch the process into the directory that the relative file paths below
# (labels, model config, weights, video) are resolved against.
working_dir = r'xyz'
os.chdir(working_dir)

In [3]:
# Load every class label available for detection, one label per line of the file.
# Only human objects are of interest here; they are listed under the "person" category.
with open("coco.names", "r") as names_file:
    LABELS = list(map(str.strip, names_file))

print(LABELS)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [4]:
# Load the pretrained YOLO network (Darknet config + weights) through OpenCV's dnn module.
yolo_config, yolo_weights = 'yolov3.cfg', 'yolov3.weights'
net = cv2.dnn.readNetFromDarknet(yolo_config, yolo_weights)

In [5]:
# Getting the names of YOLO's output layers
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV version they arrive either as an (N, 1) array or as a flat (N,) array,
# so flatten first; indexing `i[0]` directly fails on the flat form.
ln = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

In [6]:
# Pull in the SORT tracking implementation (provides `Sort`)
from SORT import *

# Track IDs collected by the detection loop once an alarm e-mail has been sent for them
cross_check = list()
# Starts out empty
memory = dict()
# Single tracker instance used for the whole video stream
tracker = Sort()

In [7]:
# Defining the surveillance window
COLORS = np.random.uniform(0, 255, size=(len(LABELS), 3))
window = np.array([[[0, 400], [500, 400], [500, 575], [0, 575]]], np.int32)
# This window can be changed by changing the coordinates. The intrusion test
# below uses the bounding box of these points, so it is exact for an
# axis-aligned rectangle such as this one.
WINDOW_X_MIN, WINDOW_Y_MIN = (int(v) for v in window.reshape(-1, 2).min(axis=0))
WINDOW_X_MAX, WINDOW_Y_MAX = (int(v) for v in window.reshape(-1, 2).max(axis=0))

# Detection settings
PERSON_CLASS_ID = 0     # "person" is the first entry of coco.names
MIN_CONFIDENCE = 0.5    # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.3     # overlap threshold used by non-maxima suppression
YOLO_INPUT_SIZE = (416, 416)
ESC_KEY = 27

# Only persons are ever tracked, so the label and colour are fixed for the run.
# (Tracker output is not aligned with the raw detection list, so these must not
# be looked up through a per-detection class-ID list.)
person_label = LABELS[PERSON_CLASS_ID]
person_color = [int(channel) for channel in COLORS[PERSON_CLASS_ID]]

# SendGrid client used for raising the alarm. The API key is read from the
# SENDGRID_API_KEY environment variable; the placeholder "xyz" is used when the
# variable is not set.
sg = SendGridAPIClient(os.environ.get("SENDGRID_API_KEY", "xyz"))

# Global float print format (kept from the original loop, set once instead of per frame)
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})


def _in_window(x, y):
    """Return True when the point (x, y) lies strictly inside the surveillance window bounds."""
    return WINDOW_X_MIN < x < WINDOW_X_MAX and WINDOW_Y_MIN < y < WINDOW_Y_MAX


# Video Feed input
cam = cv2.VideoCapture("Sample.mp4")
# cam = cv2.VideoCapture(0) ==> Use this command to access the laptop webcam feed
cam.read()  # the first frame is read and discarded
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
(W, H) = (None, None)

frameIndex = 0

# Looping for the Detection Process
try:
    while True:
        # Read the next frame from the file
        (grabbed, frame) = cam.read()

        # If the frame was not grabbed, then we have reached the end of the stream
        if not grabbed:
            break

        # If the frame dimensions are empty, grab them
        if W is None or H is None:
            (H, W) = frame.shape[:2]

        # Construct a blob from the input frame and then perform a forward pass of the YOLO object detector,
        # giving us our bounding boxes and associated probabilities
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, YOLO_INPUT_SIZE, swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(ln)

        # Bounding boxes ([x, y, w, h]) and confidences of the "person" detections in this frame
        boxes = []
        confidences = []

        # loop over each of the layer outputs
        for output in layerOutputs:
            # loop over each of the detections
            for detection in output:
                # Extract the class ID and confidence (i.e., probability) of the current object detection
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]

                # Keep only sufficiently confident detections of the "person" class
                if classID != PERSON_CLASS_ID or confidence <= MIN_CONFIDENCE:
                    continue

                # Scale the bounding box coordinates back relative to the size of the image,
                # keeping in mind that YOLO actually returns the center (x, y)-coordinates of the bounding box
                # followed by the boxes' width and height
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # Use the center (x, y)-coordinates to derive the top and left corner of the bounding box
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))

        # Creating the frame/region for surveillance
        cv2.polylines(frame, [window], True, (255, 120, 255), 5)

        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, MIN_CONFIDENCE, NMS_THRESHOLD)

        # Build the tracker input: one [x1, y1, x2, y2, score] row per kept box.
        # A frame without detections must still be passed on as an empty (0, 5)
        # array, which is the shape SORT documents for that case.
        dets = np.empty((0, 5))
        if len(idxs) > 0:
            dets = np.array([
                [boxes[i][0], boxes[i][1],
                 boxes[i][0] + boxes[i][2], boxes[i][1] + boxes[i][3],
                 confidences[i]]
                for i in np.array(idxs).flatten()
            ])

        # Passing each box through SORT for tracking the person
        tracks = tracker.update(dets)

        # Draw a bounding box rectangle and label on the frame for every track
        for track in tracks:
            (x1, y1, x2, y2) = (int(v) for v in track[:4])
            track_id = int(track[4])

            cv2.rectangle(frame, (x1, y1), (x2, y2), person_color, 2)

            # Raising an Alarm using the SendGrid API when the bottom-left or bottom-right
            # corner of the box is inside the window; each track ID alerts only once
            if (_in_window(x1, y2) or _in_window(x2, y2)) and track_id not in cross_check:
                message = Mail(from_email='HumanDetection@Alarm.com',
                               to_emails='abc@xyz.com',  # Enter your personal email id
                               subject='Human Detected',
                               html_content='<strong>Please check your webcam feed</strong>')
                sg.send(message)
                # Recorded only after a successful send
                cross_check.append(track_id)

            # Labelling the Object Frame (the track ID is rendered with four decimals, e.g. "person: 3.0000")
            text = "{}: {:.4f}".format(person_label, track_id)
            cv2.putText(frame, text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, person_color, 2)

        frameIndex += 1

        # Output Feed
        cv2.imshow('camera', frame)

        # Esc closes the feed
        k = cv2.waitKey(1)
        if k == ESC_KEY:
            break
finally:
    # Release the capture and close the preview window even when the loop is
    # left through an exception or a kernel interrupt.
    cam.release()
    cv2.destroyAllWindows()
        
# Press Esc in the video window, or interrupt the kernel, to stop the detection process.

KeyboardInterrupt: 

In [8]:
# Announce shutdown, then free the capture and close every OpenCV window
exit_message = '\n [INFO] Exiting Program and cleaning up'
print(exit_message)
cam.release()
cv2.destroyAllWindows()


 [INFO] Exiting Program and cleanup stuff
