In [2]:
import argparse
import cv2
import os
import imutils
import numpy as np # pip install numpy
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model

In [3]:
NMS_THRESH = 0.1
MIN_CONF = 0.1
import numpy as np
import cv2

# function to detect people
def detect_people(frame, net, ln, personIdx=0):
    # grab dimensions of the frame and initialize the list of results
    (H, W) = frame.shape[:2]
    results = []

    # construct a blob from the input frame and then perfrom a forward pass
    # of the YOLO object detector, giving us the bounding boxes and
    # associated probabilities
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    # initialize lists of detected bounding boxes, centroids, and confidence
    boxes = []
    centroids = []
    confidences = []

    # loop over each of the layer outputs
    for output in layerOutputs:
        # loop over each of the detections
        for detection in output:
            # extract teh class ID and confidence(probability) of the current object detection
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # filter detections by (1) ensuring that the object detected was a person and
            # (2) that the minimum confidence is met
            if classID == personIdx and confidence > MIN_CONF:
                # scale the bounding box coordinates back relative to the size of
                # the image, keeping in mind that YOLO actually returns the center (x, y)-coordinates
                # of the bounding box followed by the boxes' width and height
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # use the center (x,y)-coordinates to derive the top and left corner of
                # the bounding box
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                # update the list of bounding box coordinates, centroids and confidences
                boxes.append([x, y, int(width), int(height)])
                centroids.append((centerX, centerY))
                confidences.append(float(confidence))

    # apply non-maxima suppression to suppress weak, overlapping bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, MIN_CONF, NMS_THRESH)

    # ensure at least one detection exists
    if len(idxs) > 0:
        # loop over the indexes being kept
        for i in idxs.flatten():
            # extract the bounding box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # update the results list to consist of the person prediction probability,
            # bounding box coordinates, and the centroid
            r = (confidences[i], (x, y, x + w, y + h), centroids[i])
            results.append(r)

    # return the list of results
    return results

In [4]:
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", type=str, default="",
help="path to (optional) input video file")
ap.add_argument("-o", "--output", type=str, default="",
help="path to (optional) output video file")
ap.add_argument("-d", "--display", type=int, default=1,
help="whether or not output frame should be displayed")
args = vars(ap.parse_args(["--input","C:/Users/Dhaval Makvana/Desktop/books/Major Project/dhaval_video.mp4","--output","C:/Users/Dhaval Makvana/Desktop/books/Major Project/dhaval_output_video.avi","--display","1"]))

In [6]:
# load the COCO class labels our YOLO model was trained on
labelsPath = os.path.sep.join(["C:/Users/Dhaval Makvana/Desktop/books/Major Project/obj.names"])
LABELS = open(labelsPath).read().strip().split("\n")
CLASSES = ["DHAVAL"]

# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join(["C:/Users/Dhaval Makvana/Desktop/books/Major Project/yolov4-tiny_last.weights"])
configPath = os.path.sep.join(["C:/Users/Dhaval Makvana/Desktop/books/Major Project/yolov4-tiny.cfg"])

# load our YOLO object detector trained on COCO dataset (80 classes)
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

# determine only the *output* layer names that we need from YOLO


# initialize the video stream and pointer to output video file
print("[INFO] accessing video stream...")
ln = net.getLayerNames()
ln = [ln[i - 1] for i in net.getUnconnectedOutLayers()]

# initialize the video stream and pointer to output video file
print("[INFO] accessing video stream...")
vs = cv2.VideoCapture('C:/Users/Dhaval Makvana/Desktop/books/Major Project/dhaval_video.mp4')
writer = None


   
   
# loop over the frames from the video stream
while True:
    # read the next frame from the file
    (grabbed, frame) = vs.read()
    print("running")
    # if the frame was not grabbed, then we have reached the end
    # of the stream
    if not grabbed:
        break

    # resize the frame and then detect people (and only people) in it
    frame = imutils.resize(frame, width=700)
    results = detect_people(frame, net, ln,personIdx=LABELS.index("DHAVAL"))
   
    print(results)
   

   

# loop over the results
    for (i, (prob, bbox, centroid)) in enumerate(results):
# extract the bounding box and centroid coordinates, then
# initialize the color of the annotation
        (startX, startY, endX, endY) = bbox
        (cX, cY) = centroid
        color = (0, 255, 0)


# draw (1) a bounding box around the person and (2) the
# centroid coordinates of the person,
        cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
        #cv2.circle(frame, (cX, cY), 5, color, 1)
        cv2.putText(frame, "{} [{:.2f}]".format('DHAVAL', float(prob)),
                    (startX, startY - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    color, 2)


# check to see if the output frame should be displayed to our
# screen
    if args["display"] > 0:
        # show the output frame
        cv2.imshow("",frame)
        key = cv2.waitKey(1) & 0xFF

# if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

# if an output video file path has been supplied and the video
# writer has not been initialized, do so now
    if args["output"] != "" and writer is None:
# initialize our video writer
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(args["output"], fourcc,29.97,
            (frame.shape[1], frame.shape[0]), True)
       
   

   

# if the video writer is not None, write the frame to the output
# video file
    if writer is not None:
        writer.write(frame)

[INFO] loading YOLO from disk...
[INFO] accessing video stream...
[INFO] accessing video stream...
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[(0.38154101371765137, (129, 33, 708, 786), (419, 410))]
running
[(0.46558937430381775, (134, 41, 708, 778), (421, 410))]
running
[(0.3390692472457886, (138, 39, 703, 777), (421, 408))]
running
[(0.6172656416893005, (157, -33, 731, 862), (444, 414))]
running
[(0.5631928443908691, (136, 55, 712, 775), (424, 415))]
running
[(0.7516108751296997, (125, 59, 721, 776), (423, 418))]
running
[(0.5802648663520813, (120, 39, 7

[(0.5698837637901306, (169, 140, 702, 887), (436, 514))]
running
[(0.5654444098472595, (164, 160, 710, 888), (437, 524))]
running
[(0.591749370098114, (165, 165, 702, 878), (434, 522))]
running
[(0.6668841242790222, (169, 162, 700, 873), (435, 518))]
running
[(0.7402273416519165, (170, 147, 698, 890), (434, 519))]
running
[]
running
[]
running
[]
running
[(0.7519161105155945, (126, 168, 662, 853), (394, 511))]
running
[(0.9394276142120361, (128, 180, 655, 836), (392, 508))]
running
[(0.7210284471511841, (131, 172, 666, 833), (399, 503))]
running
[(0.6827504634857178, (136, 169, 666, 837), (401, 503))]
running
[(0.8548694849014282, (132, 190, 666, 829), (399, 510))]
running
[(0.7677444815635681, (135, 183, 667, 836), (401, 510))]
running
[(0.6409962177276611, (138, 189, 670, 822), (404, 506))]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
[]
running
