In [20]:
import cv2
import numpy as np
from imutils.video import FPS
from scipy.spatial import distance as dist


video_path = r"C:\Users\neash\Downloads\archive (1)\video.mp4"
classes = None


with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]


net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]


cap = cv2.VideoCapture(video_path)
fps = FPS().start()


unique_human_centroids = []
total_human_count = 0
distance_threshold = 100  

def is_new_person(new_centroid, existing_centroids, threshold=distance_threshold):
 
    if len(existing_centroids) == 0:
        return True  
    D = dist.cdist(np.array([new_centroid]), np.array(existing_centroids))
    min_dist = D.min()
    return min_dist > threshold  
    
while cap.isOpened():
    ret, image = cap.read()
    if not ret:
        break

    
    height, width = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)

    
    outs = net.forward(output_layers)
    
    class_ids = []
    confidences = []
    boxes = []

    
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > 0.5:
                
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.4)

    
    if len(indices) > 0:
        for i in indices.flatten():  
            if class_ids[i] == 0:  
                box = boxes[i]
                center_x = box[0] + (box[2] // 2)
                center_y = box[1] + (box[3] // 2)
                centroid = (center_x, center_y)

                
                if is_new_person(centroid, unique_human_centroids):
                    unique_human_centroids.append(centroid)
                    total_human_count += 1

                
                label = str(classes[class_ids[i]])
                cv2.rectangle(image, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 2)
                cv2.putText(image, label, (box[0] - 10, box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    
    cv2.imshow('Video', image)

    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    
    fps.update()


fps.stop()
cap.release()
cv2.destroyAllWindows()


print(f"Total number of unique humans detected in the video: {total_human_count}")


Total number of unique humans detected in the video: 13
