In [38]:
import cv2
import numpy as np
import os

# Loading YOLO
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
classes = []
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

In [39]:
# Loading the video file
video_path = 'american_mall_video.mp4'
video = cv2.VideoCapture(video_path)
if not video.isOpened():
    raise ValueError("Error opening video stream or file")

In [40]:
# Directory to store the frames
frames_dir = 'frames'
os.makedirs(frames_dir, exist_ok=True)

In [41]:
# Getting frame rate and adjusting for frequent frame extraction
fps = video.get(cv2.CAP_PROP_FPS)
interval = max(1, int(fps / 4))  # Extracting four frames per second

count = 0
frame_count = 0
while True:
    ret, frame = video.read()
    if not ret:
        break
    if count % interval == 0:
        frame_filename = f"{frames_dir}/frame_{frame_count}.jpg"
        cv2.imwrite(frame_filename, frame)
        frame_count += 1
    count += 1

video.release()

In [42]:
# Function to detect people
def detect_people(frame_path):
    img = cv2.imread(frame_path)
    if img is None:
        print(f"Failed to load image at {frame_path}")
        return 0
    
    # Increasing the size for better detection accuracy
    img = cv2.resize(img, None, fx=0.8, fy=0.8)
    height, width, channels = img.shape

    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:  # Lowering confidence threshold for potentially more detections
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.3, 0.2)  # Adjusting NMS parameters for better overlap handling
    people_count = sum([1 for i in range(len(boxes)) if class_ids[i] == 0 and i in indexes])

    return people_count

In [43]:
# Analyzing the extracted frames
people_counts = []
for i in range(frame_count):
    frame_path = f"{frames_dir}/frame_{i}.jpg"
    count = detect_people(frame_path)
    people_counts.append(count)

print("People counts per frame:", people_counts)

People counts per frame: [20, 23, 19, 15, 12, 17, 21, 17, 23, 21, 20, 19, 30, 22, 25, 21, 19, 14, 18, 17, 17, 16, 22, 14, 14]
