# Detecting People

This is an initial learning experiment in using OpenCV to detect people in videos.

The approach here is very simple:
- Open the input video, get its shape and FPS.
- Set up the output video with the same shape and FPS as the input video.
- From the input video, get the image of the frame.
- Run a pre-trained full-body Haar Cascade Classifier on that image; it should return a list of detected bodies, each in the form of a rectangle.
- Draw the rectangles on the image.
- Display the image.
- Save the processed frames into the output video.

### Reference
[Computer Vision — Detecting objects using Haar Cascade Classifier](https://towardsdatascience.com/computer-vision-detecting-objects-using-haar-cascade-classifier-4585472829a9)

In [2]:
import numpy as np
import cv2  # or opencv-python
import time

# People-detection script.
#
# Reads up to `frames_to_process` frames from `input_filename`, runs the
# full-body Haar cascade on each one, draws a green rectangle around every
# detection, shows the annotated frame in a window, and finally writes the
# annotated frames to `output_filename`.
#
# Raises:
#     IOError: if the cascade file cannot be loaded or the input video
#         cannot be opened.

# Create our body classifier
detector = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_fullbody.xml'
)
if detector.empty():
    # An empty classifier would only fail later, inside detectMultiScale.
    raise IOError("Could not load 'haarcascade_fullbody.xml'.")

# Open the input video capture
#input_filename = './1080p_TownCentreXVID.mp4'
#input_filename = './720p_TownCentreXVID.mp4'
#input_filename = './480p_TownCentreXVID.mp4'
input_filename = './360p_TownCentreXVID.mp4'
vcap = cv2.VideoCapture(input_filename)
if not vcap.isOpened():
    raise IOError(f"Could not open input video \"{input_filename}\".")

vout = None
try:
    # Get video properties
    frame_width = int(vcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(vcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = vcap.get(cv2.CAP_PROP_FPS)
    n_frames = int(vcap.get(cv2.CAP_PROP_FRAME_COUNT))

    print("Frame width:", frame_width)
    print("Frame height:", frame_height)
    print("Video fps:", fps)

    # Setup the output video file
    output_filename = './output.mp4'
    apiPreference = cv2.CAP_FFMPEG
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    vout = cv2.VideoWriter(
        filename=output_filename,
        apiPreference=apiPreference,
        fourcc=fourcc,
        fps=fps,
        frameSize=(frame_width, frame_height),
        params=[]
    )

    print(f"Processing \"{input_filename}\" ({n_frames} frames)...")

    # Start app
    window_name = "People Tracking"
    cv2.startWindowThread()
    cv2.namedWindow(window_name)

    # Loop each frame
    frame_count = 0
    frames_to_process = 1000
    # Only frames that were actually read are stored, so a video shorter
    # than `frames_to_process` leaves no placeholder entries behind.
    processed_frames = []

    # start timer
    start = time.time()
    while frame_count < frames_to_process:
        # Read a frame
        ret, frame = vcap.read()
        if not ret:
            break

        # Apply the body classifier
        bodies = detector.detectMultiScale(frame, 1.1, 3)

        # Extract bounding boxes for any bodies identified
        for (x, y, w, h) in bodies:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Save frame
        processed_frames.append(frame)
        frame_count += 1

        # Show in app
        cv2.imshow(window_name, frame)
        cv2.waitKey(1)

    # end timer
    end = time.time()
    overall_elapsed_time = end - start

    print("Done!")
    print(f"{frame_count} frames processed in {overall_elapsed_time} seconds.")
    # Skip the per-frame statistics when they would divide by zero.
    if frame_count > 0 and overall_elapsed_time > 0:
        elapsed_time_per_frame = overall_elapsed_time / frame_count
        print(f"({elapsed_time_per_frame}) seconds per frame.")
        print(f"({1/elapsed_time_per_frame}) frames per second.")

    # Write processed frames to file
    if vout.isOpened():
        for frame in processed_frames:
            vout.write(frame)
        print(f"Output saved to \"{output_filename}\".")
    else:
        print(f"Could not open \"{output_filename}\" for writing; output not saved.")
finally:
    # Release the capture, the writer and the window even if processing failed.
    vcap.release()
    if vout is not None:
        vout.release()
    cv2.destroyAllWindows()

Frame width: 640
Frame width: 360
Video fps: 25.0
Processing "./360p_TownCentreXVID.mp4" (7502 frames)...
Done!
1000 frames processed in 42.74022030830383 seconds.
(0.042740220308303836) seconds per frame.
(23.397165311422455) frames per second.
Output saved to "./output.mp4".
