In [3]:
#!/usr/bin/env python
# coding: utf-8

# Computer vision model that counts people in livestream frames
# Uses OpenCV, YOLOv5 from Ultralytics, and PyTorch
# The pretrained YOLOv5 model was trained on the COCO image dataset

import cv2
import torch
from PIL import Image

# Install the set of libraries as given in the requirements file to use ultralytics yolov5 model with pytorch
# pip install -r https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt

# Model: load the 'yolov5s' entry point from the ultralytics/yolov5 hub repository
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Open the capture device with index 0
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    # Raise SystemExit instead of calling exit(): exit() is the site/IPython
    # convenience helper rather than a reliable way to stop a script or cell.
    # SystemExit stops execution here and reports a failing status.
    raise SystemExit("Cannot open camera")

# Number of frames to capture and analyse before stopping
MAX_FRAMES = 5

# Count of frames processed so far; also used to number the saved image files
i = 0

try:
    while i < MAX_FRAMES:
        # Capture image from camera frame by frame
        ret, frame = cap.read()
        # if frame is read correctly ret is True
        if not ret:
            print("Can't capture frame from stream. Exit...")
            break

        # Save each frame to drive using imwrite method.
        # NOTE(review): the path is at the filesystem root - confirm this location is intended.
        frame_path = '/PersonImage' + str(i) + '.jpg'
        if not cv2.imwrite(frame_path, frame):
            # imwrite signals failure through its return value; stop here so a
            # stale file left at the same path is never analysed instead.
            raise OSError('Could not write frame to ' + frame_path)

        # Re-open the saved frame as a PIL image; the context manager closes the file afterwards
        with Image.open(frame_path) as im1:
            imgs = [im1]  # batch of images

            # Inference
            results = model(imgs, size=640)  # includes NMS

        # Results
        results.print()
        results.show()

        # im1 predictions (pandas): build the DataFrame once and count rows labelled 'person'
        detections = results.pandas().xyxy[0]
        j = int((detections['name'] == 'person').sum())

        i += 1

        print('Frame: %s, Total no of people: %s'%(i,j))
finally:
    # Release the camera capture object after exiting, even if processing a frame raised
    cap.release()
    cv2.destroyAllWindows()



Using cache found in /Users/nitinsinghal/.cache/torch/hub/ultralytics_yolov5_master
fatal: not a git repository (or any of the parent directories): .git
YOLOv5 🚀 2022-4-10 torch 1.11.0 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 
image 1/1: 720x1280 1 person
Speed: 17.7ms pre-process, 329.3ms inference, 6.8ms NMS per image at shape (1, 3, 384, 640)


Frame: 1, Total no of people: 1


image 1/1: 720x1280 1 person, 1 chair
Speed: 15.6ms pre-process, 334.3ms inference, 0.7ms NMS per image at shape (1, 3, 384, 640)


Frame: 2, Total no of people: 1


image 1/1: 720x1280 1 person, 1 chair
Speed: 17.6ms pre-process, 327.3ms inference, 1.7ms NMS per image at shape (1, 3, 384, 640)


Frame: 3, Total no of people: 1


image 1/1: 720x1280 1 person, 1 chair
Speed: 16.4ms pre-process, 336.1ms inference, 0.9ms NMS per image at shape (1, 3, 384, 640)


Frame: 4, Total no of people: 1


image 1/1: 720x1280 1 person
Speed: 16.6ms pre-process, 315.4ms inference, 0.8ms NMS per image at shape (1, 3, 384, 640)


Frame: 5, Total no of people: 1
