In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.148-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [2]:
from ultralytics import YOLO

model = YOLO('yolo11n')

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|██████████| 5.35M/5.35M [00:00<00:00, 62.3MB/s]


In [3]:
video_path = '/content/people-detection.mp4'

In [4]:
output_path = 'output_video.avi'

Counting people

In [10]:
import cv2
import numpy as np
from ultralytics import YOLO

def signed_distance(point, line):
  """Calculate the distance between from a point until defined line by two points.
  Allows know what side of the line the point it's.
  point: (x,y)
  line ((x1,y1),(x2,y2))"""
  x,y = point
  (x1,y1),(x2,y2) = line
  num = (y2-y1)*x - (x2-x1)*y + x2*y1-y2*x1
  den = np.sqrt((y2-y1)**2 + (x2-x1)**2)
  return num/den

#Define the counting lines
line1 = ((130, 120), (25,300))
line2 = ((650,175), (720,225))

#Conters for each line
count_line1 = 0
count_line2 = 0

#Threshold to associate detections among frames
distance_threshold = 25 # Solving problems associated a detect multiple pople when they are not there

#Array to almacenate the centroids of before frame
prev_centroids = []

model = YOLO('yolo11n.pt')

#Open the input video and configurate videowriter to output video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
  raise ValueError("Unable to open video file")

fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

while True:
  ret, frame = cap.read()
  if not ret:
    break

  #Detect objects
  results = model(frame, conf= 0.7)
  #Acces to fris result of array
  boxes_obj = results[0].boxes

  current_centroids = []
  if boxes_obj is not None and len(boxes_obj) > 0:
    #Extract boxes and clasess as numpy arrays
    bboxes = boxes_obj.xyxy.cpu().numpy() #Shape (N,4)
    classes = boxes_obj.cls.cpu().numpy() #Shape (N,)
    #Filtering detections by "person"
    for i in range(len(bboxes)):
      if int(classes[i]) == 0:
        x1, y1, x2, y2 = map (int, bboxes[i])
        centroid = ((x1+x2)//2, (y1+y2)//2)
        current_centroids.append(centroid)
        cv2.rectangle(frame, (x1,y1), (x2,y2), (0,255,0), 2)
        cv2.circle(frame, centroid, 4, (0,255,0), -1)

  #Drawing lines of counting
  cv2.line(frame, line1[0], line1[1], (255,0,0), 2)
  cv2.line(frame, line2[0], line2[1], (0,0,255), 2)

  #Compare for each current centroid, with the before frame to detect crosses
  for curr in current_centroids:
    best_distance = float('inf')
    best_prev = None
    for prev in prev_centroids:
      d = np.linalg.norm(np.array(curr) - np.array(prev))
      if d < best_distance and d<distance_threshold:
        best_distance = d
        best_prev = prev
    if best_prev is not None:
      #Verify crosses line1
      prev_side1 = signed_distance(best_prev, line1)
      curr_side1 = signed_distance(curr, line1)
      if prev_side1 * curr_side1 < 0:
        count_line1 += 1

      #Verify crosses line2
      prev_side2 = signed_distance(best_prev, line2)
      curr_side2 = signed_distance(curr, line2)
      if prev_side2 * curr_side2 < 0:
        count_line2 += 1

  #Draw the conters in the frame
  cv2.putText(frame, f'Clothes section: {count_line1}', (10,30), cv2.FONT_HERSHEY_SIMPLEX,1,(255,0,0),2)
  cv2.putText(frame, f'Sports section: {count_line2}', (10,70), cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),2)

  #Write the processed frame in the output video
  writer.write(frame)

  #Update centroids
  prev_centroids = current_centroids.copy()

cap.release()
writer.release()
print(f"Processed video and saved on: {output_path}")


0: 384x640 (no detections), 158.6ms
Speed: 5.8ms preprocess, 158.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 145.0ms
Speed: 5.8ms preprocess, 145.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 151.1ms
Speed: 5.1ms preprocess, 151.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 161.7ms
Speed: 5.2ms preprocess, 161.7ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 159.0ms
Speed: 5.7ms preprocess, 159.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 150.9ms
Speed: 5.2ms preprocess, 150.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 157.3ms
Speed: 4.7ms preprocess, 157.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 160.0ms
Speed: 5.0ms prepr