In [11]:
from ultralytics import YOLO
import cv2
import cvzone
import math

In [12]:
# # # for webcam
# cap = cv2.VideoCapture(0)
# cap.set(3,1280)
# cap.set(4,720)

In [13]:
# # for videos
cap = cv2.VideoCapture('../videos/cars.mp4')

In [14]:
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
print(f"Resolution: {width}x{height}")

Resolution: 3840.0x2160.0


In [15]:
import torch
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model = YOLO('../yolo_weights/yolov8n.pt')
model.to(device)

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_s

In [16]:
class_names = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", 
               "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", 
               "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", 
               "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", 
               "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", 
               "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", 
               "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", 
               "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", 
               "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", 
               "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", 
               "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]


In [17]:
mask=cv2.imread('../videos/mask.png')

In [18]:
from sort import *
tracker =Sort(max_age=20,min_hits=3,iou_threshold=0.3)

In [19]:
import numpy as np
limits = [2388,1014,3696,729]
total_count=[]

In [20]:

while True:
    success, img = cap.read()
    img_region=cv2.bitwise_and(img,mask)
    results = model(img_region, stream=True)

    detection=np.empty((0,5))

    for r in results:
        boxes = r.boxes
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            w, h = x2 - x1, y2 - y1
            

            conf = math.ceil((box.conf[0] * 100)) / 100 
            cls_id = int(box.cls[0]) 
            
            cls_name = class_names[cls_id] if cls_id < len(class_names) else "Unknown"
            
            current_class=class_names[cls_id]
            if current_class=='car' and conf>0.5:
                cvzone.cornerRect(img, (x1, y1, w, h),l=5,rt=4)
                # cvzone.putTextRect(img, f'{cls_name} {conf}', (max(0, x1), max(35, y1)),scale=1.7,thickness=1)
                current_array=np.array([x1,y1,x2,y2,conf])
                detection=np.vstack((detection,current_array))

    result_tracker=tracker.update(detection)
    cv2.line(img,(limits[0],limits[1]),(limits[2],limits[3]),(0,255,255),5)

    for result in result_tracker:
        x1,y1,x2,y2,ID=result
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        w, h = x2 - x1, y2 - y1
        print(result)
        cvzone.cornerRect(img, (x1, y1, w, h),l=5,rt=1,colorR=(255,0,0))
        cvzone.putTextRect(img, f'{int(ID)}', (max(0, x1), max(35, y1)),scale=1.7,thickness=1)

        cx,cy=x1+w//2,y1+h//2
        cv2.circle(img,(cx,cy),5,(255,0,255),cv2.FILLED)

        if limits[0]<cx<limits[2]:
            if total_count.count(ID)==0:
                total_count.append(ID)
                cv2.line(img,(limits[0],limits[1]),(limits[2],limits[3]),(0,255,0),5)
                
    cvzone.putTextRect(img, f'count: {len(total_count)}', (50, 50))

    cv2.imshow("Image", img)
    # cv2.imshow("Image", img_region)
    cv2.waitKey(1)


0: 384x640 2 cars, 1 truck, 274.0ms
Speed: 76.6ms preprocess, 274.0ms inference, 5379.6ms postprocess per image at shape (1, 3, 384, 640)
[       2455         711        2947        1056           2]
[       3429         908        3838        1304           1]

0: 384x640 2 cars, 1 truck, 35.7ms
Speed: 5.6ms preprocess, 35.7ms inference, 9.1ms postprocess per image at shape (1, 3, 384, 640)
[     2493.6      719.74      2994.4      1078.3           2]
[     3476.8      921.78      3845.2      1308.2           1]

0: 384x640 2 cars, 1 truck, 18.0ms
Speed: 2.5ms preprocess, 18.0ms inference, 8.3ms postprocess per image at shape (1, 3, 384, 640)
[     2536.4      724.31      3032.9        1084           2]
[     3522.1      943.27      3851.1      1321.7           1]

0: 384x640 3 cars, 1 truck, 17.6ms
Speed: 2.6ms preprocess, 17.6ms inference, 2210.9ms postprocess per image at shape (1, 3, 384, 640)
[     2587.9      734.22      3090.3        1098           2]
[     3591.6      970.89 

KeyboardInterrupt: 

: 

In [11]:
import torch

print(torch.__version__)

2.4.1


In [18]:
import torch

if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("MPS device is available.")
else:
    device = torch.device("cpu")
    print("MPS device is not available, using CPU.")


MPS device is available.
