In [1]:
import numpy as np
from ultralytics import YOLO
import cv2
import cvzone
import math
from sort import *

cap = cv2.VideoCapture("./Videos/cars.mp4")  # For Video

model = YOLO("./Yolo-Weights/yolov8l.pt")

classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

mask = cv2.imread("mask.png")

# Tracking
tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.3)

limits = [400, 297, 673, 297]
totalCount = []

while True:
    success, img = cap.read()
    imgRegion = cv2.bitwise_and(img, mask)

    imgGraphics = cv2.imread("graphics.png", cv2.IMREAD_UNCHANGED)
    img = cvzone.overlayPNG(img, imgGraphics, (0, 0))
    results = model(imgRegion, stream=True)

    detections = np.empty((0, 5))

    for r in results:
        boxes = r.boxes
        for box in boxes:
            # Bounding Box
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            # cv2.rectangle(img,(x1,y1),(x2,y2),(255,0,255),3)
            w, h = x2 - x1, y2 - y1

            # Confidence
            conf = math.ceil((box.conf[0] * 100)) / 100
            # Class Name
            cls = int(box.cls[0])
            currentClass = classNames[cls]

            if currentClass == "car" or currentClass == "truck" or currentClass == "bus" \
                    or currentClass == "motorbike" and conf > 0.3:
                # cvzone.putTextRect(img, f'{currentClass} {conf}', (max(0, x1), max(35, y1)),
                #                    scale=0.6, thickness=1, offset=3)
                # cvzone.cornerRect(img, (x1, y1, w, h), l=9, rt=5)
                currentArray = np.array([x1, y1, x2, y2, conf])
                detections = np.vstack((detections, currentArray))

    resultsTracker = tracker.update(detections)

    cv2.line(img, (limits[0], limits[1]), (limits[2], limits[3]), (0, 0, 255), 5)
    for result in resultsTracker:
        x1, y1, x2, y2, id = result
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        print(result)
        w, h = x2 - x1, y2 - y1
        cvzone.cornerRect(img, (x1, y1, w, h), l=9, rt=2, colorR=(255, 0, 255))
        cvzone.putTextRect(img, f' {int(id)}', (max(0, x1), max(35, y1)),
                           scale=2, thickness=3, offset=10)

        cx, cy = x1 + w // 2, y1 + h // 2
        cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

        if limits[0] < cx < limits[2] and limits[1] - 15 < cy < limits[1] + 15:
            if totalCount.count(id) == 0:
                totalCount.append(id)
                cv2.line(img, (limits[0], limits[1]), (limits[2], limits[3]), (0, 255, 0), 5)

    # cvzone.putTextRect(img, f' Count: {len(totalCount)}', (50, 50))
    cv2.putText(img,str(len(totalCount)),(255,100),cv2.FONT_HERSHEY_PLAIN,5,(50,50,255),8)

    cv2.imshow("Image", img)
    # cv2.imshow("ImageRegion", imgRegion)
    cv2.waitKey(1)


Ultralytics YOLOv8.0.26  Python-3.10.9 torch-2.0.0+cpu CPU
YOLOv8l summary (fused): 268 layers, 43668288 parameters, 0 gradients, 165.2 GFLOPs

0: 384x640 3 cars, 1235.1ms
Speed: 3.0ms pre-process, 1235.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)



[        572         317         651         430           3]
[        378         348         496         458           2]
[        458         226         509         270           1]


0: 384x640 3 cars, 1336.4ms
Speed: 1.0ms pre-process, 1336.4ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)



[     568.95      322.07      649.05      436.93           3]
[     373.99      352.01      493.01      462.99           2]
[        457         228         508         272           1]


0: 384x640 4 cars, 1281.4ms
Speed: 2.0ms pre-process, 1281.4ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)



[        624         198         654         215           4]
[     560.21      346.14      648.08      466.27           3]
[     357.12       367.1      477.47      470.08           2]
[      449.5      233.96      503.14      280.39           1]


0: 384x640 5 cars, 1268.3ms
Speed: 3.0ms pre-process, 1268.3ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)



[      548.7         387      630.89      477.78           3]
[     333.07      390.45      453.59      473.31           2]
[      438.2      242.59      495.36         292           1]


0: 384x640 5 cars, 1359.2ms
Speed: 1.0ms pre-process, 1359.2ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)



[     324.44      415.36      433.85      473.98           2]
[     427.71      251.43      487.32       302.7           1]


0: 384x640 4 cars, 1260.9ms
Speed: 2.0ms pre-process, 1260.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)



[     612.77      201.83      660.86      241.17           4]
[     417.28      260.09      479.46      314.04           1]


0: 384x640 3 cars, 1255.4ms
Speed: 2.0ms pre-process, 1255.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)



[     508.87       198.2      548.91      214.54           5]
[     610.14      206.12      662.67      251.56           4]
[     405.19      269.04      470.52      325.92           1]


0: 384x640 4 cars, 1321.8ms
Speed: 2.0ms pre-process, 1321.8ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 640)



[     504.26      199.34      548.36      218.94           5]
[     606.77         213       662.9       263.5           4]
[     392.05      277.86      461.08      337.46           1]


0: 384x640 4 cars, 1 motorcycle, 1450.0ms
Speed: 1.0ms pre-process, 1450.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


[     500.26      200.04      546.63      222.75           5]
[      602.7      221.46      663.13      277.14           4]
[     376.94      288.54      450.36      351.77           1]



0: 384x640 4 cars, 1323.5ms
Speed: 1.0ms pre-process, 1323.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)



[     496.25      200.45      544.73      226.51           5]
[     599.39      231.47      663.09      290.94           4]
[     360.54      300.12      438.49      366.78           1]


0: 384x640 4 cars, 1280.4ms
Speed: 2.0ms pre-process, 1280.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)



[     491.46      201.45      541.46      230.96           5]
[     592.12      242.97       657.6      306.47           4]
[     342.44       312.8      426.14      384.58           1]


0: 384x640 4 cars, 1197.9ms
Speed: 2.0ms pre-process, 1197.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)



[      486.8      204.57      537.32      236.47           5]
[     585.41      255.77      653.68       323.8           4]
[     321.98      326.29      412.08      402.95           1]


KeyboardInterrupt: 