<a href="https://colab.research.google.com/github/fediltf/YOLOv8xSort_VehicleCounting/blob/main/YOLOv8xSort_VehicleCounting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

> # Vehicle Detection, Tracking, and Counting with YOLOv8 and SORT


---


***Mohamed Fedi LETAIEF***


## Importing libraries, modules and files

In [None]:
import os
HOME = os.getcwd() # working directory at notebook start; a later cell returns here with `%cd {HOME}`
print(HOME)

/content


In [None]:
!pip install ultralytics
!pip install filterpy

Collecting ultralytics
  Downloading ultralytics-8.1.0-py3-none-any.whl (699 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m699.2/699.2 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting hub-sdk>=0.0.2 (from ultralytics)
  Downloading hub_sdk-0.0.2-py3-none-any.whl (37 kB)
Installing collected packages: hub-sdk, thop, ultralytics
Successfully installed hub-sdk-0.0.2 thop-0.1.1.post2209072238 ultralytics-8.1.0
Collecting filterpy
  Downloading filterpy-1.4.5.zip (177 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.0/178.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: filterpy
  Building wheel for filterpy (setup.py) ... [?25l[?25hdone
  Created wheel for filterpy: filename=filterpy-1.4.5-py3-none-any.whl size=110458 sha256=edff73ec1

In [None]:
import cv2
import torch
import time
import math
import numpy as np
import ultralytics
# Print the Ultralytics environment summary (library version, Python/torch, hardware).
ultralytics.checks()
from ultralytics import YOLO
# NOTE: this is numpy's `random` module, not the standard-library one.
from numpy import random
from google.colab import drive
# Mount Google Drive at /content/drive; sort.py, the test video and the results folder are read from / written to it.
drive.mount('/content/drive', force_remount=True)
# Go back to the directory recorded in HOME by the first cell.
%cd {HOME}
# Copy the SORT tracker module from Drive into the working directory, presumably so `import sort` below can find it.
%cp /content/drive/MyDrive/sort.py sort.py
import sort

Ultralytics YOLOv8.1.0 🚀 Python-3.10.12 torch-2.1.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 26.3/78.2 GB disk)
Mounted at /content/drive
/content


## Defining the YOLOv8_ObjectDetector Class for object detection:

In [None]:
class YOLOv8_ObjectDetector:
  """Wrapper around an Ultralytics YOLO model for detecting objects in images and videos.

  The most recent prediction is cached on the instance (`self.results` and
  `self.orig_img`) so that the display helpers can render it afterwards.
  """

  def __init__(self, model_file = 'yolov8n.pt', labels= None, classes = None, conf = 0.25, iou = 0.45 ):
    """Load the model and remember the inference settings.

    Args:
      model_file: weights file handed to `YOLO(...)`.
      labels: lookup from class id to display name; when omitted the model's
        own `names` mapping is used.
      classes: forwarded as `classes=` on every prediction.
      conf: forwarded as `conf=` on every prediction.
      iou: forwarded as `iou=` on every prediction.
    """
    self.classes = classes
    self.conf = conf
    self.iou = iou

    self.model = YOLO(model_file)
    # Strip directories and only the final extension, so that a path such as
    # 'weights/my.model.pt' yields 'my.model' (this name is embedded in output file names).
    self.model_name = os.path.splitext(os.path.basename(model_file))[0]
    self.results = None
    self.orig_img = None

    # if no labels are provided then use the names stored in the model
    if labels is None:
        self.labels = self.model.names
    else:
        self.labels = labels

  def predict_img(self, img, verbose=True):
    """Run the model on one image and cache the outcome.

    Args:
      img: image passed straight to the model.
      verbose: forwarded to the model call.

    Returns:
      The first element of the model output (also stored in `self.results`).
    """
    # Run the model on the input image with the configured parameters
    results = self.model(img, classes=self.classes, conf=self.conf, iou=self.iou, verbose=verbose)

    # Keep the image and its result so the display helpers can use them later
    self.orig_img = img
    self.results = results[0]

    return results[0]

  def default_display(self, show_conf=True, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
    """Render the cached detections with the result object's own `plot()` method.

    Args:
      show_conf: whether confidences are drawn (first argument of `plot()`).
      line_width, font_size, font, pil: forwarded to `plot()` by keyword.
      example: accepted for backward compatibility only and no longer forwarded.

    Returns:
      Whatever `self.results.plot(...)` returns.

    Raises:
      ValueError: if `predict_img()` has not been called yet.
    """
    if self.results is None:
        raise ValueError('No detected objects to display. Call predict_img() method first.')

    # Forward by keyword: in recent Ultralytics releases the sixth positional
    # parameter of plot() is `img`, so passing `example` positionally would make
    # the string 'abc' be treated as the image to draw on.
    display_img = self.results.plot(show_conf, line_width=line_width, font_size=font_size,
                                    font=font, pil=pil)

    return display_img

  def custom_display(self, colors, show_cls = True, show_conf = True):
    """Draw boxes, centroids and optional labels for the cached detections.

    Drawing happens in place on `self.orig_img`, i.e. on the very array that
    was handed to `predict_img()`.

    Args:
      colors: sequence indexed by class id giving the colour of each class.
      show_cls: include the class name in the label.
      show_conf: include the confidence (in percent) in the label.

    Returns:
      The annotated image (the same object as `self.orig_img`).

    Raises:
      ValueError: if `predict_img()` has not been called yet.
    """
    if self.results is None:
        raise ValueError('No detected objects to display. Call predict_img() method first.')

    img = self.orig_img
    # Box thickness grows with the image size; never let it drop to 0 on small images
    bbx_thickness = max(1, (img.shape[0] + img.shape[1]) // 450)
    font = cv2.FONT_HERSHEY_COMPLEX
    fontThickness = 1

    for box in self.results.boxes:
        # Extract object class, confidence score and corner coordinates
        score = box.conf.item() * 100
        class_id = int(box.cls.item())
        x1 , y1 , x2, y2 = np.squeeze(box.xyxy.cpu().numpy()).astype(int)

        # Build the label text
        textString = ""
        if show_cls:
            textString += f"{self.labels[class_id]}"
        if show_conf:
            textString += f" {score:,.2f}%"

        # Font scale follows the size of the box relative to the image.
        # NOTE(review): box width is divided by shape[0] (rows) and box height by
        # shape[1] (columns); this looks like swapped axes, left as is to keep the rendering.
        fontScale = (((x2 - x1) / img.shape[0]) + ((y2 - y1) / img.shape[1])) / 2 * 2.5
        textSize, _ = cv2.getTextSize(textString, font, fontScale, fontThickness)

        # Draw the bounding box and a filled red dot at its centre
        img = cv2.rectangle(img, (x1,y1), (x2,y2), colors[class_id], bbx_thickness)
        center_coordinates = ((x1 + x2)//2, (y1 + y2) // 2)
        img = cv2.circle(img, center_coordinates, 5 , (0,0,255), -1)

        # Only draw a label when there is something to show
        if textString != "":
            # Put the label inside the box when it would not fit above it
            if (y1 < textSize[1]):
                y1 = y1 + textSize[1]
            else:
                y1 -= 2
            # Label text on a filled rectangle of the class colour
            img = cv2.rectangle(img, (x1, y1), (x1 + textSize[0] , y1 -  textSize[1]), colors[class_id], cv2.FILLED)
            img = cv2.putText(img, textString ,
                (x1, y1), font,
                fontScale,  (0, 0, 0), fontThickness, cv2.LINE_AA)

    return img

  def predict_video(self, video_path, save_dir, save_format="avi", display='custom', verbose=True, **display_args):
    """Detect objects in every frame of a video and write an annotated copy.

    Args:
      video_path: input video file.
      save_dir: output directory (created when missing).
      save_format: extension of the output file.
      display: 'default' uses `default_display`, 'custom' uses `custom_display`;
        any other value writes the frames without detection overlays.
      verbose: print a short summary of the input and output files.
      **display_args: forwarded to the selected display method.
    """
    # Open the input video file
    cap = cv2.VideoCapture(video_path)
    vid_name = os.path.basename(video_path)

    # Frame geometry and rate of the source
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Write the output at the source frame rate so playback speed is preserved;
    # fall back to 30 when the container reports nothing usable (0 or NaN).
    src_fps = cap.get(cv2.CAP_PROP_FPS)
    out_fps = src_fps if src_fps and src_fps > 0 else 30

    # Create the output directory if it does not already exist
    os.makedirs(save_dir, exist_ok=True)

    # splitext keeps dots that are part of the video's base name
    save_name = self.model_name + ' -- ' + os.path.splitext(vid_name)[0] + '.' + save_format
    save_file = os.path.join(save_dir, save_name)

    if verbose:
        print("----------------------------")
        print(f"DETECTING OBJECTS IN : {vid_name} : ")
        print(f"RESOLUTION : {width}x{height}")
        print('SAVING TO :' + save_file)

    out = None
    try:
        out = cv2.VideoWriter(save_file,
                              cv2.VideoWriter_fourcc(*"MJPG"),
                              out_fps, (width, height))

        # Check if the input video file was opened correctly
        if not cap.isOpened():
            print("Error opening video stream or file")

        while cap.isOpened():
            ret, frame = cap.read()

            # No frame returned: end of stream or a read failure
            if not ret:
                print("Error reading frame")
                break

            # Run object detection on the frame and measure the processing rate
            beg = time.time()
            self.predict_img(frame, verbose=False)
            elapsed = time.time() - beg
            fps = 1 / elapsed if elapsed > 0 else float('inf')

            # Overlay the detection results
            if display == 'default':
                frame = self.default_display(**display_args)
            elif display == 'custom':
                # assignment, not comparison (the original `==` discarded the result)
                frame = self.custom_display(**display_args)

            # Overlay the processing rate
            frame = cv2.putText(frame, f"FPS : {fps:,.2f}",
                                (5, 15), cv2.FONT_HERSHEY_COMPLEX,
                                0.5, (0, 0, 255), 1, cv2.LINE_AA)

            out.write(frame)
    finally:
        # Always release the capture and the writer, even if a frame fails
        cap.release()
        if out is not None:
            out.release()

## Defining the YOLOv8_ObjectCounter Class for object counting:

In [None]:
class YOLOv8_ObjectCounter(YOLOv8_ObjectDetector):
    """Detector that additionally tracks objects with SORT and counts unique track IDs."""

    def __init__(self, model_file = 'yolov8n.pt', labels= None, classes = None, conf = 0.25, iou = 0.45,
                 track_max_age = 45, track_min_hits= 15, track_iou_threshold = 0.3 ):
        """Set up the detector and store the tracker settings.

        Args:
            model_file, labels, classes, conf, iou: forwarded to the parent constructor.
            track_max_age: passed to `sort.Sort` as `max_age`.
            track_min_hits: passed to `sort.Sort` as `min_hits`.
            track_iou_threshold: passed to `sort.Sort` as `iou_threshold`.
        """
        super().__init__(model_file , labels, classes, conf, iou)

        self.track_max_age = track_max_age
        self.track_min_hits = track_min_hits
        self.track_iou_threshold = track_iou_threshold

    def predict_video(self, video_path, save_dir, save_format = "avi",
                      display = 'custom', verbose = True, **display_args):
        """Detect, track and count objects in a video and write an annotated copy.

        Every frame is run through the detector; the boxes are fed to a fresh
        SORT tracker and each track ID seen for the first time increases the
        total. The running total, the track IDs and the processing rate are
        drawn on the frame. When the video ends, the number of unique IDs and
        the IDs themselves (in order of first appearance) are printed.

        Args:
            video_path: input video file.
            save_dir: output directory (created when missing).
            save_format: extension of the output file.
            display: 'default' uses `default_display`, 'custom' uses `custom_display`;
                any other value skips the detection overlay.
            verbose: print a short summary of the input and output files.
            **display_args: forwarded to the selected display method.
        """
        cap = cv2.VideoCapture(video_path)
        vid_name = os.path.basename(video_path)

        # Frame geometry and rate of the source
        width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Write the output at the source frame rate so playback speed is preserved;
        # fall back to 30 when the container reports nothing usable (0 or NaN).
        src_fps = cap.get(cv2.CAP_PROP_FPS)
        out_fps = src_fps if src_fps and src_fps > 0 else 30

        os.makedirs(save_dir, exist_ok=True)

        # splitext keeps dots that are part of the video's base name
        save_name = self.model_name + ' -- ' + os.path.splitext(vid_name)[0] + '.' + save_format
        save_file = os.path.join(save_dir, save_name)

        if verbose:
            print("----------------------------")
            print(f"DETECTING OBJECTS IN : {vid_name} : ")
            print(f"RESOLUTION : {width}x{height}")
            print('SAVING TO :' + save_file)

        # Unique track IDs in order of first appearance, plus a set for O(1) membership tests
        totalCount = []
        seen_ids = set()

        out = None
        try:
            out = cv2.VideoWriter(save_file,
                                  cv2.VideoWriter_fourcc(*"MJPG"),
                                  out_fps, (width, height))

            # Check if the video is opened correctly
            if not cap.isOpened():
                print("Error opening video stream or file")

            # One tracker per video
            tracker = sort.Sort(max_age = self.track_max_age, min_hits= self.track_min_hits ,
                                iou_threshold = self.track_iou_threshold)

            while cap.isOpened():
                ret, frame = cap.read()

                # No frame returned: end of stream or a read failure
                if not ret:
                    print("Error reading frame")
                    break

                # Run object detection on the frame and measure the processing rate
                beg = time.time()
                results = self.predict_img(frame, verbose = False)
                elapsed = time.time() - beg
                fps = 1 / elapsed if elapsed > 0 else float('inf')

                # Collect one [x1, y1, x2, y2, score] row per detection for the tracker
                rows = []
                for box in results.boxes:
                    score = box.conf.item() * 100
                    x1 , y1 , x2, y2 = np.squeeze(box.xyxy.cpu().numpy()).astype(int)
                    rows.append([x1, y1, x2, y2, score])
                # The tracker still has to be updated on frames without detections
                detections = np.array(rows, dtype=float) if rows else np.empty((0, 5))

                # Update object tracker
                resultsTracker = tracker.update(detections)
                for result in resultsTracker:
                    # Each tracker row is unpacked as x1, y1, x2, y2, track id
                    x1, y1, x2, y2, track_id = (int(value) for value in result)

                    # Draw the track ID at the centre of its box
                    w, h = x2 - x1, y2 - y1
                    cx, cy = x1 + w // 2, y1 + h // 2
                    id_txt = f"ID: {str(track_id)}"
                    cv2.putText(frame, id_txt, (cx, cy), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (0, 0, 255), 1)

                    # Register an ID the first time it shows up. IDs need not start
                    # at 1 or be contiguous; only their uniqueness matters for the count.
                    if track_id not in seen_ids:
                        seen_ids.add(track_id)
                        totalCount.append(track_id)

                # Overlay detection results
                if display == 'default':
                    frame = self.default_display(**display_args)
                elif display == 'custom':
                    # assignment, not comparison (the original `==` discarded the result)
                    frame = self.custom_display( **display_args)

                # Overlay the processing rate
                frame = cv2.putText(frame,f"FPS : {fps:,.2f}" ,
                                    (5,55), cv2.FONT_HERSHEY_COMPLEX,
                                    0.5,  (0,255,255), 1, cv2.LINE_AA)

                # Overlay the running total
                count_txt = f"TOTAL COUNT : {len(totalCount)}"
                frame = cv2.putText(frame, count_txt, (5,45), cv2.FONT_HERSHEY_COMPLEX, 2, (0, 0, 255), 2)

                # append frame to the video file
                out.write(frame)
        finally:
            # Always release the capture and the writer, even if a frame fails
            cap.release()
            if out is not None:
                out.release()

        print(len(totalCount))
        print(totalCount)

## Instantiating object detectors and counters with different YOLOv8 model variants

In [None]:
yolo_names = ['yolov8n.pt', 'yolov8s.pt', 'yolov8m.pt', 'yolov8l.pt', 'yolov8x.pt']

# 80 random 3-channel colours, each channel drawn from randint(50, 255);
# they are handed to the display code, which indexes them by class id.
colors = [
    tuple(random.randint(50, 255) for _ in range(3))
    for _ in range(80)
]

# One plain detector and one counter per model variant, all with a 0.60 confidence threshold.
detectors = [YOLOv8_ObjectDetector(weights, conf = 0.60 ) for weights in yolo_names]
counters = [YOLOv8_ObjectCounter(weights, conf = 0.60 ) for weights in yolo_names]

Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.23M/6.23M [00:00<00:00, 302MB/s]

Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s.pt to 'yolov8s.pt'...



100%|██████████| 21.5M/21.5M [00:00<00:00, 364MB/s]

Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m.pt to 'yolov8m.pt'...



100%|██████████| 49.7M/49.7M [00:00<00:00, 128MB/s]


Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l.pt to 'yolov8l.pt'...


100%|██████████| 83.7M/83.7M [00:00<00:00, 270MB/s]


Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:00<00:00, 163MB/s]


## Preparing directories and file-paths

In [None]:
# Folder on the mounted Drive that receives the annotated videos.
vid_results_path = '/content/drive/MyDrive/YOLOv8xSort_VehicleCounting/video_results'

# Create it (including parents) unless it is already there.
os.makedirs(vid_results_path, exist_ok=True)

## Print the different detector labels

In [None]:
# Build a detector with the default weights just to inspect its label mapping.
d = YOLOv8_ObjectDetector()
label_names = d.labels.values()
for label_name in label_names:
  print(label_name)

person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush


## Performing object detection, tracking and counting

In [None]:
# Run every model variant over the same test clip; each run writes its own
# annotated video into `vid_results_path` and prints its count.
for counter in counters:
    counter.predict_video(
        video_path= '/content/drive/MyDrive/YOLOv8xSort_VehicleCounting/test_videos/traffic.mp4',
        save_dir = vid_results_path,
        save_format = "avi",
        display = 'custom',
        colors = colors,
    )

----------------------------
DETECTING OBJECTS IN : traffic.mp4 : 
RESOLUTION : 1280x720
SAVING TO :/content/drive/MyDrive/YOLOv8xSort_VehicleCounting/video_results/yolov8n -- traffic.avi
Error reading frame
26
[8, 7, 6, 5, 4, 3, 2, 1, 9, 10, 12, 11, 13, 15, 17, 18, 19, 21, 22, 26, 27, 25, 29, 30, 32, 33]
----------------------------
DETECTING OBJECTS IN : traffic.mp4 : 
RESOLUTION : 1280x720
SAVING TO :/content/drive/MyDrive/YOLOv8xSort_VehicleCounting/video_results/yolov8s -- traffic.avi
Error reading frame
20
[43, 42, 41, 40, 39, 38, 44, 47, 45, 48, 54, 57, 61, 63, 66, 70, 69, 71, 76, 77]
----------------------------
DETECTING OBJECTS IN : traffic.mp4 : 
RESOLUTION : 1280x720
SAVING TO :/content/drive/MyDrive/YOLOv8xSort_VehicleCounting/video_results/yolov8m -- traffic.avi
Error reading frame
25
[86, 85, 84, 83, 82, 81, 87, 88, 89, 90, 91, 92, 98, 99, 101, 104, 103, 105, 100, 109, 112, 107, 113, 116, 117]
----------------------------
DETECTING OBJECTS IN : traffic.mp4 : 
RESOLUTION 

In [None]:
from moviepy.editor import VideoFileClip

# Convert one annotated result video into a GIF stored next to it.
avi_path = "/content/drive/MyDrive/YOLOv8xSort_VehicleCounting/video_results/yolov8n -- traffic.avi"
# Same location and base name, only the extension changes.
gif_path = os.path.splitext(avi_path)[0] + ".gif"

# The context manager closes the clip (and its underlying reader) once the GIF
# is written, instead of leaving it open for the rest of the session.
with VideoFileClip(avi_path) as clip:
    clip.write_gif(gif_path)

MoviePy - Building file /content/drive/MyDrive/YOLOv8xSort_VehicleCounting/video_results/yolov8n -- traffic.gif with imageio.


