In [1]:
!pip install vidgear -q
!pip install  opencv-python ultralytics openfilter cvzone -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.6/50.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/122.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.0/122.0 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m60.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.1/107.1 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m116.9 MB/s[0m eta [36m0

In [1]:
import torch
import cvzone
import numpy as np
import cv2
from ultralytics import YOLO # Import the YOLO class for YOLOv8

from openfilter.filter_runtime import Frame, Filter
from openfilter.filter_runtime.filters.video_in import VideoIn
from openfilter.filter_runtime.filters.video_out import VideoOut

class YOLOv8PersonCounterFilter(Filter):
    """Filter that tracks people with YOLOv8 and counts vertical-line crossings.

    For every frame the filter runs the tracker restricted to the person
    class, draws a box and an ID label for each tracked person, and compares
    each track's box-centre X with its value from the previous sighting to
    decide whether the track crossed the vertical counting line.
    Left-to-right crossings increment ``in_count``; right-to-left crossings
    increment ``out_count``.
    """

    # Drawing colours. Every tuple has equal first and last components, so it
    # renders the same whether the channel order is RGB or BGR.
    _BOX_COLOR = (50, 205, 50)
    _TEXT_COLOR = (255, 255, 255)
    _LINE_COLOR = (255, 255, 255)

    def setup(self, config):
        """
        Initializes the YOLOv8 model and the counting state.
        This method is called once when the filter pipeline starts.

        Args:
            config: Filter configuration; only ``config.my_option`` is read
                here (it is echoed to stdout).
        """
        print(f'YOLOv8PersonCounterFilter setup: {config.my_option=}')

        # Load a pre-trained YOLOv8 model.
        # 'yolov8n.pt' is the nano version, good for real-time and edge devices.
        self.model = YOLO('yolov8n.pt')
        self.model.eval()  # Set model to evaluation mode
        self.model.fuse()  # Fuse model for faster inference

        self.class_names_dict = self.model.names
        print("YOLOv8 Class Names:", self.class_names_dict)
        self.names = self.model.names

        # Set a confidence threshold for detections.
        self.confidence_threshold = 0.3

        # Class ID to track/count: looked up by label in the model's names,
        # falling back to 0 (the 'person' ID in the printed class list).
        self.person_class_id = next(
            (cid for cid, label in self.names.items() if label == 'person'), 0
        )

        # X position (in pixels) of the vertical counting line. Set once here
        # rather than on every frame.
        self.line_x = 600

        # track_id -> (cx, cy) of the box centre at the track's last sighting.
        # NOTE: entries are never evicted, so this grows with the number of
        # distinct track IDs seen over the lifetime of the filter.
        self.track_history = {}
        self.in_count = 0
        self.out_count = 0

    def process(self, frames):
        """
        Processes each incoming video frame, performs person tracking,
        updates the crossing counters, and draws results on the frame.

        Args:
            frames: Mapping of incoming frames; the ``'main'`` entry is used.

        Returns:
            A new ``Frame`` holding the annotated RGB image and the original
            frame's metadata.
        """
        # Get the current frame as a NumPy array (RGB format).
        frame_data = frames['main'].rw_rgb
        image = frame_data.image  # NumPy array (H, W, C)
        data = frame_data.data    # Metadata dictionary

        # Create a copy of the image to draw on to avoid modifying the original input.
        output_image = image.copy()

        # Ultralytics interprets NumPy array input as BGR, while this frame is
        # RGB. Convert for inference only, so the detector does not see
        # red/blue swapped; all drawing still happens on the RGB copy.
        results = self.model.track(
            cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
            persist=True,
            classes=[self.person_class_id],
            conf=self.confidence_threshold,
        )

        detections = results[0].boxes
        # `id` is None when the tracker has not assigned any IDs for this frame.
        if detections.id is not None:
            ids = detections.id.cpu().numpy().astype(int)
            boxes = detections.xyxy.cpu().numpy().astype(int)

            for track_id, box in zip(ids, boxes):
                # Work with plain Python ints for OpenCV calls and dict keys.
                track_id = int(track_id)
                x1, y1, x2, y2 = (int(v) for v in box)
                cx = int((x1 + x2) / 2)
                cy = int((y1 + y2) / 2)

                cv2.rectangle(output_image, (x1, y1), (x2, y2), self._BOX_COLOR, 2)
                self._draw_label(output_image, f'Person:{track_id}', x1, y1)
                self._update_counts(track_id, cx, cy)

        cvzone.putTextRect(output_image, f'IN: {self.in_count}', (1000, 100), scale=3, thickness=2,
                           colorT=(255, 255, 255), colorR=(99, 99, 99))
        cvzone.putTextRect(output_image, f'OUT: {self.out_count}', (40, 100), scale=3, thickness=2,
                           colorT=(255, 255, 255), colorR=(64, 64, 64))
        cv2.line(output_image, (self.line_x, 0), (self.line_x, output_image.shape[0]),
                 self._LINE_COLOR, 5)

        return Frame(output_image, data, 'RGB')

    def _draw_label(self, canvas, label, x1, y1):
        """Draw `label` on a filled background sitting on the box's top-left corner."""
        (text_width, text_height), baseline = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1
        )
        cv2.rectangle(canvas, (x1, y1 - text_height - baseline - 5),
                      (x1 + text_width + 5, y1), self._BOX_COLOR, -1)
        cv2.putText(canvas, label, (x1 + 2, y1 - baseline - 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, self._TEXT_COLOR, 1, cv2.LINE_AA)

    def _update_counts(self, track_id, cx, cy):
        """Update in/out counters if the track crossed the line, then store its centre."""
        previous = self.track_history.get(track_id)
        if previous is not None:
            prev_cx = previous[0]
            if prev_cx < self.line_x <= cx:    # Crossing from left to right
                self.in_count += 1
            elif prev_cx > self.line_x >= cx:  # Crossing from right to left
                self.out_count += 1

        self.track_history[track_id] = (cx, cy)

    def shutdown(self):
        """
        Cleanup method, called when the filter pipeline is shut down.
        """
        print('YOLOv8PersonCounterFilter shutting down')

if __name__ == '__main__':
    # Three filters chained over TCP endpoints:
    #   VideoIn (video.mp4) -> port 5555 -> person counter -> port 5552 -> VideoOut (output.mp4)
    Filter.run_multi([
        # Source stage: takes its input from the local file URI.
        (VideoIn, {'sources': 'file://video.mp4!sync', 'outputs': 'tcp://*:5555'}),
        # Processing stage: the person counter defined in this notebook.
        (
            YOLOv8PersonCounterFilter,
            {
                'sources': 'tcp://localhost:5555',
                'outputs': 'tcp://*:5552',
                'my_option': 'PersonCounting',
            },
        ),
        # Sink stage: sends the annotated stream to the output file URI.
        (VideoOut, {'sources': 'tcp://localhost:5552', 'outputs': 'file://output.mp4'}),
    ])

YOLOv8PersonCounterFilter setup: config.my_option='PersonCounting'
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs
YOLOv8 Class Names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: '