<a href="https://colab.research.google.com/github/dhanishetty/OpenFilter/blob/main/12_YOLOv8s_Count_People_in_Zone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install vidgear -q
!pip install  opencv-python ultralytics openfilter cvzone -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.6/50.6 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/122.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.0/122.0 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.1/107.1 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m113.0 MB/s[0m eta [36m0:0

In [None]:
import torch
import cvzone
import numpy as np
import cv2
from ultralytics import YOLO # Import the YOLO class for YOLOv8


from openfilter.filter_runtime import Frame, Filter
from openfilter.filter_runtime.filters.video_in import VideoIn
from openfilter.filter_runtime.filters.video_out import VideoOut

class YOLOv8PersonCounterFilter(Filter):
    """Filter that counts tracked people whose feet are inside a polygon zone.

    For every incoming frame the filter runs YOLOv8s tracking restricted to
    the person class, tests the bottom-centre point of each tracked box
    against ``self.area``, draws a labelled box for every person inside the
    zone, outlines the zone, and stamps a ``Persons:<n>`` overlay on the
    frame that is passed downstream.
    """

    def setup(self, config):
        """Load the detector and initialise the zone and counters.

        Args:
            config: Filter configuration object; ``config.my_option`` is
                read only to be echoed in the start-up message.
        """
        print(f'YOLOv8PersonCounterFilter setup: {config.my_option=}')

        self.model = YOLO('yolov8s.pt')
        self.model.eval()  # Set model to evaluation mode

        self.class_names_dict = self.model.names
        print("YOLOv8s Class Names:", self.class_names_dict)

        # Set a confidence threshold for detections.
        self.confidence_threshold = 0.1

        # Class ID passed to the tracker; index 0 is the class the model
        # reports as "persons" in its per-frame log.
        self.person_class_id = 0
        # The attributes below are initialised but not read by this class.
        self.track_history = {}
        self.in_count = 0
        self.out_count = 0
        self.names = self.model.names
        # Zone polygon vertices as (x, y) pixel coordinates of the frame.
        self.area = [(785,1295),(1896,1016),(1175,516),(638,550),(679,934)]

    def process(self, frames):
        """Annotate the ``'main'`` frame and return it with the zone count.

        Args:
            frames: Mapping of topic name to frame; only ``frames['main']``
                is used.

        Returns:
            A new ``Frame`` carrying the annotated RGB image and the
            unmodified metadata of the input frame.
        """
        frame_data = frames['main'].rw_rgb
        image = frame_data.image  # NumPy array (H, W, C)
        data = frame_data.data    # Metadata dictionary

        # Draw on a copy so the input image is left untouched.
        output_image = image.copy()

        # Built once per frame instead of once per detection.
        zone = np.array(self.area, np.int32)

        # Ultralytics interprets NumPy inputs as BGR, while this frame is
        # RGB; hand the model a channel-swapped copy so red and blue are not
        # exchanged during inference. Drawing still happens on the RGB copy.
        model_input = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        results = self.model.track(
            model_input,
            persist=True,
            classes=[self.person_class_id],
            conf=self.confidence_threshold,
        )

        person_count = 0
        detections = results[0].boxes

        # ``id`` is None when the tracker has not assigned any track IDs.
        if detections.id is not None:
            # ``tolist()`` yields plain Python ints, which every OpenCV
            # drawing call accepts as point coordinates.
            track_ids = detections.id.cpu().numpy().astype(int).tolist()
            corners = detections.xyxy.cpu().numpy().astype(int).tolist()

            for track_id, (x1, y1, x2, y2) in zip(track_ids, corners):
                # Bottom-centre of the box approximates where the person stands.
                cx = int((x1 + x2) / 2)
                cy = y2
                # With measureDist=False the result is +1 inside, -1 outside
                # and 0 on the edge; only strictly-inside points are counted.
                if cv2.pointPolygonTest(zone, (cx, cy), False) > 0:
                    person_count += 1
                    self._draw_person(output_image, track_id, (x1, y1, x2, y2), (cx, cy))

        cv2.polylines(output_image, [zone], True, (255, 255, 255), 3)
        self._draw_count(output_image, person_count)

        return Frame(output_image, data, 'RGB')

    def _draw_person(self, canvas, track_id, box, foot_point):
        """Draw the box, foot marker and ``Person:<id>`` tag for one person."""
        x1, y1, x2, y2 = box
        font = cv2.FONT_HERSHEY_SIMPLEX
        box_color = (50, 205, 50)

        cv2.circle(canvas, foot_point, 2, (0, 255, 0), -1)
        cv2.rectangle(canvas, (x1, y1), (x2, y2), box_color, 1)

        label = f'Person:{track_id}'
        (text_width, text_height), baseline = cv2.getTextSize(label, font, 0.6, 1)
        # Filled tag sitting directly on top of the box's upper-left corner.
        cv2.rectangle(canvas, (x1, y1 - text_height - baseline - 5),
                      (x1 + text_width + 5, y1), box_color, -1)
        cv2.putText(canvas, label, (x1 + 2, y1 - baseline - 2),
                    font, 0.6, (255, 255, 255), 1, cv2.LINE_AA)

    def _draw_count(self, canvas, person_count):
        """Stamp the ``Persons:<n>`` overlay, keeping it inside the frame."""
        count_text = f'Persons:{person_count}'
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 3
        font_thickness = 2
        text_color = (63, 63, 63)  # text
        background_color = (255, 255, 255)  # background

        (text_width, text_height), baseline = cv2.getTextSize(
            count_text, font, font_scale, font_thickness)
        panel_width = text_width + 10
        panel_height = text_height + baseline + 3

        # Preferred anchor is (1990, 205). On frames too small for that, the
        # panel is shifted left/up so it remains visible instead of being
        # drawn outside the image.
        frame_height, frame_width = canvas.shape[:2]
        left = max(0, min(1990, frame_width - panel_width))
        top = max(0, min(205, frame_height - panel_height))

        cv2.rectangle(canvas, (left, top),
                      (left + panel_width, top + panel_height),
                      background_color, -1)
        cv2.putText(canvas, count_text, (left, top + 13 + text_height),
                    font, font_scale, text_color, font_thickness, cv2.LINE_AA)

    def shutdown(self):
        """
        Cleanup method, called when the filter pipeline is shut down.
        """
        print('YOLOv8PersonCounterFilter shutting down')

if __name__ == '__main__':
    # Three-stage pipeline: read video.mp4, count people, write output.mp4.
    # Each stage's `sources` endpoint matches the previous stage's `outputs`
    # port (5555 between reader and counter, 5552 between counter and writer).
    reader_options = {
        'sources': 'file://video.mp4!sync',
        'outputs': 'tcp://*:5555',
    }
    counter_options = {
        'sources': 'tcp://localhost:5555',
        'outputs': 'tcp://*:5552',
        'my_option': 'PersonCounting',
    }
    writer_options = {
        'sources': 'tcp://localhost:5552',
        'outputs': 'file://output.mp4',
    }

    pipeline = [
        (VideoIn, reader_options),
        (YOLOv8PersonCounterFilter, counter_options),
        (VideoOut, writer_options),
    ]
    Filter.run_multi(pipeline)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 352x640 8 persons, 26.4ms
Speed: 5.5ms preprocess, 26.4ms inference, 9.6ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 8 persons, 15.7ms
Speed: 4.8ms preprocess, 15.7ms inference, 3.1ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 8 persons, 13.3ms
Speed: 3.9ms preprocess, 13.3ms inference, 4.9ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 8 persons, 11.9ms
Speed: 4.0ms preprocess, 11.9ms inference, 2.4ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 8 persons, 12.4ms
Speed: 4.3ms preprocess, 12.4ms inference, 2.4ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 8 persons, 12.5ms
Speed: 6.2ms preprocess, 12.5ms inference, 3.2ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 8 persons, 11.4ms
Speed: 3.7ms preprocess, 11.4ms inference, 2.7ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 8 persons, 13.5ms
Speed: 4.1ms pre