In [1]:
!pip install opencv-python ultralytics

Collecting ultralytics
  Downloading ultralytics-8.0.231-py3-none-any.whl (663 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m663.2/663.2 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.0.231


In [16]:
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
import os

model = YOLO("yolov8n-seg.pt")
names = model.model.names
cap = cv2.VideoCapture("car.mp4")

output_folder = "output_instances"
os.makedirs(output_folder, exist_ok=True)

out = cv2.VideoWriter('instance-segmentation.avi',
                      cv2.VideoWriter_fourcc(*'MJPG'),
                      30, (int(cap.get(3)), int(cap.get(4))))

# Count variables
car_count = 0
bus_count = 0
truck_count = 0

while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    results = model.predict(im0)
    if results[0].masks is not None:
        clss = results[0].boxes.cls.cpu().tolist()
        masks = results[0].masks.xy

        annotator = Annotator(im0, line_width=2)

        for idx, (mask, cls) in enumerate(zip(masks, clss)):
            det_label = names[int(cls)]
            annotator.seg_bbox(mask=mask,
                               mask_color=colors(int(cls), True),
                               det_label=det_label)

            # Save each instance segmented object
            instance_folder = os.path.join(output_folder, det_label)
            os.makedirs(instance_folder, exist_ok=True)
            instance_path = os.path.join(instance_folder, f"{det_label}_{idx}.png")
            cv2.imwrite(instance_path, im0)

            # Count objects
            if det_label == 'car':
                car_count += 1
            elif det_label == 'bus':
                bus_count += 1
            elif det_label == 'truck':
                truck_count += 1

        # Add text to the video
        cv2.putText(im0, f"Car Count: {car_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(im0, f"Bus Count: {bus_count}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(im0, f"Truck Count: {truck_count}", (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    out.write(im0)
    # cv2.imshow("instance-segmentation", im0)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

out.release()
cap.release()
cv2.destroyAllWindows()



0: 384x640 1 bus, 1 truck, 190.3ms
Speed: 3.9ms preprocess, 190.3ms inference, 6.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 truck, 184.6ms
Speed: 3.3ms preprocess, 184.6ms inference, 3.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bus, 1 truck, 193.9ms
Speed: 2.8ms preprocess, 193.9ms inference, 6.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 trucks, 185.0ms
Speed: 2.7ms preprocess, 185.0ms inference, 6.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 2 trucks, 200.7ms
Speed: 2.7ms preprocess, 200.7ms inference, 14.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 bus, 1 truck, 172.6ms
Speed: 2.7ms preprocess, 172.6ms inference, 11.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 bus, 1 truck, 191.8ms
Speed: 2.6ms preprocess, 191.8ms inference, 15.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 bus, 1 truck, 174.3ms
Speed: 3.1ms p

KeyboardInterrupt: ignored