Install YOLO-World-ONNX:

In [None]:
!pip install yolo-world-onnx

Download a pre-trained ONNX model:


In [None]:
!wget https://github.com/Ziad-Algrafi/ODLabel/raw/main/assets/yolov8m-worldv2.onnx

Run inference on an image:

In [None]:
from yolo_world_onnx import YOLOWORLD
import cv2 as cv
from google.colab.patches import cv2_imshow

# Load the YOLO model
model_path = "yolov8m-worldv2.onnx"
model = YOLOWORLD(model_path, device="cpu")

# Set the class names
class_names = ["person", "car", "dog", "cat"]
model.set_classes(class_names)

# Retrieve the names
names = model.names

# Load an image
image_url = "https://example.com/image.jpg"
!wget -O image.jpg {image_url}
image = cv.imread("image.jpg")

# Perform object detection
boxes, scores, class_ids = model(image, conf=0.5, imgsz=640, iou=0.4)

# Draw bounding boxes on the image
for box, score, class_id in zip(boxes, scores, class_ids):
    x, y, w, h = box
    x1, y1 = int(x - w / 2), int(y - h / 2)
    x2, y2 = int(x + w / 2), int(y + h / 2)
    cv.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    class_name = names[class_id]
    cv.putText(image, f"{class_name}: {score:.2f}", (x1, y1 - 10), cv.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Display the image
cv2_imshow(image)

Run inference on a video:


In [None]:
from yolo_world_onnx import YOLOWORLD
import cv2 as cv
from google.colab.patches import cv2_imshow

# Load the ONNX model file downloaded earlier and run it on the CPU.
model_path = "yolov8m-worldv2.onnx"
model = YOLOWORLD(model_path, device="cpu")

# Set the class names the detector should look for.
class_names = ["person", "car", "dog", "cat"]
model.set_classes(class_names)

# Retrieve the names; indexed below by predicted class id to build the label.
names = model.names

# Open a video file or capture from a camera.
# NOTE: this path is a placeholder -- replace it with the path to your video.
video_path = "/path/to/your/video.mp4"
cap = cv.VideoCapture(video_path)

try:
    # VideoCapture does not raise on a bad path; without this check the loop
    # below would simply process zero frames and the cell would appear to
    # succeed while showing nothing.
    if not cap.isOpened():
        raise IOError(f"Could not open video source: {video_path}")

    while True:
        # Read a frame from the video; ret is False once no frame is returned.
        ret, frame = cap.read()
        if not ret:
            break

        # Perform object detection on the current frame.
        boxes, scores, class_ids = model(frame, conf=0.35, imgsz=640, iou=0.4)

        # Draw bounding boxes on the frame (drawn in place on `frame`).
        for box, score, class_id in zip(boxes, scores, class_ids):
            # Each box is treated as (center_x, center_y, width, height) and
            # converted to top-left / bottom-right corners for cv.rectangle.
            x, y, w, h = box
            x1, y1 = int(x - w / 2), int(y - h / 2)
            x2, y2 = int(x + w / 2), int(y + h / 2)
            cv.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            class_name = names[class_id]
            # Keep the label inside the frame when the box touches the top edge.
            label_y = max(y1 - 10, 20)
            cv.putText(frame, f"{class_name}: {score:.2f}", (x1, label_y), cv.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Display the frame inline. Every frame becomes a notebook output, so
        # long videos produce a lot of output.
        # There is no OpenCV GUI window here, so key presses cannot be polled;
        # interrupt the cell (stop button) to end early.
        cv2_imshow(frame)
except KeyboardInterrupt:
    print("Stopped by user.")
finally:
    # Always release the capture, including on errors and interrupts.
    cap.release()