Import libraries

In [1]:
from ultralytics import YOLO
import os
import cv2

Load model

In [2]:
# Build the detector from the 'yolov8n.pt' weights file.
weights_file = 'yolov8n.pt'
model = YOLO(weights_file)

Object classes the model can detect

In [3]:
# Display the model's mapping from integer class id to class label
# (e.g. 0 -> 'person', 16 -> 'dog'); the ids are what `box.cls` reports below.
model.names

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

Input image

In [4]:
# Read the input picture from disk.
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would only surface later as a confusing error in another cell.
# Fail right here with a clear message instead.
image_path = 'input.jpeg'
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f"Could not read image file: {image_path!r}")

Detect objects

In [5]:
# Run the model on the loaded image. Later cells iterate over `result` and
# read each entry's `.boxes` to get class ids, confidences and coordinates.
result = model(img)


0: 352x640 4 persons, 1 dog, 1 frisbee, 146.7ms
Speed: 13.4ms preprocess, 146.7ms inference, 15.1ms postprocess per image at shape (1, 3, 352, 640)


Show the detection result

In [6]:
# Display the first entry of `result` (only one image was passed to the model).
result[0].show()

Print the detected boxes for each result

In [9]:
# Dump the raw Boxes object of every entry in `result` for inspection.
for detection in result:
    print(detection.boxes)

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([ 0.,  0.,  0.,  0., 16., 29.])
conf: tensor([0.9250, 0.9238, 0.8919, 0.8824, 0.8775, 0.2773])
data: tensor([[4.2191e+02, 9.9434e+01, 6.5219e+02, 4.9623e+02, 9.2496e-01, 0.0000e+00],
        [8.1112e+02, 1.2329e+02, 1.0253e+03, 5.0358e+02, 9.2379e-01, 0.0000e+00],
        [1.3040e+02, 1.5664e+02, 2.6757e+02, 5.0614e+02, 8.9190e-01, 0.0000e+00],
        [2.9722e+02, 1.9584e+02, 4.2175e+02, 4.9498e+02, 8.8239e-01, 0.0000e+00],
        [6.2975e+02, 3.4144e+02, 7.4409e+02, 5.0165e+02, 8.7751e-01, 1.6000e+01],
        [2.3970e+02, 2.1747e+02, 2.6665e+02, 2.8915e+02, 2.7726e-01, 2.9000e+01]])
id: None
is_track: False
orig_shape: (597, 1100)
shape: torch.Size([6, 6])
xywh: tensor([[537.0510, 297.8339, 230.2728, 396.7997],
        [918.2189, 313.4348, 214.1977, 380.2874],
        [198.9844, 331.3923, 137.1730, 349.4967],
        [359.4871, 345.4082, 124.5337, 299.1371],
        [686.9200, 421.5468, 114.3357, 160.2126],
     

Check the class, confidence, and coordinates of each box

In [17]:
# For every detected box print, one per line: the integer class id, the
# confidence as a float, and the four corner coordinates, followed by a
# separator row of asterisks.
separator = '*' * 20
for detection in result:
    for box in detection.boxes:
        class_index = int(box.cls[0])
        confidence = float(box.conf[0])
        left, top, right, bottom = box.xyxy[0]

        print(class_index)
        print(confidence)
        print(left, top, right, bottom)

        print(separator)

0
0.924964427947998
tensor(421.9146) tensor(99.4341) tensor(652.1874) tensor(496.2337)
********************
0
0.9237902164459229
tensor(811.1201) tensor(123.2912) tensor(1025.3177) tensor(503.5786)
********************
0
0.8919014930725098
tensor(130.3979) tensor(156.6439) tensor(267.5710) tensor(506.1406)
********************
0
0.8823862671852112
tensor(297.2202) tensor(195.8396) tensor(421.7539) tensor(494.9767)
********************
16
0.8775050640106201
tensor(629.7521) tensor(341.4404) tensor(744.0878) tensor(501.6531)
********************
29
0.27725714445114136
tensor(239.7000) tensor(217.4657) tensor(266.6503) tensor(289.1504)
********************


Create directory

In [18]:
# Folder that receives the cropped images. `exist_ok=True` makes this cell
# safe to re-run: an already existing folder is not an error.
save_dir = 'slices'
os.makedirs(name=save_dir, exist_ok=True)

Crop and save each detected person

In [23]:
# Crop every confidently detected person out of `img` and write each crop to
# <save_dir>/person_<n>.jpg, where <n> counts the crops actually saved.
# After the cell runs, `persons` holds the number of files written.
PERSON_CLASS_ID = 0    # id of 'person' in model.names
MIN_CONFIDENCE = 0.5   # detections at or below this score are ignored

img_height, img_width = img.shape[:2]

persons = 0
for r in result:
    boxes = r.boxes

    for box in boxes:
        cls_id = int(box.cls[0])
        conf = float(box.conf[0])

        if cls_id != PERSON_CLASS_ID or conf <= MIN_CONFIDENCE:
            continue

        x1, y1, x2, y2 = map(int, box.xyxy[0])

        # Clamp to the image so a negative coordinate cannot wrap around
        # in the NumPy slice below.
        x1, x2 = max(0, x1), min(img_width, x2)
        y1, y2 = max(0, y1), min(img_height, y2)

        # A box that collapses to zero width or height yields an empty
        # array, which cv2.imwrite rejects; skip it.
        if x2 <= x1 or y2 <= y1:
            continue

        crop_img = img[y1:y2, x1:x2]

        save_path = os.path.join(save_dir, f"person_{persons}.jpg")
        # cv2.imwrite reports failure through its return value, not an
        # exception; surface it so a missing file is not silently counted.
        if not cv2.imwrite(save_path, crop_img):
            raise OSError(f"Failed to write crop to {save_path!r}")

        persons += 1

