In [3]:
import torch

In [4]:
# Environment report: installed PyTorch version, followed by the three
# MPS (Apple Metal backend) status flags queried through torch.
print("PyTorch:", torch.__version__)
mps_checks = (
    ("MPS available: ", torch.mps.is_available),
    ("MPS is built: ", torch.backends.mps.is_built),
    ("MPS is available: ", torch.backends.mps.is_available),
)
for label, check in mps_checks:
    # Each check is called only when its line is printed, in the listed order.
    print(label, check())

PyTorch: 2.8.0
MPS available:  True
MPS is built:  True
MPS is available:  True


In [5]:
# Choose the compute device: the Apple Metal (MPS) backend when it is usable,
# otherwise fall back to the CPU.
# `torch.backends.mps.is_available()` is the documented availability check and
# is the same call used in the diagnostics cell; the `torch.mps.is_available()`
# shortcut is not present in every PyTorch release.
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"device is {device}")

device is mps


In [6]:
from ultralytics import YOLO

In [16]:
# Build the detector from the YOLOv8-nano checkpoint file and echo the name
# the model object reports for itself.
weights = "yolov8n.pt"
model = YOLO(weights)
print(f"model name is {model.model_name}")

model name is yolov8n.pt


In [17]:
import requests
from PIL import Image
from io import BytesIO

In [18]:
# Download the sample image and decode it into a PIL image held in `img`.
image_url = "https://ultralytics.com/images/bus.jpg"

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(image_url, timeout=30)

# Fail loudly instead of just printing: if the download fails, `img` would be
# left undefined (or stale from an earlier run), and the prediction cell would
# then break with a confusing NameError or silently use the wrong image.
if response.status_code != 200:
    raise RuntimeError(
        f"failed to download image from {image_url} "
        f"(HTTP {response.status_code})"
    )

# Decode straight from the in-memory bytes; nothing is written to disk here.
img = Image.open(BytesIO(response.content))

In [19]:
# Run detection on the downloaded image using the device selected earlier.
result = model.predict(
    source=img,      # the PIL image loaded above
    device=device,   # "mps" or "cpu", as chosen in the device cell
    imgsz=640,       # inference image size
    conf=0.25,       # minimum confidence for a detection to be kept
    save=True,       # write the rendered result under the runs/ directory
)


0: 640x480 4 persons, 1 bus, 1 stop sign, 33.1ms
Speed: 4.7ms preprocess, 33.1ms inference, 20.8ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1m/Users/cwyark/project/snippets/my-ai-cookbooks/runs/detect/predict3[0m


In [20]:
# `result` is the sequence returned by `model.predict`; report how many
# entries it holds.
print(f"number of results are {len(result)}")

number of results are 1


In [21]:
# Take the first entry of the prediction list (the run above reported a single
# result) and print its text summary. `r` is reused by the following cells.
r = result[0]
print(r)

ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant',

In [23]:
# Pull the detection boxes out of the first result and print their summary
# (class ids, confidences and box coordinates as tensors).
boxes = r.boxes
print(boxes)

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([ 5.,  0.,  0.,  0.,  0., 11.], device='mps:0')
conf: tensor([0.8734, 0.8657, 0.8528, 0.8252, 0.2611, 0.2551], device='mps:0')
data: tensor([[2.2871e+01, 2.3128e+02, 8.0500e+02, 7.5684e+02, 8.7345e-01, 5.0000e+00],
        [4.8550e+01, 3.9855e+02, 2.4535e+02, 9.0270e+02, 8.6569e-01, 0.0000e+00],
        [6.6947e+02, 3.9219e+02, 8.0972e+02, 8.7704e+02, 8.5284e-01, 0.0000e+00],
        [2.2152e+02, 4.0580e+02, 3.4497e+02, 8.5754e+02, 8.2522e-01, 0.0000e+00],
        [0.0000e+00, 5.5053e+02, 6.3007e+01, 8.7344e+02, 2.6111e-01, 0.0000e+00],
        [5.8174e-02, 2.5446e+02, 3.2557e+01, 3.2487e+02, 2.5507e-01, 1.1000e+01]], device='mps:0')
id: None
is_track: False
orig_shape: (1080, 810)
shape: torch.Size([6, 6])
xywh: tensor([[413.9369, 494.0588, 782.1313, 525.5630],
        [146.9480, 650.6274, 196.7952, 504.1505],
        [739.5964, 634.6108, 140.2471, 484.8493],
        [283.2440, 631.6676, 123.4533, 451.7380],
       

In [27]:
# Convert the per-detection tensors (class id, confidence, corner coordinates)
# into plain Python lists so they can be iterated and formatted directly.
cls, conf, xyxy = (
    tensor.tolist() for tensor in (boxes.cls, boxes.conf, boxes.xyxy)
)

In [28]:
# One class id per detection, so the length of `cls` is the detection count.
print(f"Detections: {len(cls)}")

Detections: 6


In [30]:
# List every detection as "NN. <class name>  conf=<score>  box=<corners>",
# numbered from 1 in the order the boxes were returned.
class_names = model.model.names  # class id -> human-readable label
detections = zip(cls, conf, xyxy)
for rank, (class_id, score, corners) in enumerate(detections, start=1):
    # Class ids arrive as floats (e.g. 5.0), so cast before the lookup.
    label = class_names[int(class_id)]
    print(f"{rank:02d}. {label}  conf={score:.2f}  box={corners}")

01. bus  conf=0.87  box=[22.87126922607422, 231.27731323242188, 805.0025634765625, 756.84033203125]
02. person  conf=0.87  box=[48.550418853759766, 398.55218505859375, 245.34556579589844, 902.7026977539062]
03. person  conf=0.85  box=[669.472900390625, 392.1861572265625, 809.7200317382812, 877.0354614257812]
04. person  conf=0.83  box=[221.517333984375, 405.798583984375, 344.9706726074219, 857.5365600585938]
05. person  conf=0.26  box=[0.0, 550.5250244140625, 63.00697708129883, 873.4429321289062]
06. stop sign  conf=0.26  box=[0.05817389488220215, 254.45938110351562, 32.557411193847656, 324.8741760253906]
