In [1]:
from ultralytics import YOLO
import torch

In [2]:
# Model file handed to the YOLO constructor below.
MODEL_PATH = "yolov8n.pt"


# Build the detector once; later cells move `model` to a device and call it on frames.
model = YOLO(MODEL_PATH)

In [3]:
# Place the model on the GPU when CUDA is usable, otherwise on the CPU.
# The trailing semicolon keeps the notebook from echoing the value returned by .to().
model.to("cuda" if torch.cuda.is_available() else "cpu");

In [4]:
import cv2

In [5]:
# Open camera index 0 and grab a single frame for the snapshot experiments below.
camera = cv2.VideoCapture(0)

# Fail here, with a clear message, instead of letting a None frame surface
# later as an AttributeError on `frame.copy()`.
if not camera.isOpened():
    raise RuntimeError("Could not open camera at index 0")

# `ret` is the success flag of the grab; `frame` is the captured image.
ret, frame = camera.read()
if not ret or frame is None:
    # Free the device before bailing out so a re-run can open it again.
    camera.release()
    raise RuntimeError("Camera opened but no frame could be read")

In [6]:
# The snapshot is already held in `frame`, so the capture device can be released.
camera.release()
# No window is created by the cells shown above this point; this call only
# makes sure nothing is left open.
cv2.destroyAllWindows()

In [7]:
# Work on a copy: boxes and labels are drawn onto `copyframe` later, while the
# untouched `frame` is what gets passed to the model.
copyframe = frame.copy()

In [8]:
# Run the detector on the captured frame. With stream=False the call returns a
# list (see the output of the next cell). conf=0.5 is presumably the minimum
# confidence a detection needs to be kept, and verbose=False presumably
# silences per-call logging — confirm against the Ultralytics predict arguments.
results = model(frame, stream=False, verbose=False, conf=0.5)

In [9]:
# Inspect the raw return value: a list holding Results objects.
results

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p

In [10]:
# Inspect the detections of the first result: class id, confidence and the box
# coordinates in several formats (xyxy, xywh and their normalised variants).
results[0].boxes

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0.], device='cuda:0')
conf: tensor([0.8767], device='cuda:0')
data: tensor([[101.9325, 130.2327, 549.9501, 479.2849,   0.8767,   0.0000]], device='cuda:0')
id: None
is_track: False
orig_shape: (480, 640)
shape: torch.Size([1, 6])
xywh: tensor([[325.9413, 304.7588, 448.0176, 349.0522]], device='cuda:0')
xywhn: tensor([[0.5093, 0.6349, 0.7000, 0.7272]], device='cuda:0')
xyxy: tensor([[101.9325, 130.2327, 549.9501, 479.2849]], device='cuda:0')
xyxyn: tensor([[0.1593, 0.2713, 0.8593, 0.9985]], device='cuda:0')

In [11]:
# Pull out the first detection once, then show its confidence, class id and
# corner coordinates together as a tuple.
first_box = results[0].boxes[0]
(first_box.conf[0], first_box.cls[0], first_box.xyxy[0])

(tensor(0.8767, device='cuda:0'),
 tensor(0., device='cuda:0'),
 tensor([101.9325, 130.2327, 549.9501, 479.2849], device='cuda:0'))

In [12]:
# Class-id -> class-name lookup taken from the model (e.g. 0 -> 'person').
CLASS_NAMES = model.names
# Sanity check: display the name stored for class id 0.
CLASS_NAMES[0]

'person'

In [13]:
# Only detections whose class name equals this value (case-insensitively) are
# drawn by the annotation loops below.
TARGET_CLASS_PERSON = "person"

In [None]:
# Draw a green rectangle and a "<class>: <confidence>" label on `copyframe`
# for every detection whose class name matches TARGET_CLASS_PERSON.
target_name = TARGET_CLASS_PERSON.lower()  # loop-invariant, so compute it once

for result in results:
    for box in result.boxes:
        # xyxy holds float corner coordinates; OpenCV drawing calls need ints.
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        conf = float(box.conf[0])
        cls = int(box.cls[0])  # 0 maps to 'person' in CLASS_NAMES
        class_name = CLASS_NAMES[cls]

        if class_name.lower() != target_name:
            continue

        cv2.rectangle(copyframe, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f"{class_name}: {conf:.2f}"
        # The label normally sits 10 px above the box. When the box touches the
        # top of the image that position is off-screen, so keep the text
        # baseline at least 15 px below the top edge to stay readable.
        label_y = max(y1 - 10, 15)
        cv2.putText(copyframe, label, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)


In [15]:
# Show the annotated snapshot in a window titled "al".
cv2.imshow("al", copyframe)

# Wait for a key press (no delay argument is given), then close the window.
cv2.waitKey()
cv2.destroyAllWindows()

In [16]:
# The capture was already released right after the snapshot was taken; the call
# is repeated here before the device is reopened in the next cell
# (presumably a harmless no-op on an already-released capture — confirm).
camera.release()

In [17]:
# Live person detection: read frames from camera index 0, run the model on each
# one, draw matching detections and show the result until 'q' is pressed.
camera = cv2.VideoCapture(0)
target_name = TARGET_CLASS_PERSON.lower()  # loop-invariant, so compute it once

try:
    if not camera.isOpened():
        raise RuntimeError("Could not open camera at index 0")

    while True:
        ret, frame = camera.read()
        if not ret or frame is None:
            # A failed grab yields no image; stop cleanly instead of crashing
            # on frame.copy() with the device still held.
            print("Camera read failed; stopping.")
            break

        # Annotate a copy so the model always sees the unmodified frame.
        copyframe = frame.copy()
        results = model(frame, stream=False, verbose=False, conf=0.5)

        for result in results:
            for box in result.boxes:
                # xyxy holds float corner coordinates; OpenCV needs ints.
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                conf = float(box.conf[0])
                cls = int(box.cls[0])  # 0 maps to 'person' in CLASS_NAMES
                class_name = CLASS_NAMES[cls]

                if class_name.lower() != target_name:
                    continue

                cv2.rectangle(copyframe, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label = f"{class_name}: {conf:.2f}"
                # Keep the text baseline at least 15 px below the top edge so
                # the label stays visible when the box touches the top.
                label_y = max(y1 - 10, 15)
                cv2.putText(copyframe, label, (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        cv2.imshow("detected", copyframe)

        # waitKey(1) also gives the window a chance to refresh; 'q' ends the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Runs on normal exit, on errors and on a kernel interrupt, so the camera
    # is never left locked and no window is left open.
    camera.release()
    cv2.destroyAllWindows()
