In [2]:
from ultralytics import YOLO
import cv2

# モデル読み込み
model = YOLO("yolov8n.pt")




In [9]:
import numpy as np

# Open the web camera stream
cap = cv2.VideoCapture(0) # サンプルからここだけ変更するだけ
# Define the red region (modify these values according to your needs)
red_region = np.array([[100, 100], [200, 200]])

def in_region(bbox, region):
    # Check if the bounding box intersects with the region
    print(f"bbox: {bbox.xyxy[0]}")
    print(f"region: {region}")
    x_left = max(bbox.xyxy[0][0], region[0][0])
    y_top = max(bbox.xyxy[0][1], region[0][1])
    x_right = min(bbox.xyxy[0][2], region[1][0])
    y_bottom = min(bbox.xyxy[0][3], region[1][1])

    # If the bounding box and the region intersect, the intersection will have positive area
    if x_right < x_left or y_bottom < y_top:
        return False
    return True


while cap.isOpened():
    success, frame = cap.read()
    k = cv2.waitKey(1)

    if k == ord('q'):  # 'q' キーが押されたら終了する
        break
    if success:
        results = model(frame)
        annotated_frame = results[0].plot()

        # Draw the red region on the frame
        cv2.rectangle(annotated_frame, tuple(red_region[0]), tuple(red_region[1]), (0, 0, 255), 2)

        # Check if any object is in the red region
        for obj in results:
            bbox = obj.boxes[0].numpy()[:4]
            label = obj.names[0] 
            if label == 'person' and in_region(bbox, red_region):
                cv2.putText(annotated_frame, 'Warning: Person is detected in the red region!', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow("YOLOv8 Inference", annotated_frame)

cap.release()
cv2.destroyAllWindows()



0: 384x640 1 person, 113.3ms
Speed: 4.3ms preprocess, 113.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 65.2ms
Speed: 2.1ms preprocess, 65.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     309.97      141.61      1057.3      715.92]
region: [[100 100]
 [200 200]]
bbox: [     308.15      142.16      1056.9      716.02]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 64.5ms
Speed: 1.9ms preprocess, 64.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 85.8ms
Speed: 2.0ms preprocess, 85.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     308.87      142.22      1057.8       716.1]
region: [[100 100]
 [200 200]]
bbox: [     308.77       141.8      1058.8      715.98]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 64.3ms
Speed: 1.8ms preprocess, 64.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.1ms
Speed: 1.9ms preprocess, 62.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 63.6ms
Speed: 1.9ms preprocess, 63.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     310.07      140.69      1059.5      716.37]
region: [[100 100]
 [200 200]]
bbox: [     309.68      139.42      1060.2      716.24]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 65.8ms
Speed: 2.4ms preprocess, 65.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 61.8ms
Speed: 1.7ms preprocess, 61.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     309.35      140.59      1059.9      716.17]
region: [[100 100]
 [200 200]]
bbox: [     309.85      140.63      1060.1      716.12]
region: [[100 100]
 [200 200]]
bbox: [     309.64      140.35      1060.1      716.15]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 57.5ms
Speed: 1.7ms preprocess, 57.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 73.4ms
Speed: 1.7ms preprocess, 73.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     311.98      140.11      1057.9       716.1]
region: [[100 100]
 [200 200]]
bbox: [     310.27      140.19      1060.3      716.26]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 102.1ms
Speed: 5.1ms preprocess, 102.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 67.7ms
Speed: 1.9ms preprocess, 67.7ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     310.43       139.9      1059.7      716.15]
region: [[100 100]
 [200 200]]
bbox: [     309.29      140.22      1058.3      716.41]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 105.7ms
Speed: 2.6ms preprocess, 105.7ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 118.3ms
Speed: 4.0ms preprocess, 118.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     309.52      140.92      1056.9      715.91]
region: [[100 100]
 [200 200]]
bbox: [     305.63       141.1        1052      715.85]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 72.9ms
Speed: 2.6ms preprocess, 72.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 80.9ms
Speed: 1.8ms preprocess, 80.9ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)



bbox: [      293.2      145.85      1036.5      715.28]
region: [[100 100]
 [200 200]]
bbox: [     275.69      149.94      1018.4      714.94]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 57.3ms
Speed: 1.7ms preprocess, 57.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 53.5ms
Speed: 1.6ms preprocess, 53.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 53.0ms
Speed: 1.8ms preprocess, 53.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     254.35      152.09      996.01      715.44]
region: [[100 100]
 [200 200]]
bbox: [     244.86      158.16      974.65      714.97]
region: [[100 100]
 [200 200]]
bbox: [     231.55      159.05      960.96      714.86]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 83.1ms
Speed: 1.8ms preprocess, 83.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.3ms
Speed: 1.9ms preprocess, 54.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     207.62       159.2      939.28      714.83]
region: [[100 100]
 [200 200]]
bbox: [     196.18      164.58      911.89      714.74]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 74.0ms
Speed: 1.9ms preprocess, 74.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 53.9ms
Speed: 1.9ms preprocess, 53.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     182.68      165.39      895.48      714.53]
region: [[100 100]
 [200 200]]
bbox: [     161.76      168.49      869.23      714.33]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 53.5ms
Speed: 1.6ms preprocess, 53.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.2ms
Speed: 1.7ms preprocess, 54.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.3ms
Speed: 1.9ms preprocess, 55.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     153.24      170.02      856.31      714.81]
region: [[100 100]
 [200 200]]
bbox: [     148.17      171.22      846.64      715.28]
region: [[100 100]
 [200 200]]
bbox: [     147.43      172.89      845.44      713.96]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 55.0ms
Speed: 1.6ms preprocess, 55.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.2ms
Speed: 1.5ms preprocess, 54.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.8ms
Speed: 1.5ms preprocess, 54.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     145.38      172.98       845.2      714.31]
region: [[100 100]
 [200 200]]
bbox: [     146.73      172.13      845.03      714.07]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 66.6ms
Speed: 3.2ms preprocess, 66.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.3ms
Speed: 1.5ms preprocess, 55.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     148.81      170.46      846.91      714.88]
region: [[100 100]
 [200 200]]
bbox: [     148.73      171.05      855.22      714.19]
region: [[100 100]
 [200 200]]
bbox: [      149.7      168.44      857.88      714.39]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 56.0ms
Speed: 1.6ms preprocess, 56.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.9ms
Speed: 1.5ms preprocess, 54.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.2ms
Speed: 1.8ms preprocess, 54.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     150.12      166.45      863.28      714.39]
region: [[100 100]
 [200 200]]
bbox: [     151.29      165.18      866.46      714.59]
region: [[100 100]
 [200 200]]
bbox: [     154.55      164.24      869.65      714.86]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 54.4ms
Speed: 1.6ms preprocess, 54.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 62.0ms
Speed: 1.6ms preprocess, 62.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     159.35      162.32      873.67      714.88]
region: [[100 100]
 [200 200]]
bbox: [     164.07      161.31      877.74       714.7]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 54.0ms
Speed: 1.5ms preprocess, 54.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.0ms
Speed: 1.7ms preprocess, 55.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 56.3ms
Speed: 1.5ms preprocess, 56.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     169.49      160.73      883.38      714.58]
region: [[100 100]
 [200 200]]
bbox: [     174.23      157.99      891.64      714.99]
region: [[100 100]
 [200 200]]
bbox: [     175.56      154.37      900.03       714.9]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 55.1ms
Speed: 1.8ms preprocess, 55.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 53.8ms
Speed: 1.7ms preprocess, 53.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     174.39      150.15      906.72      714.92]
region: [[100 100]
 [200 200]]
bbox: [     178.81      147.12      912.97      714.95]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 58.0ms
Speed: 1.6ms preprocess, 58.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 58.7ms
Speed: 1.9ms preprocess, 58.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.5ms
Speed: 1.5ms preprocess, 55.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     180.83      143.38      924.58      715.78]
region: [[100 100]
 [200 200]]
bbox: [     193.61      143.21      933.88      715.44]
region: [[100 100]
 [200 200]]
bbox: [     203.79      145.26      943.73      715.51]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 81.3ms
Speed: 1.8ms preprocess, 81.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.5ms
Speed: 1.4ms preprocess, 54.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.2ms
Speed: 1.6ms preprocess, 54.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     212.74      143.62      954.85       715.6]
region: [[100 100]
 [200 200]]
bbox: [     222.34      143.55      966.73      715.38]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 55.3ms
Speed: 1.9ms preprocess, 55.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.3ms
Speed: 1.8ms preprocess, 55.3ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     230.64      143.44       973.3      716.06]
region: [[100 100]
 [200 200]]
bbox: [     237.05      144.27      975.84      715.96]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 55.0ms
Speed: 1.6ms preprocess, 55.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.4ms
Speed: 1.5ms preprocess, 55.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [      238.3      142.79      981.08      715.87]
region: [[100 100]
 [200 200]]
bbox: [     238.04      144.31      980.97      716.32]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 55.8ms
Speed: 1.5ms preprocess, 55.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.0ms
Speed: 1.6ms preprocess, 55.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [      235.4      145.69      975.54      715.92]
region: [[100 100]
 [200 200]]
bbox: [     225.88      145.06      968.75      715.44]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 72.0ms
Speed: 1.7ms preprocess, 72.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     212.75      145.31      955.46      715.46]
region: [[100 100]
 [200 200]]
bbox: [     201.34      146.95      940.66      714.94]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 71.8ms
Speed: 1.8ms preprocess, 71.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 54.9ms
Speed: 1.6ms preprocess, 54.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.1ms
Speed: 1.8ms preprocess, 55.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     184.29      146.68      927.82       715.3]
region: [[100 100]
 [200 200]]
bbox: [     176.12      146.91      912.84      714.78]
region: [[100 100]
 [200 200]]
bbox: [     171.57      148.01      905.61      714.76]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 55.9ms
Speed: 1.6ms preprocess, 55.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 53.9ms
Speed: 1.8ms preprocess, 53.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 55.4ms


bbox: [     164.63      147.32      900.36      714.62]
region: [[100 100]
 [200 200]]
bbox: [     161.87      147.29      896.38       714.5]
region: [[100 100]
 [200 200]]


Speed: 1.6ms preprocess, 55.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 57.2ms
Speed: 1.5ms preprocess, 57.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     161.64      148.03      894.79      714.72]
region: [[100 100]
 [200 200]]
bbox: [     162.44      147.78      896.43      714.68]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 113.8ms
Speed: 2.6ms preprocess, 113.8ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 80.0ms
Speed: 5.9ms preprocess, 80.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     166.01      146.65      901.78      714.55]
region: [[100 100]
 [200 200]]
bbox: [     174.29      143.88      918.48      714.79]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 60.8ms
Speed: 1.9ms preprocess, 60.8ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 57.9ms
Speed: 2.2ms preprocess, 57.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 57.5ms
Speed: 1.6ms preprocess, 57.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)


bbox: [     191.85      140.96      943.83      715.15]
region: [[100 100]
 [200 200]]
bbox: [     218.12      141.29      970.12      714.64]
region: [[100 100]
 [200 200]]
bbox: [     244.45      143.31      990.67      714.74]
region: [[100 100]
 [200 200]]



0: 384x640 1 person, 134.1ms
Speed: 2.7ms preprocess, 134.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 99.4ms
Speed: 3.3ms preprocess, 99.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     263.85      142.62      1011.9      714.45]
region: [[100 100]
 [200 200]]
bbox: [     298.06       139.6      1046.5       714.9]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 103.3ms
Speed: 2.7ms preprocess, 103.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     309.26      139.62      1062.2         716]
region: [[100 100]
 [200 200]]


0: 384x640 1 person, 166.0ms
Speed: 4.9ms preprocess, 166.0ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 90.0ms
Speed: 3.4ms preprocess, 90.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)



bbox: [     321.87      139.27      1073.5      715.51]
region: [[100 100]
 [200 200]]
bbox: [      324.9      137.94      1076.7      715.44]
region: [[100 100]
 [200 200]]


KeyboardInterrupt: 