In [7]:
import cv2

# Open the default camera (device index 0).
# Pass a different index to select another attached camera.
cap = cv2.VideoCapture(0)

# try/finally guarantees the capture device and the preview window are released
# even when the loop is interrupted (e.g. Kernel -> Interrupt) or raises;
# otherwise the camera can stay locked and later reads may fail.
try:
    if not cap.isOpened():
        # Report the failure and fall through to the cleanup below.
        # exit() is deliberately not used: inside a notebook it asks the
        # kernel to shut down instead of merely ending this cell.
        print("Cannot open camera")
    else:
        # Keep pulling frames until the stream ends or the user presses 'q'.
        while True:
            # ret is False when no frame could be read.
            ret, frame = cap.read()
            if not ret:
                print("Can't receive frame (stream end?). Exiting ...")
                break

            # Show the captured frame in a window titled 'frame'.
            cv2.imshow('frame', frame)
            # Keep only the low byte of the key code so the comparison still
            # works if the backend sets additional high bits.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Post-processing: release the device and close any OpenCV windows.
    cap.release()
    cv2.destroyAllWindows()

Can't receive frame (stream end?). Exiting ...


In [1]:
import cv2
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F

In [2]:
# Build Faster R-CNN (ResNet-50 FPN) with its COCO-pretrained weights and
# switch it to evaluation mode in the same statement (eval() returns the module).
model = fasterrcnn_resnet50_fpn(pretrained=True).eval()



In [3]:
# COCO category names, looked up by the integer label the detector returns
# (the list position is the label id). Index 0 is the background entry.
# The 'N/A' entries appear to be placeholders for label ids that have no
# category name, keeping list positions aligned with label ids.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

# Minimum confidence score a detection needs before it is drawn.
SCORE_THRESHOLD = 0.5

# Live object detection: grab webcam frames, run the detector on each one and
# show the frame with boxes and class names until the stream ends or the user
# presses 'q'.
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the window are released even when
# inference raises or the cell is interrupted.
try:
    if not cap.isOpened():
        print("Cannot open camera")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # The model is fed an RGB copy; `frame` itself stays in OpenCV's
            # BGR order because it is what gets annotated and passed to
            # cv2.imshow. Drawing on / showing the RGB image would swap the
            # red and blue channels in the preview.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            tensor = F.to_tensor(rgb).unsqueeze(0)

            # Inference only: no gradients needed.
            with torch.no_grad():
                predictions = model(tensor)[0]

            # Draw a box and a class name for every confident detection.
            for box, score, label in zip(predictions['boxes'], predictions['scores'], predictions['labels']):
                if score > SCORE_THRESHOLD:
                    # Truncate to plain Python ints, which OpenCV drawing
                    # functions accept as pixel coordinates.
                    x1, y1, x2, y2 = (int(v) for v in box.tolist())
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, COCO_INSTANCE_CATEGORY_NAMES[label.item()], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

            cv2.imshow('frame', frame)
            # Keep only the low byte of the key code so the comparison still
            # works if the backend sets additional high bits.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()
