In [2]:
import cv2
import torch
import torchvision
from torchvision.models.detection import FCOS_ResNet50_FPN_Weights

# FCOS 모델 로드
weights = FCOS_ResNet50_FPN_Weights.DEFAULT
model = torchvision.models.detection.fcos_resnet50_fpn(weights=weights)
model.eval()

Downloading: "https://download.pytorch.org/models/fcos_resnet50_fpn_coco-99b0c9b7.pth" to C:\Users\daeho/.cache\torch\hub\checkpoints\fcos_resnet50_fpn_coco-99b0c9b7.pth
100.0%


FCOS(
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(256, eps=1e

In [10]:
# 이미지 로드 및 전처리
img = cv2.imread('./data/train/1/prep_1_frame47.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
input_tensor = torchvision.transforms.ToTensor()(img)

In [11]:
# 모델 추론
with torch.no_grad():
    predictions = model(input_tensor.unsqueeze(0))[0]

# 결과 처리
boxes = predictions['boxes'].cpu().data.numpy()
labels = predictions['labels'].cpu().data.numpy()
scores = predictions['scores'].cpu().data.numpy()

# 눈 객체 찾기
eye_indices = (labels == 4)  # 눈 객체의 라벨 인덱스는 4
eye_boxes = boxes[eye_indices]

# 눈 중심 좌표 계산
if eye_boxes.size > 0:
    for box in eye_boxes:
        x1, y1, x2, y2 = [int(coord) for coord in box]
        eye_center_x = (x1 + x2) // 2
        eye_center_y = (y1 + y2) // 2
        print(f'Eye center: ({eye_center_x}, {eye_center_y})')
        
        # 결과 시각화 (옵션)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.circle(img, (eye_center_x, eye_center_y), 5, (0, 0, 255), -1)
    cv2.imshow('Result', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print('눈을 찾을 수 없습니다.')

눈을 찾을 수 없습니다.
