In [1]:
import os, json, math
import numpy as np
from ultralytics import YOLO
from sklearn.metrics import mean_squared_error

In [2]:
# =====================================================
# 3. YOLOv8 Pose 모델 학습
# =====================================================
model = YOLO("yolov8l-pose.pt")  # pretrained pose 모델 사용

model.train(
    data="data.yaml",
    epochs=100,
    imgsz=640,
    batch=16,
    name="chimney_pose",
    project="runs",
    seed=42,
    patience=50, # map 상승 없을시 중단
    verbose=True  #학습 로그 출력
)

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l-pose.pt to 'yolov8l-pose.pt': 100% ━━━━━━━━━━━━ 85.3MB 6.7MB/s 12.7s12.7s<0.0ss
New https://pypi.org/project/ultralytics/8.3.200 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.198  Python-3.11.13 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3060, 12288MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr

ultralytics.utils.metrics.PoseMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x00000211EC12C9D0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)', 'Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)', 'Precision-Recall(P)', 'F1-Confidence(P)', 'Precision-Confidence(P)', 'Recall-Confidence(P)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.0340

In [7]:
# =====================================================
# 4. 학습된 best.pt 불러오기
# =====================================================
best_model_path = "runs/chimney_pose/weights/best.pt"
model = YOLO(best_model_path)

In [8]:
# =====================================================
# 5. 검증 데이터에서 높이 추정 + RMSE 계산 (여러 굴뚝 대응), 메타 데이터(roll, pitch)를 사용하지 않음
# =====================================================
def load_gt_coords(json_path):
    with open(json_path, "r") as f:
        data = json.load(f)
    gt_points, gt_heights = [], []
    for _, v in data.items():
        for region in v["regions"]:
            xs = region["shape_attributes"]["all_points_x"]
            ys = region["shape_attributes"]["all_points_y"]
            gt_points.append(((xs[0], ys[0]), (xs[1], ys[1])))
            gt_heights.append(float(region["region_attributes"]["chi_height_m"]))
    return gt_points, gt_heights


def pixel_distance(p1, p2):
    return math.sqrt((p1[0]-p2[0])**2 + (p1[1]-p2[1])**2)


VAL_IMG_DIR = "../valid/images"
VAL_JSON_DIR = "../valid/json"

true_heights, pred_heights = [], []

for img_name in os.listdir(VAL_IMG_DIR):
    if not img_name.endswith(".jpg"):
        continue
    img_path = os.path.join(VAL_IMG_DIR, img_name)
    json_path = os.path.join(VAL_JSON_DIR, img_name.replace(".jpg", ".json"))

    # GT 불러오기
    gt_points, gt_heights = load_gt_coords(json_path)

    # YOLO 예측
    results = model.predict(img_path, verbose=False)
    kpts_all = results[0].keypoints.xy.cpu().numpy()  # shape (N, K, 2)
    if kpts_all.size == 0:
        continue

    # --- GT-예측 매칭 (간단히: 픽셀 길이 기준 최근접 매칭) ---
    gt_pixel_lens = [pixel_distance(p[0], p[1]) for p in gt_points]

    for kp in kpts_all:   # 각 예측 객체
        if kp.shape[0] >= 2:
            # 예측 픽셀 길이
            x1, y1 = float(kp[0][0]), float(kp[0][1])
            x2, y2 = float(kp[1][0]), float(kp[1][1])
            pred_pixel_len = pixel_distance((x1, y1), (x2, y2))

            # GT 중에서 가장 가까운 픽셀 길이 찾기
            diffs = [abs(pred_pixel_len - gpl) for gpl in gt_pixel_lens]
            min_idx = int(np.argmin(diffs))

            # 스케일링 후 높이 예측
            scale = gt_heights[min_idx] / gt_pixel_lens[min_idx]
            pred_height = pred_pixel_len * scale

            true_heights.append(gt_heights[min_idx])
            pred_heights.append(pred_height)

# RMSE 출력
rmse = math.sqrt(mean_squared_error(true_heights, pred_heights))
print("검증 샘플 수:", len(true_heights))
print("RMSE (m):", rmse)


검증 샘플 수: 1313
RMSE (m): 14.260794618141835


In [9]:
import cv2
import os
import math
import numpy as np
from sklearn.metrics import mean_squared_error

SAVE_DIR = "./vis_results"
os.makedirs(SAVE_DIR, exist_ok=True)

true_heights, pred_heights = [], []

for img_name in os.listdir(VAL_IMG_DIR):
    if not img_name.endswith(".jpg"):
        continue
    img_path = os.path.join(VAL_IMG_DIR, img_name)
    json_path = os.path.join(VAL_JSON_DIR, img_name.replace(".jpg", ".json"))

    # GT 불러오기
    gt_points, gt_heights = load_gt_coords(json_path)

    # YOLO 예측
    results = model.predict(img_path, verbose=False)
    kpts_all = results[0].keypoints.xy.cpu().numpy()
    if kpts_all.size == 0:
        continue

    # 이미지 로드
    img = cv2.imread(img_path)
    h, w, _ = img.shape

    gt_pixel_lens = [pixel_distance(p[0], p[1]) for p in gt_points]

    # 굴뚝별 텍스트를 모아둠
    chimney_texts = []

    for cid, kp in enumerate(kpts_all):   # 각 예측 객체
        if kp.shape[0] >= 2:
            x1, y1 = int(kp[0][0]), int(kp[0][1])
            x2, y2 = int(kp[1][0]), int(kp[1][1])
            pred_pixel_len = pixel_distance((x1, y1), (x2, y2))

            # GT 중 최근접 매칭
            diffs = [abs(pred_pixel_len - gpl) for gpl in gt_pixel_lens]
            min_idx = int(np.argmin(diffs))

            scale = gt_heights[min_idx] / gt_pixel_lens[min_idx]
            pred_height = pred_pixel_len * scale

            # 리스트에 저장
            true_heights.append(gt_heights[min_idx])
            pred_heights.append(pred_height)

            # --- keypoint 시각화 ---
            cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.circle(img, (x1, y1), 5, (0, 0, 255), -1)
            cv2.circle(img, (x2, y2), 5, (0, 0, 255), -1)

            # 굴뚝별 텍스트 추가
            chimney_texts.append(f"ID{cid}: GT {gt_heights[min_idx]:.1f}m | Pred {pred_height:.1f}m")

    # --- 이미지 상단 중앙에 여러 줄 텍스트 출력 ---
    if chimney_texts:
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.7
        thickness = 2
        line_spacing = 25  # 줄 간격

        # 전체 텍스트 박스 크기 계산
        text_sizes = [cv2.getTextSize(t, font, font_scale, thickness)[0] for t in chimney_texts]
        text_w = max(ts[0] for ts in text_sizes)
        text_h = text_sizes[0][1]

        # 시작 좌표 (가장 위 중앙)
        start_x = (w - text_w) // 2
        start_y = 30

        # 배경 박스
        total_h = len(chimney_texts) * line_spacing
        cv2.rectangle(img,
                      (start_x - 10, start_y - text_h - 10),
                      (start_x + text_w + 10, start_y + total_h + 10),
                      (0, 0, 0), -1)

        # 각 줄 출력
        for i, text in enumerate(chimney_texts):
            y = start_y + i * line_spacing
            cv2.putText(img, text, (start_x, y),
                        font, font_scale, (255, 255, 0), thickness)

    # 저장
    save_path = os.path.join(SAVE_DIR, img_name)
    cv2.imwrite(save_path, img)

# 최종 RMSE
rmse = math.sqrt(mean_squared_error(true_heights, pred_heights))
print("검증 샘플 수:", len(true_heights))
print("RMSE (m):", rmse)
print(f"시각화 결과는 '{SAVE_DIR}' 폴더에 저장되었습니다.")


검증 샘플 수: 1313
RMSE (m): 14.295957085078733
시각화 결과는 './vis_results' 폴더에 저장되었습니다.
