# 예측 후 후처리

## post_process.py - 박스 후처리
### postprocess_boxes()

In [1]:
import numpy as np

def postprocess_boxes(pred_bbox, original_image, input_size, 
                      score_threshold):
    """Turn raw network predictions into filtered boxes in image pixels.

    Args:
        pred_bbox: Array-like of shape (N, 5 + C). Columns are read as
            centre x, centre y, width, height, objectness, then C per-class
            scores.
        original_image: Array whose first two dimensions are
            (height, width) of the image the boxes are mapped back onto.
        input_size: Side length of the square network input that the
            predicted coordinates refer to.
        score_threshold: A row is kept only if objectness times its best
            class score is strictly greater than this value.

    Returns:
        Array of shape (M, 6) with columns
        (xmin, ymin, xmax, ymax, score, class index).
    """
    size_bounds = [0, np.inf]
    preds = np.array(pred_bbox)

    centers, extents = preds[:, 0:2], preds[:, 2:4]
    objectness = preds[:, 4]
    class_probs = preds[:, 5:]

    # 1. (x, y, w, h) -> (xmin, ymin, xmax, ymax)
    half_extents = extents * 0.5
    corners = np.concatenate(
        [centers - half_extents, centers + half_extents], axis=-1)

    # 2. Undo the centred padding and the uniform resize so the corners
    #    are expressed in original-image pixels.
    img_h, img_w = original_image.shape[:2]
    ratio = min(input_size / img_w, input_size / img_h)
    pad_x = (input_size - ratio * img_w) / 2
    pad_y = (input_size - ratio * img_h) / 2
    corners[:, 0::2] = (corners[:, 0::2] - pad_x) / ratio
    corners[:, 1::2] = (corners[:, 1::2] - pad_y) / ratio

    # 3. Clip to the image; boxes that end up inverted are zeroed out so
    #    the size filter below discards them.
    top_left = np.maximum(corners[:, :2], [0, 0])
    bottom_right = np.minimum(corners[:, 2:], [img_w - 1, img_h - 1])
    corners = np.concatenate([top_left, bottom_right], axis=-1)
    inverted = (corners[:, 0] > corners[:, 2]) | (corners[:, 1] > corners[:, 3])
    corners[inverted] = 0

    # 4. Keep boxes whose sqrt(area) lies strictly inside size_bounds.
    sides = corners[:, 2:4] - corners[:, 0:2]
    box_scale = np.sqrt(sides[:, 0] * sides[:, 1])
    size_ok = (size_bounds[0] < box_scale) & (box_scale < size_bounds[1])

    # 5. Score = objectness * best class score; drop low-scoring rows.
    best_class = np.argmax(class_probs, axis=-1)
    confidence = objectness * class_probs[np.arange(len(corners)), best_class]
    keep = size_ok & (confidence > score_threshold)

    return np.concatenate(
        [corners[keep],
         confidence[keep][:, np.newaxis],
         best_class[keep][:, np.newaxis]],
        axis=-1)

### NMS

In [None]:
import numpy as np
from bbox_iou import bbox_iou

def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """Per-class non-maximum suppression over scored boxes.

    Args:
        bboxes: 2-D array whose columns are read as four box coordinates
            (0-3), score (4) and class id (5).
        iou_threshold: With ``method='nms'``, remaining boxes whose IoU with
            the selected box is greater than this get their score zeroed.
        sigma: With ``method='soft-nms'``, remaining scores are multiplied
            by ``exp(-iou**2 / sigma)``.
        method: ``'nms'`` or ``'soft-nms'``.

    Returns:
        List of selected rows, grouped by class and, within each class, in
        the order they were picked.
    """
    kept = []

    for label in set(bboxes[:, 5]):
        candidates = bboxes[bboxes[:, 5] == label]

        # Repeat until every candidate of this class is selected or dropped.
        while len(candidates) > 0:
            # Select the highest-scoring candidate and remove it from the pool.
            top = np.argmax(candidates[:, 4])
            winner = candidates[top]
            kept.append(winner)
            candidates = np.concatenate(
                [candidates[:top], candidates[top + 1:]])

            # IoU of the selected box against everything still in the pool.
            overlap = bbox_iou(winner[np.newaxis, :4], candidates[:, :4],
                               method='iou', isTrain=False)
            decay = np.ones((len(overlap),), dtype=np.float32)

            assert method in ['nms', 'soft-nms']

            if method == 'nms':
                decay[overlap > iou_threshold] = 0.0
            elif method == 'soft-nms':
                decay = np.exp(-(1.0 * overlap ** 2 / sigma))

            # Rescale the scores and drop anything that fell to zero or below.
            candidates[:, 4] = candidates[:, 4] * decay
            candidates = candidates[candidates[:, 4] > 0.]

    return kept

2025-12-22 16:41:35.333014: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-22 16:41:35.374218: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-22 16:41:36.484066: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


### 사각형 그리기

In [3]:
import colorsys
import random
import numpy as np
import cv2

def draw_bbox(image, bboxes, class_names,
              show_label=True, show_confidence=True,
              Text_colors=(0,0,0), rectangle_colors='', 
              tracking=False):
    """Draw detection boxes, and optionally labels, onto ``image``.

    Args:
        image: Array whose first two dimensions are (height, width). It is
            passed straight to the OpenCV drawing calls and returned.
        bboxes: Iterable of rows ``(x1, y1, x2, y2, score, class_index)``.
            Coordinates are truncated to int32 before drawing.
        class_names: Container indexed by integer class index. Its length
            sets how many palette colours are generated.
        show_label: Draw a filled label tag above each box when true.
        show_confidence: Append the score, formatted to two decimals, to
            the label.
        Text_colors: Colour tuple used for the label text.
        rectangle_colors: Colour used for every box; the empty string means
            "use the per-class palette colour".
        tracking: When true the label suffix is ``str(score)`` instead of
            the two-decimal score.

    Returns:
        The same ``image`` object that was passed in.
    """
    image_h, image_w = image.shape[:2]
    num_classes = len(class_names)

    # One evenly spaced hue per class, converted to 0-255 integer triples.
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = [
        tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
        for hsv in hsv_tuples
    ]

    # A private generator seeded with 0 yields the same fixed shuffle as
    # seeding the module-level RNG would, without touching the caller's
    # global random state.
    random.Random(0).shuffle(colors)

    # Line thickness and font scale depend only on the image size.
    bbox_thick = max(1, int(0.6 * (image_h + image_w) / 1000))
    fontScale = 0.75 * bbox_thick

    for bbox in bboxes:
        # .tolist() hands plain Python ints to OpenCV.
        x1, y1, x2, y2 = np.array(bbox[:4], dtype=np.int32).tolist()
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = rectangle_colors if rectangle_colors != '' else colors[class_ind]

        # Bounding box outline.
        cv2.rectangle(image, (x1, y1), (x2, y2),
                      bbox_color, bbox_thick * 2)

        if not show_label:
            continue

        score_str = ""
        if show_confidence:
            score_str = " {:.2f}".format(score)
        if tracking:
            score_str = " " + str(score)

        try:
            class_name = class_names[class_ind]
        except (KeyError, IndexError):
            # Report the bad class index and fall back to the index itself,
            # so the label is always defined for the drawing calls below.
            print("클래스 라벨이 잘못되었습니다.")
            class_name = str(class_ind)
        label = f"{class_name}{score_str}"

        # Measure the text so the filled tag fits it.
        (text_width, text_height), baseline = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_COMPLEX_SMALL,
            fontScale, thickness=bbox_thick)
        # Filled tag anchored at the box's top-left corner, extending upward.
        cv2.rectangle(image, (x1, y1),
                      (x1 + text_width,
                       y1 - text_height - baseline),
                      bbox_color, thickness=cv2.FILLED)
        # Label text on top of the tag.
        cv2.putText(image, label, (x1, y1 - 4),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
                    fontScale, Text_colors, bbox_thick,
                    lineType=cv2.LINE_AA)
    return image

# 실시간 객체 탐지 테스트

## detect_image() - 객체 탐지 후 바운딩박스 그리기

In [4]:
import numpy as np
import cv2
import tensorflow as tf

from image_process import resize_to_square
from data import read_class_names
from post_process import *
from config import NUM_CLASS

def detect_image(model, image_path, output_path,
                 class_label_path,
                 input_size=416, show=False,
                 score_threshold=0.3, iou_threshold=0.45,
                 rectangle_colors=''):
    """Detect objects in one image file and return the annotated image.

    Args:
        model: Callable invoked as ``model(batch, training=False)``; its
            result is iterated and each item reshaped to rows of
            ``5 + NUM_CLASS`` values.
        image_path: Path handed to ``cv2.imread``.
        output_path: Where to write the annotated image; skipped when falsy.
        class_label_path: Path handed to ``read_class_names``.
        input_size: Target size passed to ``resize_to_square`` and to
            ``postprocess_boxes``.
        show: When true, display the result in an OpenCV window and wait
            for a key press.
        score_threshold: Forwarded to ``postprocess_boxes``.
        iou_threshold: Forwarded to ``nms``.
        rectangle_colors: Forwarded to ``draw_bbox``.

    Returns:
        The image returned by ``draw_bbox``.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    source_img = cv2.imread(image_path)
    if source_img is None:
        raise ValueError(f"Image not found: {image_path}")

    label_names = read_class_names(class_label_path)

    # 1. Resize a copy to the square network input, then add a batch axis.
    squared = resize_to_square(np.copy(source_img), target_size=input_size)
    batch = squared[np.newaxis, ...].astype(np.float32)

    # 2. Forward pass with training disabled.
    outputs = model(batch, training=False)

    # 3. Flatten every output to (-1, 5 + NUM_CLASS) rows and stack them.
    rows = [tf.reshape(out, (-1, 5 + NUM_CLASS)) for out in outputs]
    raw_preds = tf.concat(rows, axis=0).numpy()
    print("pred_bbox shape:", raw_preds.shape)
    print(np.unique(raw_preds[:, -1].astype(int)))
    print("pred_bbox sample:", raw_preds[:5])

    # 4. Map boxes back to the source image, filter by score, then run NMS.
    candidates = postprocess_boxes(
        raw_preds, source_img, input_size, score_threshold)
    print("after postprocess:", len(candidates))

    if not len(candidates):
        print("⚠️ No objects detected")

    detections = nms(candidates, iou_threshold)
    print(detections[:5])

    # 5. Draw the surviving boxes on the source image.
    annotated = draw_bbox(
        source_img, detections, label_names,
        rectangle_colors=rectangle_colors)

    if output_path:
        cv2.imwrite(output_path, annotated)

    if show:
        cv2.imshow("predicted image", annotated)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return annotated



## 학습된 모델 불러오기

In [5]:
from yolov3 import Create_YOLOv3
from config import NUM_CLASS

# Build the YOLOv3 network with train_mode disabled. NUM_CLASS comes from
# config, and the 416x416 input shape matches detect_image's default
# input_size.
yolo = Create_YOLOv3(
    num_class=NUM_CLASS,
    input_shape=(416, 416, 3),
    train_mode=False   # must be False
)

# Load the saved weights from the checkpoint file into the network.
yolo.load_weights("checkpoints/mnist_custom_best.weights.h5")
# weights = yolo.get_weights()

I0000 00:00:1766389297.646790   48085 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 21751 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090 Ti, pci bus id: 0000:01:00.0, compute capability: 8.6


## 객체 탐지하기

In [None]:
# yolo.set_weights(weights)
# Run detection on a single test image with the loaded model, write the
# annotated result to result.jpg, and open a preview window (show=True).
# The thresholds here override detect_image's defaults of 0.3 (score)
# and 0.45 (IoU).
detect_image(
    model=yolo,
    image_path="mnist_test_c.jpg",
    output_path="result.jpg",
    class_label_path="dataset/mnist.names",
    show=True,
    score_threshold=0.65,
    iou_threshold=0.3
)

2025-12-22 16:41:39.741025: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 90300


pred_bbox shape: (10647, 15)
[0]
pred_bbox sample: [[ 3.1304078   4.3585496  15.216962   15.615939    0.04006871  0.03514211
   0.27306983  0.14180715  0.10966711  0.1566938   0.15996712  0.05626611
   0.07805344  0.14546919  0.03759608]
 [ 3.5414913   4.0433016  19.699347   17.049362    0.0412945   0.07555261
   0.33621508  0.16459239  0.14181462  0.19127585  0.22862221  0.14350712
   0.13137875  0.09163492  0.08041906]
 [ 3.6070492   3.8605068  32.306435   30.86019     0.03892223  0.08543364
   0.3843721   0.19670457  0.14383763  0.15239574  0.30364197  0.13295338
   0.30511066  0.09308022  0.12543218]
 [11.191127    4.0778008  14.773755   15.92569     0.04225546  0.03658172
   0.18007363  0.17268106  0.11307348  0.21968305  0.14273582  0.05624708
   0.05385464  0.12528646  0.03609953]
 [11.768534    3.7196596  18.033245   15.619322    0.03993796  0.088209
   0.24415372  0.18728782  0.16341476  0.25128922  0.19286479  0.14425051
   0.0989953   0.09563424  0.07515527]]
after postproce