# YOLOv3

## YOLOv3의 합성곱과 Residual(yolo.py)

### 라이브러리 불러오기

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.regularizers import l2

### 합성곱함수

#### BatchNormalization()

In [None]:
class BatchNormalization(layers.BatchNormalization):
    """Batch normalization that falls back to inference mode when frozen.

    The layer only runs in training mode when the caller asks for training
    AND the layer itself is trainable.
    """
    # "Frozen state" and "inference mode" are two separate concepts.
    # Setting `layer.trainable = False` freezes the layer: its internal state
    # (the weights) does not change during training.
    # In this subclass a frozen layer is additionally run in inference mode.
    # In inference mode the layer normalizes the current batch with the moving
    # mean and moving variance instead of the batch's own mean and variance.
    def call(self, x, training=False):
        # A falsy `training` (False / None) is replaced by a constant False
        # tensor so it can be combined with `self.trainable` below.
        if not training:
            training = tf.constant(False)
        # Training mode is used only if both flags are true.
        training = tf.logical_and(training, self.trainable)
        return super().call(x, training)


#### convolutional()

In [None]:
def convolutional(input_layer, filters, kernel_size,
                  downsample=False, activate=True, bn=True):
    """Apply a Conv2D layer, optionally followed by batch norm and LeakyReLU.

    Args:
        input_layer: Input tensor.
        filters: Number of output channels of the convolution.
        kernel_size: Kernel size passed to ``Conv2D``.
        downsample: If True, zero-pad one pixel on the top and left and use a
            stride-2 'valid' convolution; otherwise stride 1 with 'same'
            padding.
        activate: If True, append ``LeakyReLU(alpha=0.1)``.
        bn: If True, append ``BatchNormalization`` and drop the conv bias.

    Returns:
        The output tensor of the last layer that was applied.
    """
    if downsample:
        input_layer = layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
    stride, pad_mode = (2, 'valid') if downsample else (1, 'same')

    output = layers.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        strides=stride,
        padding=pad_mode,
        use_bias=not bn,  # batch norm supplies its own offset
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=l2(0.0005),
    )(input_layer)

    if bn:
        output = BatchNormalization()(output)
    if activate:
        output = layers.LeakyReLU(alpha=0.1)(output)
    return output

### 레지듀얼 블록

#### residual_block()

In [None]:
def residual_block(input_layer, filter_num1, filter_num2):
    """Return ``input_layer`` plus a 1x1 -> 3x3 convolution branch.

    Args:
        input_layer: Input tensor; also used as the shortcut connection.
        filter_num1: Output channels of the 1x1 convolution.
        filter_num2: Output channels of the 3x3 convolution.

    Returns:
        The element-wise sum of the shortcut and the branch output.
    """
    branch = convolutional(input_layer, filters=filter_num1, kernel_size=(1, 1))
    branch = convolutional(branch, filters=filter_num2, kernel_size=(3, 3))
    return input_layer + branch

## 다크넷 정의

In [None]:
def darknet53(input_data):
    """Build the Darknet-53 backbone on top of ``input_data``.

    The network is one 3x3 convolution followed by five stages. Each stage is
    a stride-2 down-sampling convolution followed by a number of residual
    blocks.

    Returns:
        ``(route_1, route_2, output)``: the outputs of the 256-filter stage,
        the 512-filter stage and the final 1024-filter stage.
    """
    x = convolutional(input_data, 32, (3, 3))

    # (output filters of the stage, number of residual blocks in the stage)
    stage_specs = ((64, 1), (128, 2), (256, 8), (512, 8), (1024, 4))
    stage_outputs = []
    for out_filters, num_blocks in stage_specs:
        x = convolutional(x, out_filters, (3, 3), downsample=True)
        for _ in range(num_blocks):
            x = residual_block(x, out_filters // 2, out_filters)
        stage_outputs.append(x)

    route_1, route_2 = stage_outputs[2], stage_outputs[3]
    return route_1, route_2, x

## upsample() - 업샘플링

In [None]:
def upsample(input_layer):
    """Double the size of axes 1 and 2 with nearest-neighbour resizing."""
    size_axis1, size_axis2 = input_layer.shape[1], input_layer.shape[2]
    return tf.image.resize(input_layer,
                           (size_axis1 * 2, size_axis2 * 2),
                           method='nearest')

## YOLOv3 함수 정의

In [None]:
def YOLOv3(input_layer, num_class):
    """Build the YOLOv3 detection heads on top of the Darknet-53 backbone.

    Args:
        input_layer: Input image tensor.
        num_class: Number of object classes.

    Returns:
        ``[conv_sbbox, conv_mbbox, conv_lbbox]``: raw prediction tensors for
        small, medium and large objects. Each has ``3 * (num_class + 5)``
        channels.
    """
    # Run Darknet-53 and collect its three feature maps.
    route_1, route_2, conv = darknet53(input_layer)
    out_channels = 3 * (num_class + 5)

    def _conv_stack(tensor, filters):
        # 1x1 / 3x3 / 1x1 / 3x3 / 1x1 convolution sequence.
        for _ in range(2):
            tensor = convolutional(tensor, filters, (1, 1))
            tensor = convolutional(tensor, filters * 2, (3, 3))
        return convolutional(tensor, filters, (1, 1))

    def _detection_head(tensor, filters):
        # 3x3 convolution followed by the linear 1x1 prediction convolution.
        branch = convolutional(tensor, filters * 2, (3, 3))
        return convolutional(branch, out_channels, (1, 1),
                             activate=False, bn=False)

    # Large-object head (coarsest feature map).
    conv = _conv_stack(conv, 512)
    conv_lbbox = _detection_head(conv, 512)

    # Nearest-neighbour upsampling needs no learned parameters.
    conv = convolutional(conv, 256, (1, 1))
    conv = upsample(conv)

    # Medium-object head.
    conv = tf.concat([conv, route_2], axis=-1)
    conv = _conv_stack(conv, 256)
    conv_mbbox = _detection_head(conv, 256)

    conv = convolutional(conv, 128, (1, 1))
    conv = upsample(conv)

    # Small-object head (finest feature map).
    conv = tf.concat([conv, route_1], axis=-1)
    conv = _conv_stack(conv, 128)
    conv_sbbox = _detection_head(conv, 128)

    return [conv_sbbox, conv_mbbox, conv_lbbox]

## 합성곱 신경망의 출력을 디코딩 함

In [None]:
import numpy as np

# Stride of each detection scale, in the order small / medium / large.
YOLO_STRIDES = [8, 16, 32]
# Three (width, height) anchor boxes per detection scale.
YOLO_ANCHORS = [
    [[10, 13], [16, 30], [33, 23]],
    [[30, 61], [62, 45], [59, 119]],
    [[116, 90], [156, 198], [373, 326]],
]
STRIDES = np.array(YOLO_STRIDES)
# Anchors of scale i divided by the stride of scale i.
ANCHORS = np.array(YOLO_ANCHORS) / STRIDES.reshape(-1, 1, 1)

In [None]:
def decode(conv_output, num_class, i=0):
    """Convert a raw YOLOv3 head output into box, confidence and class values.

    Args:
        conv_output: Raw output of one detection head.
        num_class: Number of object classes.
        i: Index of the detection scale; selects ``STRIDES[i]`` and
            ``ANCHORS[i]``.

    Returns:
        A tensor of shape ``(batch, size, size, 3, 5 + num_class)`` whose last
        axis holds ``(x, y, w, h, confidence, class probabilities...)``.
    """
    shape = tf.shape(conv_output)
    batch_size, output_size = shape[0], shape[1]

    conv_output = tf.reshape(
        conv_output,
        (batch_size, output_size, output_size, 3, num_class + 5))

    raw_dxdy = conv_output[..., 0:2]  # box centre offsets
    raw_dwdh = conv_output[..., 2:4]  # box width / height
    raw_conf = conv_output[..., 4:5]  # box confidence
    raw_prob = conv_output[..., 5:]   # per-class scores

    # Grid of cell indices: x varies along axis 1, y along axis 0.
    cell_index = tf.range(output_size, dtype=tf.int32)
    y = tf.tile(tf.expand_dims(cell_index, -1), [1, output_size])
    x = tf.tile(tf.expand_dims(cell_index, 0), [output_size, 1])

    xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
    xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :],
                      [batch_size, 1, 1, 3, 1])
    xy_grid = tf.cast(xy_grid, tf.float32)

    # Box centre: sigmoid offset inside the cell plus the cell index,
    # scaled by the stride of this detection scale.
    pred_xy = (tf.sigmoid(raw_dxdy) + xy_grid) * STRIDES[i]
    # Box size: exponential of the raw value times the anchor, times stride.
    pred_wh = (tf.exp(raw_dwdh) * ANCHORS[i]) * STRIDES[i]

    pred_conf = tf.sigmoid(raw_conf)
    pred_prob = tf.sigmoid(raw_prob)

    return tf.concat([pred_xy, pred_wh, pred_conf, pred_prob], axis=-1)

## YOLOv3 모델 생성

In [None]:
def Create_YOLOv3(num_class, input_shape=(416,416,3), train_mode=False):
    """Create the YOLOv3 Keras model.

    Args:
        num_class: Number of object classes.
        input_shape: Shape of one input image.
        train_mode: If True, each scale contributes two outputs, the raw conv
            tensor followed by the decoded tensor; otherwise only the decoded
            tensor.

    Returns:
        A ``tf.keras.Model`` mapping the input image to the list of outputs.
    """
    inputs = layers.Input(input_shape)
    outputs = []
    for scale, raw_tensor in enumerate(YOLOv3(inputs, num_class)):
        decoded = decode(raw_tensor, num_class, scale)
        if train_mode:
            outputs.append(raw_tensor)
        outputs.append(decoded)

    return tf.keras.Model(inputs, outputs)

In [None]:
# Build the model for NUM_CLASS classes. With train_mode=True the model
# outputs both the raw conv tensor and the decoded tensor for every scale.
NUM_CLASS = 10
yolo = Create_YOLOv3(train_mode=True, num_class=NUM_CLASS)
# yolo.summary()

# 모델 만들고 학습시키기

## 이미지 전처리하기(image_process.py)

In [None]:
import random
import numpy as np
import cv2


# 이미지를 정사각형 크기로 변환, 
# 채워지는 화소 기본값은 value속성의 값으로 설정함
def resize_to_square(image, target_size, gt_boxes=None, value=128.0):
    """Resize ``image`` into a padded square, keeping its aspect ratio.

    The image is scaled to fit inside a ``target_size`` x ``target_size``
    canvas filled with ``value``, centred, and the whole canvas is divided
    by 255.

    Args:
        image: 3-D image array (rows, columns, channels).
        target_size: Side length of the square output.
        gt_boxes: Optional box array whose columns 0-3 are x1, y1, x2, y2.
            It is rescaled and shifted IN PLACE to match the output image.
        value: Fill value for the padded area (before the division by 255).

    Returns:
        The padded image, or ``(padded image, gt_boxes)`` when ``gt_boxes``
        is given.
    """
    src_h, src_w, _ = image.shape

    scale = min(target_size / src_w, target_size / src_h)
    new_w = int(scale * src_w)
    new_h = int(scale * src_h)
    resized = cv2.resize(image, (new_w, new_h))

    # Offsets that centre the resized image on the canvas.
    pad_x = (target_size - new_w) // 2
    pad_y = (target_size - new_h) // 2

    canvas = np.full(shape=[target_size, target_size, 3], fill_value=value)
    canvas[pad_y:new_h + pad_y, pad_x:new_w + pad_x, :] = resized
    canvas = canvas / 255.

    if gt_boxes is None:
        return canvas

    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + pad_x
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + pad_y
    return canvas, gt_boxes
    

def random_horizontal_flip(image, bboxes, p=0.5):
    """Mirror the image and its boxes left-to-right with probability ``p``.

    Args:
        image: 3-D image array (rows, columns, channels).
        bboxes: Box array whose columns 0 and 2 are x1 and x2; modified in
            place when the flip happens.
        p: Probability of flipping.

    Returns:
        ``(image, bboxes)``, flipped or unchanged.
    """
    if not random.random() < p:
        return image, bboxes

    _, width, _ = image.shape
    mirrored = image[:, ::-1, :]
    # The old right edge becomes the new left edge and vice versa.
    bboxes[:, [0, 2]] = width - bboxes[:, [2, 0]]
    return mirrored, bboxes

# 자르기 
def random_crop(image, bboxes, p=0.5):
    """Randomly crop the image without cutting into any bounding box.

    With probability ``p`` a crop window is drawn that always contains the
    smallest rectangle enclosing all boxes; each side of the window lies
    somewhere between that rectangle and the image border.

    Args:
        image: 3-D image array (rows, columns, channels).
        bboxes: Box array whose columns 0-3 are x1, y1, x2, y2; shifted in
            place to the cropped coordinate system.
        p: Probability of cropping.

    Returns:
        ``(image, bboxes)``, cropped or unchanged.
    """
    if random.random() < p:
        # Without boxes there is no region to preserve; leave input untouched.
        if len(bboxes) == 0:
            return image, bboxes

        h, w, _ = image.shape
        # Smallest rectangle that contains every box: [x1, y1, x2, y2].
        max_bbox = np.concatenate(
            [np.min(bboxes[:, 0:2], axis=0),
             np.max(bboxes[:, 2:4], axis=0)], axis=-1)

        # Free space between that rectangle and each image border.
        max_l_trans = max_bbox[0]
        max_u_trans = max_bbox[1]
        max_r_trans = w - max_bbox[2]
        max_d_trans = h - max_bbox[3]

        crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans)))
        crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans)))
        # Clamp to the image size with min(); using max() here would pin the
        # window to the right/bottom border so those sides were never cropped.
        crop_xmax = min(w, int(max_bbox[2] + random.uniform(0, max_r_trans)))
        crop_ymax = min(h, int(max_bbox[3] + random.uniform(0, max_d_trans)))

        image = image[crop_ymin:crop_ymax, crop_xmin:crop_xmax]

        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin

    return image, bboxes

  
# 이동 
def random_translate(image, bboxes, p=0.5):
    """Randomly shift the image and its boxes with probability ``p``.

    The shift is drawn from the free space between the smallest rectangle
    enclosing all boxes and the image borders (minus one pixel per side).

    Args:
        image: 3-D image array (rows, columns, channels).
        bboxes: Box array whose columns 0-3 are x1, y1, x2, y2; shifted in
            place by the same offset as the image.
        p: Probability of translating.

    Returns:
        ``(image, bboxes)``, translated or unchanged.
    """
    if not random.random() < p:
        return image, bboxes

    h, w, _ = image.shape
    top_left = np.min(bboxes[:, 0:2], axis=0)
    bottom_right = np.max(bboxes[:, 2:4], axis=0)
    max_bbox = np.concatenate([top_left, bottom_right], axis=-1)

    # Free space on each side of the enclosing rectangle.
    room_left, room_up = max_bbox[0], max_bbox[1]
    room_right = w - max_bbox[2]
    room_down = h - max_bbox[3]

    tx = random.uniform(-(room_left - 1), (room_right - 1))
    ty = random.uniform(-(room_up - 1), (room_down - 1))

    # Pure-translation affine matrix.
    M = np.array([[1, 0, tx], [0, 1, ty]])
    shifted = cv2.warpAffine(image, M, (w, h))

    bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
    bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty

    return shifted, bboxes



## IoU 계산하기(bbox_iou.py)

In [None]:
import tensorflow as tf

def bbox_iou(boxes1, boxes2):
    """Return the IoU of boxes given as (centre x, centre y, width, height).

    The box values live on the last axis; the leading axes are combined by
    the element-wise operations used below.
    """
    area1 = boxes1[..., 2] * boxes1[..., 3]
    area2 = boxes2[..., 2] * boxes2[..., 3]

    def _to_corners(boxes):
        # (cx, cy, w, h) -> (x1, y1, x2, y2)
        return tf.concat([boxes[..., :2] - boxes[..., 2:] * 0.5,
                          boxes[..., :2] + boxes[..., 2:] * 0.5],
                         axis=-1)

    corners1 = _to_corners(boxes1)
    corners2 = _to_corners(boxes2)

    # Overlap rectangle; negative extents (no overlap) are clipped to 0.
    overlap_min = tf.maximum(corners1[..., :2], corners2[..., :2])
    overlap_max = tf.minimum(corners1[..., 2:], corners2[..., 2:])
    overlap_wh = tf.maximum(overlap_max - overlap_min, 0.0)

    inter_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    union_area = area1 + area2 - inter_area

    return 1.0 * inter_area / union_area
 
# GIoU 계산하는 함수 
def bbox_giou(boxes1, boxes2):
    """Return the GIoU of boxes given as (centre x, centre y, width, height).

    GIoU is the IoU minus the fraction of the smallest enclosing box that is
    not covered by the union of the two boxes.
    """
    def _to_ordered_corners(boxes):
        # (cx, cy, w, h) -> (x1, y1, x2, y2), then make sure the first corner
        # holds the smaller coordinates and the second the larger ones.
        corners = tf.concat([boxes[..., :2] - boxes[..., 2:] * 0.5,
                             boxes[..., :2] + boxes[..., 2:] * 0.5],
                            axis=-1)
        return tf.concat([tf.minimum(corners[..., :2], corners[..., 2:]),
                          tf.maximum(corners[..., :2], corners[..., 2:])],
                         axis=-1)

    boxes1 = _to_ordered_corners(boxes1)
    boxes2 = _to_ordered_corners(boxes2)

    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Overlap rectangle, clipped to 0 when the boxes do not intersect.
    overlap_min = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    overlap_max = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
    overlap_wh = tf.maximum(overlap_max - overlap_min, 0.0)
    inter_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    union_area = area1 + area2 - inter_area

    iou = inter_area / union_area

    # Smallest rectangle that encloses both boxes.
    enclose_min = tf.minimum(boxes1[..., :2], boxes2[..., :2])
    enclose_max = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
    enclose_wh = tf.maximum(enclose_max - enclose_min, 0.0)
    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]

    return iou - 1.0 * (enclose_area - union_area) / enclose_area
 
# CIoU 계산하는 함수 
def bbox_ciou(boxes1, boxes2):
    """Return the CIoU of boxes given as (centre x, centre y, width, height).

    CIoU = IoU - (centre distance^2 / enclosing-box diagonal^2)
               - alpha * aspect-ratio term.

    Args:
        boxes1: Predicted boxes, (cx, cy, w, h) on the last axis.
        boxes2: Ground-truth boxes, (cx, cy, w, h) on the last axis.

    Returns:
        The CIoU value for every box pair.
    """
    # Local import: this module only imports tensorflow at file level.
    import math

    boxes1_coor = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                             boxes1[..., :2] + boxes1[..., 2:] * 0.5],
                            axis=-1)
    boxes2_coor = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                             boxes2[..., :2] + boxes2[..., 2:] * 0.5],
                            axis=-1)

    # Smallest box enclosing both boxes: the left/top edges are the MINIMUM
    # of the two boxes' edges, the right/bottom edges the maximum.
    left = tf.minimum(boxes1_coor[..., 0], boxes2_coor[..., 0])
    up = tf.minimum(boxes1_coor[..., 1], boxes2_coor[..., 1])
    right = tf.maximum(boxes1_coor[..., 2], boxes2_coor[..., 2])
    down = tf.maximum(boxes1_coor[..., 3], boxes2_coor[..., 3])

    # Squared diagonal of the enclosing box.
    c = (right - left) * (right - left) + (down - up) * (down - up)
    iou = bbox_iou(boxes1, boxes2)

    # Squared distance between the two box centres.
    dx = boxes1[..., 0] - boxes2[..., 0]
    dy = boxes1[..., 1] - boxes2[..., 1]
    u = dx * dx + dy * dy
    d = u / c

    # Aspect-ratio consistency term.
    ar_gt = boxes2[..., 2] / boxes2[..., 3]
    ar_pred = boxes1[..., 2] / boxes1[..., 3]
    atan_diff = tf.atan(ar_gt) - tf.atan(ar_pred)

    ar_loss = 4 / (math.pi * math.pi) * atan_diff * atan_diff
    # The small constant keeps the denominator away from zero.
    alpha = ar_loss / (1 - iou + ar_loss + 0.000001)
    ciou_term = d + alpha * ar_loss

    return iou - ciou_term

## 스트라이드와 앵커박스(config.py)

In [None]:
import numpy as np

# Stride of each detection scale, in the order small / medium / large.
YOLO_STRIDES = [8, 16, 32]
# Three (width, height) anchor boxes per detection scale.
YOLO_ANCHORS = [
    [[10, 13], [16, 30], [33, 23]],
    [[30, 61], [62, 45], [59, 119]],
    [[116, 90], [156, 198], [373, 326]],
]

STRIDES = np.array(YOLO_STRIDES)
# Anchors of scale i divided by the stride of scale i.
ANCHORS = np.array(YOLO_ANCHORS) / STRIDES.reshape(-1, 1, 1)

NUM_CLASS = 10  # 80 for COCO data, 10 for MNIST data

## 데이터 생성기(data.py)

In [None]:
import os
import random
import numpy as np
import cv2

from config import *
from image_process import *
from bbox_iou import *


# 파일에서 클래스 라벨을 읽어 딕셔너리로 만들어 반환
def read_class_names(class_label_path):
    """Read a class-label file into a ``{line index: class name}`` dict.

    Each line of the file is one class name; only the newline character is
    stripped from it.
    """
    with open(class_label_path, 'r') as label_file:
        return {class_id: line.strip('\n')
                for class_id, line in enumerate(label_file)}


class DataGenerator(object):
    """Iterator that yields batches of images and YOLOv3 training targets.

    Every iteration step returns ``(batch_image, target)``. ``target`` is a
    tuple with one ``(label, boxes)`` pair per detection scale, in the order
    small / medium / large. After the last batch of an epoch the annotations
    are reshuffled and ``StopIteration`` is raised, so the same object can be
    iterated again for the next epoch.
    """

    def __init__(self,
                 data_path,
                 annot_path,
                 class_label_path,
                 load_images_to_ram=True,
                 data_aug=True,
                 input_size=416,
                 anchor_per_scale=3,
                 max_bbox_per_scale=100, 
                 batch_size=4,
                 strides=STRIDES, 
                 anchors=ANCHORS):
        """Load the class names and annotations and derive the batch layout.

        Args:
            data_path: Not used by this class.
            annot_path: Path of the annotation text file.
            class_label_path: Path of the file with one class name per line.
            load_images_to_ram: If True, all images are read while the
                annotations are loaded; otherwise each image is read when
                its sample is parsed.
            data_aug: Currently ignored (see the note below).
            input_size: Side length of the square network input.
            anchor_per_scale: Number of anchors per detection scale.
            max_bbox_per_scale: Number of box slots stored per scale.
            batch_size: Number of samples per batch.
            strides: Array with the stride of every detection scale.
            anchors: Anchor sizes per scale.
        """
        self.input_size = input_size
        self.annot_path = annot_path
        self.batch_size = batch_size
        # NOTE(review): the `data_aug` argument is ignored and augmentation
        # is always disabled. Honouring it would turn augmentation on for
        # every caller relying on the default (including validation data),
        # so the current behaviour is kept; confirm the intent.
        self.data_aug = False
        self.strides = strides
        self.classes = read_class_names(class_label_path)
        self.num_classes = len(self.classes)
        self.anchors = anchors
        self.anchor_per_scale = anchor_per_scale
        self.max_bbox_per_scale = max_bbox_per_scale
        self.load_images_to_ram = load_images_to_ram
        self.annotations = self.load_annotations(annot_path)
        self.num_samples = len(self.annotations)
        self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
        self.batch_count = 0
        # Grid size of every detection scale.
        self.output_sizes = input_size // strides

    def load_annotations(self, annot_path):
        """Read and shuffle the annotation file.

        Every line holds an image path (which may contain spaces) followed by
        whitespace-separated boxes written as ``x1,y1,x2,y2,class``. Lines
        without any box are dropped.

        Returns:
            A list of ``[image_path, [box strings...], image]`` entries, where
            ``image`` is the loaded image or ``''`` when images are not kept
            in RAM.

        Raises:
            KeyError: If an image file listed in the annotations is missing.
        """
        annotations = []

        with open(annot_path, 'r') as f:
            data = f.read().splitlines()
        # Keep only lines that have something after the first token, i.e.
        # drop lines that consist of a file name only (images without objects).
        lines = [line.strip() for line in data if len(line.split()) > 1]
        np.random.shuffle(lines)

        for line in lines:
            annotation = line.split()
            # Re-join the leading tokens that are not box data into the image
            # path; `index` is the position of the first box token.
            image_path, index = "", 1
            for i, one_line in enumerate(annotation):
                if not one_line.replace(",", "").isnumeric():
                    if image_path != "":
                        image_path += " "
                    image_path += one_line
                else:
                    index = i
                    break

            if not os.path.exists(image_path):
                raise KeyError(f"{image_path} 파일이 없음")

            # Either cache the image now or load it later in parse_annotation.
            if self.load_images_to_ram:
                image = cv2.imread(image_path)
            else:
                image = ''

            annotations.append([image_path, annotation[index:], image])

        return annotations

    def parse_annotation(self, annotation, mAP='False'):
        """Turn one annotation entry into an image and a box array.

        Args:
            annotation: One entry produced by ``load_annotations``.
            mAP: If equal to True, the image and boxes are returned without
                being resized to the network input size.

        Returns:
            ``(image, bboxes)``; ``bboxes`` has one row
            ``[x1, y1, x2, y2, class]`` per box.
        """
        image_path = annotation[0]
        if self.load_images_to_ram:
            image = annotation[2]
        else:
            image = cv2.imread(image_path)

        bboxes = np.array([list(map(int, box.split(','))) for box in annotation[1]])

        # Augmentation: a horizontal flip is left out on purpose because
        # digits and characters must not be mirrored. Whenever the image is
        # transformed the boxes are transformed with it.
        if self.data_aug:
            image, bboxes = random_crop(np.copy(image),
                                        np.copy(bboxes))
            image, bboxes = random_translate(np.copy(image),
                                             np.copy(bboxes))

        # Kept as an equality test so the legacy string default 'False'
        # does not count as true.
        if mAP == True:  # noqa: E712
            return image, bboxes

        image, bboxes = resize_to_square(np.copy(image), self.input_size, np.copy(bboxes))

        return image, bboxes

    def preprocess_true_boxes(self, bboxes):
        """Convert ground-truth boxes into per-scale label grids.

        Args:
            bboxes: Array with one row ``[x1, y1, x2, y2, class]`` per box.

        Returns:
            ``(label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes,
            lbboxes)``: for each of the three scales a label grid of shape
            ``(size, size, anchor_per_scale, 5 + num_classes)`` and a
            ``(max_bbox_per_scale, 4)`` array with the assigned boxes as
            (cx, cy, w, h).
        """
        # One output level per stride.
        OUTPUT_LEVELS = len(self.strides)

        label = [np.zeros((self.output_sizes[i],
                           self.output_sizes[i],
                           self.anchor_per_scale,
                           5 + self.num_classes))
                 for i in range(OUTPUT_LEVELS)]
        bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4))
                       for _ in range(OUTPUT_LEVELS)]
        # Number of boxes stored so far on every level.
        bbox_count = np.zeros((OUTPUT_LEVELS,))

        # Label smoothing: y_ls = (1 - alpha) * y_onehot + alpha / K
        K = self.num_classes
        alpha = 0.01

        for bbox in bboxes:
            bbox_coor = bbox[:4]
            bbox_class_ind = bbox[4]

            onehot = np.zeros(self.num_classes, dtype=np.float64)
            onehot[bbox_class_ind] = 1.0
            smooth_onehot = (1 - alpha) * onehot + alpha / K

            # Corner coordinates -> (cx, cy, w, h), then one copy per level
            # divided by that level's stride.
            bbox_xywh = np.concatenate(
                [(bbox_coor[2:] + bbox_coor[:2]) * 0.5,
                 bbox_coor[2:] - bbox_coor[:2]], axis=-1)
            bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]

            iou = []
            exist_positive = False
            for i in range(OUTPUT_LEVELS):
                # Anchors centred on the grid cell that contains the box centre.
                anchors_xywh = np.zeros((self.anchor_per_scale, 4))
                anchors_xywh[:, 0:2] = np.floor(
                    bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                anchors_xywh[:, 2:4] = self.anchors[i]

                # IoU between the ground-truth box and every anchor.
                iou_scale = bbox_iou(
                    bbox_xywh_scaled[i][np.newaxis, :],
                    anchors_xywh)
                iou.append(iou_scale)

                # Anchors with IoU above 0.3 are marked as positives.
                iou_mask = iou_scale > 0.3
                if np.any(iou_mask):
                    xi, yi = np.floor(
                        bbox_xywh_scaled[i, 0:2]).astype(np.int32)

                    label[i][yi, xi, iou_mask, :] = 0
                    label[i][yi, xi, iou_mask, 0:4] = bbox_xywh
                    label[i][yi, xi, iou_mask, 4:5] = 1.0
                    label[i][yi, xi, iou_mask, 5:] = smooth_onehot

                    # Slots are reused cyclically once a level is full.
                    bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)
                    bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
                    bbox_count[i] += 1
                    exist_positive = True

            # No anchor passed the threshold: fall back to the single anchor
            # with the highest IoU over all levels.
            if not exist_positive:
                bst_anc_idx = np.argmax(np.array(iou).reshape(-1),
                                        axis=-1)
                best_detect = int(bst_anc_idx / self.anchor_per_scale)
                best_anchor = int(bst_anc_idx % self.anchor_per_scale)
                xi, yi = np.floor(
                    bbox_xywh_scaled[best_detect,
                                     0:2]).astype(np.int32)

                label[best_detect][yi, xi, best_anchor, :] = 0
                label[best_detect][yi, xi,
                                   best_anchor, 0:4] = bbox_xywh
                label[best_detect][yi, xi,
                                   best_anchor, 4:5] = 1.0
                label[best_detect][yi, xi,
                                   best_anchor, 5:] = smooth_onehot

                bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale)
                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
                bbox_count[best_detect] += 1

        label_sbbox, label_mbbox, label_lbbox = label
        sbboxes, mbboxes, lbboxes = bboxes_xywh
        output_boxes = label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
        return output_boxes

    def __len__(self):
        """Return the number of batches per epoch."""
        return self.num_batchs

    def __iter__(self):
        """Return the generator itself; it is its own iterator."""
        return self

    def __next__(self):
        """Return the next ``(batch_image, target)`` pair.

        Raises:
            StopIteration: When all batches of the epoch were produced; the
                batch counter is reset and the annotations are reshuffled.
            Exception: If any sample of the batch raised ``IndexError`` while
                its boxes were converted; the offending files are printed.
        """
        with tf.device('/cpu:0'):
            # End of epoch: reset before allocating any batch buffers.
            if self.batch_count >= self.num_batchs:
                self.batch_count = 0
                np.random.shuffle(self.annotations)
                raise StopIteration

            num_levels = len(self.output_sizes)

            batch_image = np.zeros(
                (self.batch_size,
                 self.input_size,
                 self.input_size,
                 3), dtype=np.float32)

            # Per-scale label grids and box lists (small, medium, large).
            batch_labels = [
                np.zeros((self.batch_size, size, size,
                          self.anchor_per_scale,
                          5 + self.num_classes), dtype=np.float32)
                for size in self.output_sizes]
            batch_boxes = [
                np.zeros((self.batch_size,
                          self.max_bbox_per_scale, 4), dtype=np.float32)
                for _ in range(num_levels)]

            exceptions = False
            for num in range(self.batch_size):
                # Wrap around so the last batch is filled from the beginning.
                index = (self.batch_count * self.batch_size + num) % self.num_samples
                annotation = self.annotations[index]
                image, bboxes = self.parse_annotation(annotation)
                try:
                    labels_and_boxes = self.preprocess_true_boxes(bboxes)
                except IndexError:
                    # Remember the failure and keep scanning the batch so all
                    # bad files are reported; skip the assignments below, which
                    # would otherwise use labels that were never produced.
                    exceptions = True
                    print("IndexError,", annotation[0])
                    continue

                batch_image[num, :, :, :] = image
                for level in range(num_levels):
                    batch_labels[level][num, :, :, :, :] = labels_and_boxes[level]
                    batch_boxes[level][num, :, :] = labels_and_boxes[num_levels + level]

            if exceptions:
                print('\n')
                raise Exception("데이터셋에 문제가 있습니다.")

            self.batch_count += 1
            target = tuple(zip(batch_labels, batch_boxes))
            return batch_image, target

## GPU 사용 설정

In [None]:
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
print(f'GPUs {gpus}')

# Allocate GPU memory on demand instead of reserving it all up front.
# The setting is applied to every GPU because TensorFlow requires memory
# growth to be configured identically on all of them.
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Best effort: keep going with the default allocation, but say so
        # instead of failing silently.
        print(f'Could not enable memory growth for {gpu}: {err}')

## 상수 정의

In [None]:
LOGDIR = "logs" # directory where the training logs are stored

WARMUP_EPOCHS = 2
EPOCHS = 100

SAVE_BEST_ONLY        = True              # save the model with the best val loss; True is recommended
SAVE_CHECKPOINT       = False             # if True, every valid model is saved during training; False is recommended
CHECKPOINTS_FOLDER    = "checkpoints"     # directory where models are saved
MODEL_NAME            = "mnist_custom"    # name of the saved model
SCORE_THRESHOLD       = 0.3

## 학습 로그 설정

In [None]:
import os
import shutil
import tensorflow as tf

LOGDIR = "logs" # directory where the training logs are stored

# Start from a clean log directory on every run.
if os.path.exists(LOGDIR): 
    shutil.rmtree(LOGDIR) # delete the log directory if it already exists

# NOTE(review): both writers point at the same directory.
writer = tf.summary.create_file_writer(LOGDIR)
validate_writer = tf.summary.create_file_writer(LOGDIR)

## compute_loss

In [None]:
def compute_loss(pred, conv, label, bboxes, 
                 i=0, iou_loss_thresh=0.45):
    """Compute the GIoU, confidence and class losses of one detection scale.

    Args:
        pred: Decoded prediction of the scale (output of ``decode``).
        conv: Raw conv output of the same scale.
        label: Label grid of the scale.
        bboxes: Ground-truth boxes of the scale, one list per sample.
        i: Index of the detection scale; selects ``STRIDES[i]``.
        iou_loss_thresh: Predictions whose best IoU with any ground-truth box
            is below this value may count as background.

    Returns:
        ``(giou_loss, conf_loss, prob_loss)``: each summed over the grid and
        averaged over the batch.
    """
    conv_shape = tf.shape(conv)
    batch_size, output_size = conv_shape[0], conv_shape[1]
    input_size = tf.cast(STRIDES[i] * output_size, tf.float32)

    conv = tf.reshape(conv,
                      (batch_size, output_size, output_size,
                       3, 5 + NUM_CLASS))
    raw_conf = conv[..., 4:5]
    raw_prob = conv[..., 5:]

    pred_xywh = pred[..., 0:4]
    pred_conf = pred[..., 4:5]

    label_xywh = label[..., 0:4]
    respond_bbox = label[..., 4:5]  # 1 where a cell/anchor holds an object
    label_prob = label[..., 5:]

    # Box regression loss; the scale factor gives smaller boxes more weight.
    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    bbox_loss_scale = 2.0 - 1.0 * label_xywh[..., 2:3] * label_xywh[..., 3:4] / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)

    # IoU of every prediction with every ground-truth box of its sample,
    # reduced to the best match per prediction.
    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :],
                   bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

    # A prediction is background if its cell holds no object and its best
    # IoU stays below the threshold.
    respond_bgd = (1.0 - respond_bbox) * tf.cast(max_iou < iou_loss_thresh, tf.float32)

    conf_focal = tf.pow(respond_bbox - pred_conf, 2)
    conf_bce = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=respond_bbox, logits=raw_conf)
    # Confidence loss over object cells and background cells only.
    conf_loss = conf_focal * (respond_bbox * conf_bce + respond_bgd * conf_bce)

    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=label_prob, logits=raw_prob)

    grid_axes = [1, 2, 3, 4]
    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=grid_axes))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=grid_axes))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=grid_axes))

    return giou_loss, conf_loss, prob_loss

## 학습 단계 정의하기

In [None]:
def train_step(model, image_data, target, lr_init=1e-4, lr_end=1e-6):
    """Run one optimisation step and log the losses.

    Uses the module-level ``optimizer``, ``global_steps``, ``warmup_steps``,
    ``total_steps`` and ``writer``.

    Args:
        model: Model built in train mode (raw and decoded output per scale).
        image_data: Batch of input images.
        target: Per-scale ``(label, boxes)`` pairs.
        lr_init: Learning rate reached at the end of the warm-up.
        lr_end: Learning rate at the end of the cosine decay.

    Returns:
        ``(global step, learning rate, giou_loss, conf_loss, prob_loss,
        total_loss)`` as numpy values.

    NOTE(review): ``model.losses`` is not added to ``total_loss``, so the
    kernel regularizers configured on the conv layers do not contribute.
    """
    with tf.GradientTape() as tape:
        outputs = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0

        # The model outputs (raw, decoded) pairs for each of the 3 scales.
        for scale in range(3):
            raw, decoded = outputs[scale * 2], outputs[scale * 2 + 1]
            giou, conf, prob = compute_loss(decoded, raw, *target[scale], scale)
            giou_loss += giou
            conf_loss += conf
            prob_loss += prob

        total_loss = giou_loss + conf_loss + prob_loss

    variables = model.trainable_variables
    gradients = tape.gradient(total_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    # Learning-rate schedule: linear warm-up followed by cosine decay.
    # Warm-up reference: https://arxiv.org/abs/1812.01187
    global_steps.assign_add(1)
    if global_steps < warmup_steps:
        lr = global_steps / warmup_steps * lr_init
    else:
        progress = (global_steps - warmup_steps) / (total_steps - warmup_steps)
        lr = lr_end + 0.5 * (lr_init - lr_end) * ((1 + tf.cos(progress * np.pi)))
    optimizer.lr.assign(lr.numpy())

    # Write the learning rate and the losses to the training log.
    with writer.as_default():
        tf.summary.scalar("lr", optimizer.lr, step=global_steps)
        for tag, value in (("loss/total_loss", total_loss),
                           ("loss/giou_loss", giou_loss),
                           ("loss/conf_loss", conf_loss),
                           ("loss/prob_loss", prob_loss)):
            tf.summary.scalar(tag, value, step=global_steps)
    writer.flush()

    return global_steps.numpy(), optimizer.lr.numpy(), giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()

## 검증 단계 정의하기

In [None]:
def validate_step(model, image_data, target):
    """Compute the validation losses for one batch without training.

    No gradients are needed here, so the forward pass is run without a
    ``tf.GradientTape``; recording the operations would only cost memory.

    Relies on ``compute_loss`` being defined at module level.

    Args:
        model: Callable invoked as ``model(image_data, training=False)``. Its
            output is indexed so that entries ``2*i`` and ``2*i + 1`` are the
            ``conv`` / ``pred`` pair handed to ``compute_loss`` for output
            ``i`` (three outputs are consumed).
        image_data: Input batch forwarded to the model.
        target: Indexable with one entry per output; each entry is unpacked
            into ``compute_loss`` as extra positional arguments.

    Returns:
        Tuple ``(giou_loss, conf_loss, prob_loss, total_loss)`` converted with
        ``.numpy()``.
    """
    pred_result = model(image_data, training=False)
    giou_loss = conf_loss = prob_loss = 0

    # Accumulate the three loss terms over the three model outputs.
    grid = 3
    for i in range(grid):
        conv, pred = pred_result[i*2], pred_result[i*2+1]
        loss_items = compute_loss(pred, conv, *target[i], i)
        giou_loss += loss_items[0]
        conf_loss += loss_items[1]
        prob_loss += loss_items[2]

    total_loss = giou_loss + conf_loss + prob_loss

    return giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()

# 모듈 불러와 학습 모델 만들기

## 데이터 생성기 만들기

In [None]:
import tensorflow as tf
from config import *
from data import DataGenerator 

# Build the training and test data generators from the paths exported by
# `config` (brought in through the star import above).
trainset = DataGenerator(data_path=TRAIN_DATA_PATH,
                         annot_path=TRAIN_ANNOT_PATH,
                         class_label_path=CLASS_LABEL_PATH)
testset = DataGenerator(data_path=TEST_DATA_PATH, 
                        annot_path=TEST_ANNOT_PATH,
                        class_label_path=CLASS_LABEL_PATH)
# NOTE(review): presumably len(DataGenerator) is the number of batches per
# epoch - confirm against the DataGenerator implementation.
steps_per_epoch = len(trainset)
# Non-trainable step counter; it starts at 1 and is incremented by train_step.
global_steps = tf.Variable(1, trainable=False, dtype=tf.int64) 
# Step counts that bound the warm-up phase and the whole schedule in train_step.
warmup_steps = WARMUP_EPOCHS * steps_per_epoch
total_steps = EPOCHS * steps_per_epoch

# Adam with default settings; train_step overwrites its learning rate each step.
optimizer = tf.keras.optimizers.Adam()

## 학습시키기

In [None]:
import os
from config import *
from bbox_iou import bbox_iou, bbox_giou
from yolov3 import Create_YOLOv3
from train import *


# Build the model in training mode and train it for EPOCHS epochs, validating
# after every epoch when a test set is available.
yolo = Create_YOLOv3(num_class=NUM_CLASS, train_mode=True)

best_val_loss = 99999
save_directory = os.path.join(CHECKPOINTS_FOLDER, MODEL_NAME)

for epoch in range(EPOCHS):
    # ---- training pass ----
    for image_data, target in trainset:
        results = train_step(yolo, image_data, target)
        cur_step = results[0] % steps_per_epoch
        print("epoch:{:2.0f} step:{:5.0f}/{}, lr:{:.6f}, giou_loss:{:7.2f}, conf_loss:{:7.2f}, prob_loss:{:7.2f}, total_loss:{:7.2f}".format(epoch, cur_step, steps_per_epoch, results[1], results[2], results[3], results[4], results[5]))

    # Without validation data there is nothing to compare against, so the
    # weights are simply saved after every epoch.
    if len(testset) == 0:
        print("configure TEST options to validate model")
        yolo.save_weights(save_directory)
        continue

    # ---- validation pass ----
    count = 0
    giou_val, conf_val, prob_val, total_val = 0, 0, 0, 0

    for image_data, target in testset:
        results = validate_step(yolo, image_data, target)
        count += 1
        giou_val += results[0]
        conf_val += results[1]
        prob_val += results[2]
        total_val += results[3]

    # Per-batch mean of each validation loss term for this epoch.
    mean_total_val = total_val / count
    mean_giou_val = giou_val / count
    mean_conf_val = conf_val / count
    mean_prob_val = prob_val / count

    # Log the validation losses.
    with validate_writer.as_default():
        tf.summary.scalar("validate_loss/total_val", mean_total_val, step=epoch)
        tf.summary.scalar("validate_loss/giou_val", mean_giou_val, step=epoch)
        tf.summary.scalar("validate_loss/conf_val", mean_conf_val, step=epoch)
        tf.summary.scalar("validate_loss/prob_val", mean_prob_val, step=epoch)
    validate_writer.flush()

    # Checkpoint-every-epoch mode: the original computed the per-epoch path
    # but never wrote the weights, so no checkpoint was ever produced.
    if SAVE_CHECKPOINT and not SAVE_BEST_ONLY:
        save_directory = os.path.join(CHECKPOINTS_FOLDER,  MODEL_NAME + "_epoch_{:03d}_val_loss_{:7.2f}".format(epoch, mean_total_val))
        yolo.save_weights(save_directory)

    # Best-only mode: overwrite the checkpoint only when validation improves.
    if SAVE_BEST_ONLY:
        if best_val_loss > mean_total_val:
            best_val_loss = mean_total_val
            yolo.save_weights(save_directory)

In [None]:
# yolo.save_weights(save_directory)

# 예측 후 후처리

## 박스 후처리(postprocess_boxes)

In [None]:
import numpy as np

def postprocess_boxes(pred_bbox, original_image, input_size, score_threshold):
    """Map raw predictions back to the original image and filter them.

    Args:
        pred_bbox: 2-D array-like whose columns are read as
            ``[x_center, y_center, width, height, confidence, class probs...]``.
        original_image: Array whose first two dimensions are (height, width).
        input_size: Side length of the square network input.
        score_threshold: Boxes whose ``confidence * best class probability``
            is not strictly greater than this value are dropped.

    Returns:
        Array with one row per kept box:
        ``[xmin, ymin, xmax, ymax, score, class_index]``.
    """
    min_scale, max_scale = 0, np.inf
    preds = np.array(pred_bbox)

    centers = preds[:, 0:2]
    half_extent = preds[:, 2:4] * 0.5
    objectness = preds[:, 4]
    class_probs = preds[:, 5:]

    # 1. (x, y, w, h) -> (xmin, ymin, xmax, ymax)
    corners = np.concatenate([centers - half_extent,
                              centers + half_extent], axis=-1)

    # 2. Undo the letterbox resize: remove the padding, then the scaling.
    org_h, org_w = original_image.shape[:2]
    resize_ratio = min(input_size / org_w, input_size / org_h)
    pad_x = (input_size - resize_ratio * org_w) / 2
    pad_y = (input_size - resize_ratio * org_h) / 2

    corners[:, 0::2] = (corners[:, 0::2] - pad_x) / resize_ratio
    corners[:, 1::2] = (corners[:, 1::2] - pad_y) / resize_ratio

    # 3. Clip boxes to the image and zero out boxes that became inverted.
    top_left = np.maximum(corners[:, :2], [0, 0])
    bottom_right = np.minimum(corners[:, 2:], [org_w - 1, org_h - 1])
    corners = np.concatenate([top_left, bottom_right], axis=-1)

    inverted = (corners[:, 0] > corners[:, 2]) | (corners[:, 1] > corners[:, 3])
    corners[inverted] = 0

    # 4. Keep only boxes whose geometric-mean side length is in range.
    side_lengths = corners[:, 2:4] - corners[:, 0:2]
    box_scale = np.sqrt(np.prod(side_lengths, axis=-1))
    in_scale = (min_scale < box_scale) & (box_scale < max_scale)

    # 5. Keep only boxes whose best class score exceeds the threshold.
    best_class = np.argmax(class_probs, axis=-1)
    best_score = objectness * class_probs[np.arange(len(corners)), best_class]
    keep = in_scale & (best_score > score_threshold)

    kept_corners = corners[keep]
    kept_scores = best_score[keep][:, np.newaxis]
    kept_classes = best_class[keep][:, np.newaxis]

    return np.concatenate([kept_corners, kept_scores, kept_classes], axis=-1)

## 상자들의 IoU 계산하기

In [None]:
import numpy as np

def bboxes_iou(boxes1, boxes2):
    """Compute the IoU between boxes given as (xmin, ymin, xmax, ymax).

    The two inputs are combined element-wise with NumPy broadcasting over
    the leading dimensions.

    Args:
        boxes1: Array-like whose last axis holds ``xmin, ymin, xmax, ymax``.
        boxes2: Array-like with the same layout, broadcastable to ``boxes1``.

    Returns:
        IoU values, floored at ``np.finfo(np.float32).eps`` so that the
        result is never exactly zero. Pairs whose union area is not positive
        (for example two zero-area boxes) also yield that floor value instead
        of NaN.
    """
    boxes1 = np.asarray(boxes1)
    boxes2 = np.asarray(boxes2)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Corners of the intersection rectangle.
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])

    # Negative extents mean the boxes do not overlap on that axis.
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # A zero union (degenerate boxes) would give 0/0 = NaN and a runtime
    # warning; silence the warning and substitute 0 for those entries.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = 1.0 * inter_area / union_area
    ratio = np.where(union_area > 0, ratio, 0.0)

    ious = np.maximum(ratio, np.finfo(np.float32).eps)

    return ious

## NMS

In [None]:
import numpy as np

def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """Per-class non-maximum suppression.

    Args:
        bboxes: 2-D array whose columns are read as
            ``[xmin, ymin, xmax, ymax, score, class]``.
        iou_threshold: With ``method='nms'``, boxes whose IoU with the
            selected box is strictly greater than this value are discarded.
        sigma: With ``method='soft-nms'``, scores are multiplied by
            ``exp(-iou**2 / sigma)``.
        method: Either ``'nms'`` or ``'soft-nms'``.

    Returns:
        List of the selected box rows.
    """
    selected = []

    for label in set(bboxes[:, 5]):
        # Boolean indexing copies, so the caller's array is never modified.
        candidates = bboxes[bboxes[:, 5] == label]

        while len(candidates) > 0:
            # Take the highest-scoring remaining box of this class.
            top = np.argmax(candidates[:, 4])
            winner = candidates[top]
            selected.append(winner)
            candidates = np.delete(candidates, top, axis=0)

            # Overlap of every remaining box with the box just selected.
            overlaps = bboxes_iou(winner[np.newaxis, :4], candidates[:, :4])

            assert method in ['nms', 'soft-nms']

            decay = np.ones((len(overlaps),), dtype=np.float32)
            if method == 'soft-nms':
                decay = np.exp(-(1.0 * overlaps ** 2 / sigma))
            elif method == 'nms':
                decay[overlaps > iou_threshold] = 0.0

            # Rescale the scores and drop boxes whose score reached zero.
            candidates[:, 4] = candidates[:, 4] * decay
            candidates = candidates[candidates[:, 4] > 0.]

    return selected

## 사각형 그리기

In [None]:
import colorsys
import random
import numpy as np
import cv2

def draw_bbox(image, bboxes, class_names,
              show_label=True, show_confidence=True,
              Text_colors=(0,0,0), rectangle_colors='', 
              tracking=False):
    """Draw bounding boxes (and optional labels) onto ``image`` in place.

    Args:
        image: Array unpacked as (height, width, channels); it is drawn on
            directly and also returned.
        bboxes: Iterable of rows read as
            ``[xmin, ymin, xmax, ymax, score, class_index]``.
        class_names: Container indexed by integer class index that yields the
            label text; its length sets the number of colors generated.
        show_label: Draw a text label above each box.
        show_confidence: Append the score with two decimals to the label.
        Text_colors: Color passed to ``cv2.putText``.
        rectangle_colors: Color used for every box; the default ``''`` picks
            a per-class color instead.
        tracking: When true the label suffix is ``str(score)`` instead of the
            formatted confidence.

    Returns:
        The same ``image`` object with the drawings applied.
    """
    image_h, image_w, _ = image.shape
    num_classes = len(class_names)

    # One evenly spaced hue per class, converted to 0-255 integer triples.
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = [tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
              for hsv in hsv_tuples]

    # Shuffle with a private, fixed-seed generator: same order as seeding the
    # module-level generator with 0, but the caller's random state is untouched.
    random.Random(0).shuffle(colors)

    # Line thickness and font scale depend only on the image size.
    bbox_thick = max(int(0.6 * (image_h + image_w) / 1000), 1)
    fontScale = 0.75 * bbox_thick

    for bbox in bboxes:
        # Plain Python ints are accepted by every OpenCV build.
        x1, y1, x2, y2 = (int(v) for v in np.array(bbox[:4], dtype=np.int32))
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = rectangle_colors if rectangle_colors != '' else colors[class_ind]

        # Bounding box outline.
        cv2.rectangle(image, (x1, y1), (x2, y2),
                      bbox_color, bbox_thick * 2)

        if not show_label:
            continue

        score_str = ""
        if show_confidence:
            score_str = " {:.2f}".format(score)
        if tracking:
            score_str = " " + str(score)

        try:
            class_name = class_names[class_ind]
        except (KeyError, IndexError):
            # Unknown class index: report it and skip the label for this box
            # rather than continuing with an undefined or stale label.
            print("클래스 라벨이 잘못되었습니다.")
            continue
        label = f"{class_name}{score_str}"

        # Size of the rendered text.
        (text_width, text_height), baseline = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_COMPLEX_SMALL,
            fontScale, thickness=bbox_thick)
        # Filled rectangle that serves as the text background.
        cv2.rectangle(image, (x1, y1),
                      (x1 + text_width,
                       y1 - text_height - baseline),
                      bbox_color, thickness=cv2.FILLED)
        # Label text on top of the filled rectangle.
        cv2.putText(image, label, (x1, y1 - 4),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL,
                    fontScale, Text_colors, bbox_thick,
                    lineType=cv2.LINE_AA)
    return image

# 실시간 객체 탐지

## detect_image

In [None]:
import random
import numpy as np
import cv2
import tensorflow as tf

from image_process import resize_to_square
from data import read_class_names
from post_process import *

def detect_image(model, image_path, output_path,
                 class_label_path, 
                 input_size=416, show=False,
                 score_threshold=0.3, iou_threshold=0.45,
                 rectangle_colors=''):
    """Detect objects in an image file and draw the resulting boxes.

    Args:
        model: Object whose ``predict`` is called on a batch holding the one
            resized image; its outputs are flattened and concatenated.
        image_path: Path of the image read with ``cv2.imread``.
        output_path: Where the annotated image is written; ``''`` disables
            writing.
        class_label_path: Path handed to ``read_class_names``.
        input_size: Target size passed to ``resize_to_square`` and to
            ``postprocess_boxes``.
        show: Display the result in a window and wait for a key press.
        score_threshold: Score threshold passed to ``postprocess_boxes``.
        iou_threshold: IoU threshold passed to ``nms``.
        rectangle_colors: Forwarded to ``draw_bbox``.

    Returns:
        The annotated image (the array read from disk, drawn on in place).

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``image_path``.
    """
    original_image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if original_image is None:
        raise FileNotFoundError(f"could not read image: {image_path}")
    class_names = read_class_names(class_label_path)

    # Resize a copy to the network input and add the batch dimension.
    image_data = resize_to_square(np.copy(original_image), target_size=input_size)
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    pred_bbox = model.predict(image_data)

    # Flatten every output to (num_boxes, features) and stack them.
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)

    bboxes = postprocess_boxes(pred_bbox, original_image,
                               input_size, score_threshold)
    bboxes = nms(bboxes, iou_threshold, method='nms')

    image = draw_bbox(original_image, bboxes, class_names,
                      rectangle_colors=rectangle_colors)

    if output_path != '':
        cv2.imwrite(output_path, image)
    if show:
        cv2.imshow("predicted image", image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return image

In [None]:
import os
from config import *
from yolov3 import Create_YOLOv3

# Build the model (without train_mode) and restore the weights stored under
# CHECKPOINTS_FOLDER/MODEL_NAME.
yolo = Create_YOLOv3(num_class=NUM_CLASS)
yolo.load_weights(os.path.join(CHECKPOINTS_FOLDER, MODEL_NAME))
# yolo.load_weights("checkpoints/mnist_custom")
# Keep a copy of the loaded weights so that a later cell can re-apply them.
weight = yolo.get_weights()

In [None]:
# Re-apply the weights captured in the previous cell, then run detection on a
# sample image; the annotated result is written to disk and shown in a window.
yolo.set_weights(weight)
result_image = detect_image(model=yolo,  
                            image_path="mnist_test_c.jpg",
                            output_path="mnist_test_out.jpg", 
                            class_label_path=CLASS_LABEL_PATH, 
                            input_size=416, show=True)

## Realtime

In [None]:
import os
import tensorflow as tf
from config import *
from image_process import resize_to_square
from data import read_class_names
from post_process import *
from yolov3 import Create_YOLOv3

# Real-time detection on frames read from the default camera (device 0).
yolo = Create_YOLOv3(num_class=NUM_CLASS)
yolo.load_weights(os.path.join(CHECKPOINTS_FOLDER, MODEL_NAME))
# yolo.load_weights("checkpoints/mnist_custom")
weights = yolo.get_weights()
class_names = read_class_names(CLASS_LABEL_PATH)

cap = cv2.VideoCapture(0)
# try/finally guarantees that the camera and the windows are released even
# when an exception is raised while processing a frame.
try:
    if cap.isOpened():
        while True:
            # NOTE(review): re-applying the same weights on every frame looks
            # redundant since nothing below modifies them - confirm before
            # hoisting this call out of the loop.
            yolo.set_weights(weights)
            ret, image = cap.read()
            if not ret:
                print("프레임을 받지 못했습니다.")
                break

            # Brighten the frame by adding 100 to every channel (saturating).
            dummy = np.full(image.shape, fill_value=100,
                            dtype=np.uint8)
            cv2.add(image, dummy, image)

            # Stretch the contrast to the full 0-255 range.
            image = cv2.normalize(image, None, 0, 255,
                                  cv2.NORM_MINMAX)

            # Resize a copy to a 416x416 square and add the batch dimension.
            image_data = resize_to_square(np.copy(image), 416)
            image_data = image_data[np.newaxis,
                                    ...].astype(np.float32)

            # Predict boxes and flatten every output to (num_boxes, features).
            pred_box = yolo.predict(image_data)
            pred_box = [tf.reshape(x, (-1, tf.shape(x)[-1]))
                        for x in pred_box]
            pred_box = tf.concat(pred_box, axis=0)

            # Map the boxes back to the frame and drop low-score ones.
            bboxes = postprocess_boxes(pred_box, image, 416, 0.3)

            # Suppress overlapping boxes of the same class.
            bboxes = nms(bboxes, 0.45, method="nms")

            # Draw the remaining boxes on the frame.
            image = draw_bbox(image, bboxes, class_names)

            cv2.imshow("Image", image)
            # Stop when the Esc key (code 27) is pressed.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    else:
        print('연결된 카메라가 없습니다.')
finally:
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# yolo.summary()