# 辨識影像
## Step 1: 載入模型
請為下列各項變數設定相對應的值。

- ARGS_FRAMEWORK (深度學習架構)
    - tf, tflite, trt
- ARGS_WEIGHTS (訓練好的權重路徑)
    - path to weights file
- ARGS_TINY='yolo' (是否壓縮模型)
    - yolo or yolo-tiny
- ARGS_MODEL='yolov4' (Yolo版本)
    - yolov3 or yolov4

In [1]:
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# if len(physical_devices) > 0:
#     tf.config.experimental.set_memory_growth(physical_devices[0], True)
import core.utils as utils
from core.yolov4 import filter_boxes
from tensorflow.python.saved_model import tag_constants
from PIL import Image
import cv2
import numpy as np
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
import matplotlib.pyplot as plt

# --- Model selection -------------------------------------------------------
# These constants mirror the options listed in the Step 1 notes above.
ARGS_FRAMEWORK = 'tf'  # one of: tf, tflite, trt
ARGS_WEIGHTS = './checkpoints/yolov4-car_final-416'  # path to weights file
ARGS_TINY = 'yolo'  # yolo or yolo-tiny
ARGS_MODEL = 'yolov4'  # yolov3 or yolov4

# --- TensorFlow session ----------------------------------------------------
# Enable `allow_growth` on the GPU options before the session is created, and
# create the session before the model is loaded (same order as before).
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# --- Load the exported model -----------------------------------------------
# Load the SavedModel found at ARGS_WEIGHTS using the SERVING tag, then take
# its 'serving_default' signature as the callable used for inference.
saved_model_loaded = tf.saved_model.load(
    ARGS_WEIGHTS,
    tags=[tag_constants.SERVING],
)
infer = saved_model_loaded.signatures['serving_default']

## Step 2: 預測
給予適當的初始化參數，並指定要進行預測的影片路徑。

- ARGS_VIDEO (輸入影片的路徑)
    - path to input video
- ARGS_SIZE=416 (輸入影像大小)
    - resize images to
- ARGS_OUTPUT (輸出影片的路徑)
    - path to output video
- ARGS_OUTPUT_FORMAT='MP4V' (輸出影片的編碼格式)
    - FourCC codec code
- ARGS_IOU (iou 閾值)
    - range 0~1
- ARGS_SCORE (信心程度閾值)
    - range 0~1
- ARGS_PREVIEW (是否要即時播放預覽)
    - 預設 False

In [7]:
import time
start = time.time()  # wall-clock start of this cell (not read inside this cell)

# Run the loaded detector over every frame of a video, draw the detections on
# each frame and (optionally) write the annotated frames to a new video file
# and/or show them in a preview window.

ARGS_VIDEO = './data/traffic.mov'  # path to input video
ARGS_OUTPUT_FORMAT = 'MP4V'  # FourCC code passed to cv2.VideoWriter_fourcc
ARGS_SIZE = 416  # frames are resized to ARGS_SIZE x ARGS_SIZE before inference
ARGS_OUTPUT = 'result.mp4'  # path to output video; falsy value disables writing
ARGS_IOU = 0.45  # iou threshold for non-max suppression
ARGS_SCORE = 0.8  # score threshold for non-max suppression
ARGS_PREVIEW = False  # show each annotated frame in an OpenCV window

input_size = ARGS_SIZE
video_path = ARGS_VIDEO

vid = cv2.VideoCapture(video_path)
if not vid.isOpened():
    # Without this check a bad path would silently report "complete" after
    # zero frames and leave an empty output file behind.
    vid.release()
    raise IOError("Cannot open video: %s" % video_path)

out = None
frame_id = 0
try:
    if ARGS_OUTPUT:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # change the playback speed of the written video.
        fps = vid.get(cv2.CAP_PROP_FPS)
        codec = cv2.VideoWriter_fourcc(*ARGS_OUTPUT_FORMAT)
        out = cv2.VideoWriter(ARGS_OUTPUT, codec, fps, (width, height))

    if ARGS_PREVIEW:
        # Create the preview window once instead of once per frame.
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)

    while True:
        return_value, frame = vid.read()
        if not return_value:
            print("Video processing complete")
            break

        # OpenCV delivers BGR frames; the rest of the pipeline works in RGB.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Build a float32 batch of one frame with pixel values scaled by 1/255.
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = infer(batch_data)
        # The last 4 leading channels are treated as box coordinates and the
        # remaining channels as confidences. If the signature returned several
        # outputs only the last one would be kept -- presumably there is
        # exactly one; TODO confirm against the exported model.
        for value in pred_bbox.values():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=ARGS_IOU,
            score_threshold=ARGS_SCORE
        )
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
        image = utils.draw_bbox(frame, pred_bbox)
        curr_time = time.time()
        # Measured span covers inference, NMS and drawing for this frame.
        exec_time = curr_time - prev_time
        info = "time: %.2f ms" %(1000*exec_time)
        print(info)

        # Convert the annotated frame to an array and back to BGR, which is
        # what cv2.imshow / cv2.VideoWriter expect.
        result = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        if ARGS_PREVIEW:
            cv2.imshow("result", result)
            # Pressing 'q' in the preview window stops processing early.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if out is not None:
            out.write(result)

        frame_id += 1
finally:
    # Always release the capture and writer so the output file is finalized,
    # even when processing stops early or an error is raised.
    vid.release()
    if out is not None:
        out.release()
    if ARGS_PREVIEW:
        cv2.destroyAllWindows()

time: 49.44 ms
time: 30.42 ms
time: 30.60 ms
time: 28.94 ms
time: 29.95 ms
time: 28.57 ms
time: 30.16 ms
time: 28.27 ms
time: 28.13 ms
time: 28.41 ms
time: 27.87 ms
time: 27.17 ms
time: 27.26 ms
time: 27.07 ms
time: 27.13 ms
time: 27.07 ms
time: 27.02 ms
time: 27.13 ms
time: 26.92 ms
time: 27.28 ms
time: 27.02 ms
time: 27.02 ms
time: 26.94 ms
time: 27.01 ms
time: 26.99 ms
time: 27.34 ms
time: 26.95 ms
time: 26.85 ms
time: 27.31 ms
time: 27.45 ms
time: 28.01 ms
time: 27.55 ms
time: 27.47 ms
time: 27.39 ms
time: 26.97 ms
time: 27.24 ms
time: 27.36 ms
time: 27.23 ms
time: 27.29 ms
time: 27.24 ms
time: 27.17 ms
time: 27.14 ms
time: 27.30 ms
time: 27.23 ms
time: 27.30 ms
time: 27.07 ms
time: 27.27 ms
time: 27.27 ms
time: 27.23 ms
time: 27.21 ms
time: 27.16 ms
time: 27.21 ms
time: 27.19 ms
time: 27.17 ms
time: 27.14 ms
time: 27.20 ms
time: 27.33 ms
time: 27.16 ms
time: 27.25 ms
time: 27.45 ms
time: 28.97 ms
time: 27.22 ms
time: 27.19 ms
time: 28.28 ms
time: 27.35 ms
time: 28.11 ms
time: 28.7