### 套件載入 ↓

In [1]:
import os        
import cv2
import json
import pafy
import numpy as np
import core.utils as utils
from time import time, sleep
from IPython.display import clear_output

import tensorflow as tf
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
# The memory-growth setting has to be the same on every visible GPU, so it is applied
# to each device rather than only to the first one. With no GPU the loop is a no-op.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)
from tensorflow.compat.v1 import ConfigProto, InteractiveSession
from tensorflow.python.saved_model import tag_constants

from threading import Thread
from queue import Queue

# Status message ("package loading complete"), printed once every import above has succeeded.
print('套件載入完成')

套件載入完成


### 模型載入、GPU初次啟動 ↓

In [2]:
set_size = 416 # set image size to 416
# Build the SavedModel directory with os.path.join so the notebook is not tied to
# Windows path separators; on Windows this still yields '.\checkpoints\yolov4-416'.
set_weights = os.path.join('.', 'checkpoints', 'yolov4-' + str(set_size)) # path to weights file

config = ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5 # limit this process to 50% of the GPU memory
session = InteractiveSession(config = config)

saved_model_loaded = tf.saved_model.load(set_weights, tags = [tag_constants.SERVING])
infer = saved_model_loaded.signatures['serving_default']

# Warm-up: push one all-zero batch of shape (1, set_size, set_size, 3) through the
# model so that the first-call start-up work happens here and not on a real image.
array_for_activate = np.zeros((1, set_size, set_size, 3), dtype = np.float32)
batch_data = tf.constant(array_for_activate)
pred_bbox = infer(batch_data)

print('模型載入、GPU初次啟動完成')

模型載入、GPU初次啟動完成


### 主要路徑設定、物件名稱載入 ↓

In [3]:
# Input/output folders, built with os.path.join so they work on any operating system
# (on Windows the resulting strings are the same as the former '.\data\...' literals).
set_image_input = os.path.join('.', 'data', 'input_images') # path to image input
set_video_input = os.path.join('.', 'data', 'input_videos') # path to video input
set_image_output = os.path.join('.', 'data', 'output_images') # path to image output
set_video_output = os.path.join('.', 'data', 'output_videos') # path to video output
# makedirs(exist_ok = True) also creates a missing parent folder and does not fail
# when the folder already exists.
os.makedirs(set_image_output, exist_ok = True)
os.makedirs(set_video_output, exist_ok = True)

set_names_file = os.path.join('.', 'data', 'classes', 'coco.names') # path to class names file
names = utils.read_class_names(set_names_file)
print(names)

print('\n主要路徑設定、物件名稱載入完成')

{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorbike', 4: 'aeroplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'sofa', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tvmonitor', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: '

### 函式宣告 ↓

In [4]:
def preProcessing(img):
    """Turn one image into a model-ready batch.

    The image is resized to ``set_size`` x ``set_size``, divided by 255, given a
    leading batch axis of length 1 and converted to float32.

    Args:
        img: image array accepted by ``cv2.resize``.

    Returns:
        float32 numpy array with one extra leading axis.
    """
    resized = cv2.resize(img, (set_size, set_size))
    scaled = resized / 255
    return np.expand_dims(scaled, axis = 0).astype(np.float32)
    
def detection(img):
    """Run the loaded model on a preprocessed batch and apply non-max suppression.

    Args:
        img: batch produced by ``preProcessing``.

    Returns:
        A pair ``(bbox, dtime)``: ``bbox`` is the list
        ``[boxes, scores, classes, valid_detections]`` as numpy arrays, and ``dtime``
        is the elapsed wall-clock time in seconds for tensor conversion, inference,
        suppression and the conversion back to numpy.
    """
    started = time()
    outputs = infer(tf.constant(img))
    # Each output tensor is split on its last axis: the first four channels are the
    # box values, everything after them the confidences. The last output wins.
    for raw in outputs.values():
        boxes = raw[:, :, :4]
        pred_conf = raw[:, :, 4:]

    box_batch = tf.shape(boxes)[0]
    conf_shape = tf.shape(pred_conf)
    suppressed = tf.image.combined_non_max_suppression(
        boxes = tf.reshape(boxes, (box_batch, -1, 1, 4)),
        scores = tf.reshape(pred_conf, (conf_shape[0], -1, conf_shape[-1])),
        max_output_size_per_class = 50,
        max_total_size = 50,
        iou_threshold = 0.45,
        score_threshold = 0.35
    )
    # The result unpacks as (boxes, scores, classes, valid_detections).
    bbox = [tensor.numpy() for tensor in suppressed]
    return bbox, time() - started
    
def objectFilter(bbox, obj):
    """Blank out every valid detection whose class id is not listed in ``obj``.

    The arrays inside ``bbox`` are modified in place. For each of the first
    ``Quantity[0]`` detections whose integer class id is not in ``obj``, the box,
    the score and the class are all set to zero. Entries past the valid count are
    left untouched.

    Args:
        bbox: ``[Box, Score, Class, Quantity]`` arrays as returned by ``detection``.
        obj: container of class ids to keep.

    Returns:
        A new list holding the same four (now filtered) arrays.
    """
    boxes, scores, class_ids, counts = bbox
    blank_box = np.zeros(4)
    for idx in range(counts[0]):
        if int(class_ids[0, idx]) in obj:
            continue
        boxes[0, idx] = blank_box
        scores[0, idx] = 0.
        class_ids[0, idx] = 0.
    return [boxes, scores, class_ids, counts]
    
def drawImage(img, bbox):
    """Draw the detections in ``bbox`` onto a BGR image and return the BGR result.

    The image is converted to RGB for ``utils.draw_bbox`` and converted back to BGR
    afterwards.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    annotated = utils.draw_bbox(rgb, bbox)
    return cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR)
    
def showResult(mode, img):
    """Show ``img`` in the 'Detection_Result' window.

    Args:
        mode: 'image' waits until a key is pressed; 'video' polls the keyboard for 1 ms.
        img: image to display.

    Returns:
        In 'video' mode, ``False`` when 'q' was pressed and ``True`` otherwise.
        In any other mode, ``None``.
    """
    cv2.imshow('Detection_Result', img)
    if mode == 'image':
        cv2.waitKey(0)
        return None
    if mode == 'video':
        # Only the low byte of the key code is compared with 'q'.
        return cv2.waitKey(1) & 0xFF != ord('q')
    return None
            
def writeFile(ind, img, bbox):
    """Save an annotated image and its detections for the image at index ``ind``.

    The image goes to ``output_paths[ind]``. Every valid detection with a non-zero
    score becomes one JSON object per line in ``json_paths[ind]``, with the box
    values truncated to int, the score rounded to two decimals and the class name
    looked up in ``names``. The saved location is then printed; the heading line is
    printed only for index 0.

    Args:
        ind: index into ``output_paths`` / ``json_paths``.
        img: image to write.
        bbox: ``[Box, Score, Class, Quantity]`` arrays as returned by ``detection``.
    """
    boxes, scores, class_ids, counts = bbox
    records = [
        {
            'bounding_box': [int(value) for value in boxes[0, k]],
            'confidence': float('%.2f' % (scores[0, k])),
            'object': names[int(class_ids[0, k])],
        }
        for k in range(counts[0])
        if scores[0, k] != 0.
    ]

    cv2.imwrite(output_paths[ind], img)
    with open(json_paths[ind], 'w') as f:
        f.writelines(json.dumps(record) + '\n' for record in records)

    if ind == 0:
        print('\n已儲存圖片與偵測資訊到路徑：')
    # The stored path starts with '.', which is dropped before prefixing the working directory.
    saved_to = os.getcwd() + output_paths[ind][1:]
    print('%2d: %s(.json)' % (ind + 1, saved_to))
    
def releaseVideo():
    """Close the display windows and release the video resources.

    Always releases ``input_video``. When ``write_video`` is set, it also releases
    ``output_video`` and prints where the output video was saved.
    """
    cv2.destroyAllWindows()
    input_video.release()
    if not write_video:
        return
    output_video.release()
    # The stored path starts with '.', which is dropped before prefixing the working directory.
    saved_to = os.getcwd() + output_path[1:]
    print('已儲存影片到路徑：%s' % saved_to)
    
def printString(string, amount = 50):
    """Print ``string`` between two rules of ``amount`` '=' characters, separated by spaces."""
    rule = '=' * amount
    print(rule, string, rule)
    
def processingTime(mode, time_prc, time_det, cnt):
    """Print the total processing time and the per-item averages.

    Args:
        mode: 'video' clears the previous cell output first, so the report is
            refreshed in place; any other value just prints.
        time_prc: total processing time in seconds.
        time_det: total detection time in seconds.
        cnt: number of processed items (images or frames).

    When ``cnt`` is 0 (nothing was processed) both averages are reported as 0
    instead of raising ``ZeroDivisionError``.
    """
    if mode == 'video':
        clear_output(wait = True)
    avg_prc = time_prc / cnt if cnt else 0.
    avg_det = time_det / cnt if cnt else 0.
    print('總處理時間：%.3f秒' % time_prc)
    print('平均處理時間：%.3f秒' % avg_prc)
    print('平均偵測時間：%.3f秒' % avg_det)
    
# Status message ("function declarations complete"), printed after all helpers in this cell are defined.
print('函式宣告完成')

函式宣告完成


### 圖片物件偵測 ↓

In [10]:
write_file = True # whether to write the results to disk
all_images = False # whether to read every file inside 'data\input_images'
if all_images:
    files = os.listdir(set_image_input)
else:
    files = ['2174659396_374dd3c6f3_z.jpg', '3689297032_cac37fd7a8_z.jpg', '9523522126_171a47db50_z.jpg'] # otherwise, the files to process
# ----------------------------------------------------------------------------------------------------- #
input_paths = [os.path.join(set_image_input, f) for f in files]
output_paths = [os.path.join(set_image_output, f) for f in files]
# splitext removes only the final extension, so a name such as 'a.b.jpg' maps to
# 'a.b.json' instead of being cut at its first dot (which could make names collide).
json_paths = [os.path.join(set_image_output, os.path.splitext(f)[0] + '.json') for f in files]

print('已讀取%d個圖片路徑：' % len(files))
for i, path in enumerate(input_paths):
    print('%2d: %s' % (i + 1, os.getcwd() + path[1:]))
# ----------------------------------------------------------------------------------------------------- #
all_object = True # whether to label every detected object
particular_obj = [0, 2] # otherwise, the class ids to keep
# ----------------------------------------------------------------------------------------------------- #
detection_time = 0
results = []
input_images = [cv2.imread(img) for img in input_paths]

start_time = time()
# ----------------------------------------------------------------------------------------------------- #
for i, ipimg in enumerate(input_images):
    # cv2.imread returns None (it does not raise) when a file is missing or cannot
    # be decoded; skip such entries instead of crashing inside preProcessing.
    if ipimg is None:
        print('無法讀取圖片，已略過：%s' % input_paths[i])
        continue

    image = ipimg
    image_pre = preProcessing(image)

    bounding_box, det_time = detection(image_pre)
    detection_time += det_time

    if not all_object:
        bounding_box = objectFilter(bounding_box, particular_obj)
    image_drw = drawImage(image, bounding_box)
    results.append(image_drw)

    if write_file:
        # i still indexes output_paths / json_paths because skipped files keep their slot
        writeFile(i, image_drw, bounding_box)

printString('圖片物件偵測完成', 58)
# ----------------------------------------------------------------------------------------------------- #
# Average over the images that were actually processed; with none there is nothing to report.
if results:
    processingTime('image', time() - start_time, detection_time, len(results))

cv2.namedWindow('Detection_Result', cv2.WINDOW_AUTOSIZE)
for result in results:
    showResult('image', result)
cv2.destroyAllWindows()

已讀取3個圖片路徑：
 1: D:\User Profiles\Desktop\Project\TensorFlow-YOLOv4\data\input_images\2174659396_374dd3c6f3_z.jpg
 2: D:\User Profiles\Desktop\Project\TensorFlow-YOLOv4\data\input_images\3689297032_cac37fd7a8_z.jpg
 3: D:\User Profiles\Desktop\Project\TensorFlow-YOLOv4\data\input_images\9523522126_171a47db50_z.jpg

已儲存圖片與偵測資訊到路徑：
 1: D:\User Profiles\Desktop\Project\TensorFlow-YOLOv4\data\output_images\2174659396_374dd3c6f3_z.jpg(.json)
 2: D:\User Profiles\Desktop\Project\TensorFlow-YOLOv4\data\output_images\3689297032_cac37fd7a8_z.jpg(.json)
 3: D:\User Profiles\Desktop\Project\TensorFlow-YOLOv4\data\output_images\9523522126_171a47db50_z.jpg(.json)
總處理時間：0.460秒
平均處理時間：0.153秒
平均偵測時間：0.059秒


### 讀取YouTube網址 ↓

In [5]:
url = 'https://www.youtube.com/watch?v=oA9g8KBY5r8' # video url
# ----------------------------------------------------------------------------------------------------- #
video = pafy.new(url)
file_name = video.videoid
# Select the stream described as 'video:mp4@1280x720'. Using next(..., None) makes a
# missing stream explicit, instead of leaving video720p undefined (NameError below) or
# silently reusing a value left in the kernel by an earlier run.
video720p = next((vst for vst in video.videostreams if str(vst) == 'video:mp4@1280x720'), None)
if video720p is None:
    raise RuntimeError('No 1280x720 mp4 video stream found for %s; available streams: %s'
                       % (url, [str(vst) for vst in video.videostreams]))
# ----------------------------------------------------------------------------------------------------- #
input_path = video720p.url
output_path = os.path.join(set_video_output, file_name + '.avi')
print('已讀取影片網址：%s' % url)

已讀取影片網址：https://www.youtube.com/watch?v=oA9g8KBY5r8


### 下載YouTube影片 ↓

In [6]:
# Download the selected stream into the video input folder, named after the video id with an '.mp4' extension.
video720p.download(filepath = os.path.join(set_video_input, file_name + '.mp4'))

  207,848,681.0 Bytes [100.00%] received. Rate: [35620 KB/s].  ETA: [0 secs]    


### 讀取影片檔案 ↓

In [5]:
file = 'oA9g8KBY5r8.mp4' # video file
# ----------------------------------------------------------------------------------------------------- #
input_path = os.path.join(set_video_input, file)
# splitext strips only the final extension, so a file name containing extra dots
# keeps its full stem in the output name instead of being cut at the first dot.
output_path = os.path.join(set_video_output, os.path.splitext(file)[0] + '.avi')
print('已讀取影片路徑：%s' % (os.getcwd() + input_path[1:]))

已讀取影片路徑：D:\User Profiles\Desktop\Personal\Project\YOLOv4-TensorFlow\data\input_videos\oA9g8KBY5r8.mp4


### 影片物件偵測 w/多執行緒 ↓

In [None]:
write_video = False # whether to write the annotated video to disk
all_object = True # whether to label every detected object
particular_obj = [0, 2] # otherwise, the class ids to keep
# ----------------------------------------------------------------------------------------------------- #
frame_cnt = 0
input_video = cv2.VideoCapture(input_path)
if not input_video.isOpened():
    # Fail early with a clear message instead of waiting forever for frames that never arrive.
    raise IOError('Cannot open video source: %s' % input_path)
# Frame count as reported by the source. It is kept for reference only: it can be
# missing or inaccurate, so the end of the video is detected from read() failing.
max_frame = input_video.get(cv2.CAP_PROP_FRAME_COUNT)
fps = int(np.round(input_video.get(cv2.CAP_PROP_FPS)))
# The reader buffers up to about one second of frames; at least one frame must be
# allowed, otherwise a source reporting 0 fps would never be read.
buffer_limit = max(fps, 1)

if write_video:
    codec = cv2.VideoWriter_fourcc(*'XVID')
    size = (int(input_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(input_video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    output_video = cv2.VideoWriter(output_path, codec, fps, size)
# ----------------------------------------------------------------------------------------------------- #
original_frame = Queue() # raw frames, in read order
preprc_frame = Queue() # preprocessed frames waiting for detection (None marks end of stream)
detection_data = Queue() # detection results, in read order (None marks end of stream)
detection_time = Queue() # cumulative detection time after each frame
stop = False

def getFrame_mt():
    """Reader thread: read, preprocess and queue frames until the video ends or ``stop`` is set.

    When the source has no more frames, a ``None`` sentinel is queued so the
    downstream stages know the stream has ended.
    """
    while not stop:
        if preprc_frame.qsize() < buffer_limit:
            ret, frame = input_video.read()
            if not ret:
                preprc_frame.put(None)
                break

            frame_pre = preProcessing(frame)
            # The raw frame is queued first so it is already available when its result arrives.
            original_frame.put(frame)
            preprc_frame.put(frame_pre)
        else:
            sleep(0.5)

def detection_mt():
    """Detection thread: run detection on queued frames until end of stream or ``stop`` is set.

    The end-of-stream sentinel is forwarded to ``detection_data`` after all
    results, so the main loop sees it only once every frame has been handled.
    """
    total_dtime = 0
    while not stop:
        if preprc_frame.qsize() > 0:
            frame_pre = preprc_frame.get()
            if frame_pre is None:
                detection_data.put(None)
                break

            bbox, dtime = detection(frame_pre)
            total_dtime += dtime
            # Queue the time first so it is ready as soon as the main loop sees the result.
            detection_time.put(total_dtime)
            detection_data.put(bbox)
        else:
            sleep(0.001)

th1 = Thread(target = getFrame_mt)
th2 = Thread(target = detection_mt)
th1.start()
th2.start()

start_time = time()
# ----------------------------------------------------------------------------------------------------- #
try:
    while True:
        if detection_data.qsize() > 0:
            bounding_box = detection_data.get()
            if bounding_box is None:
                # End-of-stream sentinel: every frame that could be read has been processed.
                printString('影片物件偵測完成', 58)
                break
            frame = original_frame.get()

            if not all_object:
                bounding_box = objectFilter(bounding_box, particular_obj)
            frame_drw = drawImage(frame, bounding_box)

            if write_video:
                output_video.write(frame_drw)

            frame_cnt += 1
            processingTime('video', time() - start_time, detection_time.get(), frame_cnt)

            if frame_cnt == 1:
                cv2.namedWindow('Detection_Result', cv2.WINDOW_AUTOSIZE)
            if not showResult('video', frame_drw):
                printString('程式手動停止', 60)
                break
        else:
            sleep(0.001) # delay 1ms
finally:
    # Runs on normal completion, manual stop and on errors or kernel interrupts alike:
    # stop the workers and wait for them before the capture they read from is released.
    stop = True
    th1.join(timeout = 5)
    th2.join(timeout = 5)
    releaseVideo()

總處理時間：17.993秒
平均處理時間：0.028秒
平均偵測時間：0.027秒
