In [4]:
import numpy as np
import tensorflow as tf
import cv2
import time
use_TPU  = False
from tflite_runtime.interpreter import Interpreter
from myprotos import string_int_label_map_pb2
from google.protobuf import text_format

# Label map loading helper
def LoadLabelmap(LabelMapPath, UseDisplayName=True):
    """Parse a text-format StringIntLabelMap file into an id -> label dict.

    Args:
        LabelMapPath: Path of the label map text file to open and parse.
        UseDisplayName: When this compares equal to True, each value is the
            item's ``display_name``; otherwise it is the item's ``name``.

    Returns:
        A dict keyed by ``item.id`` for every item in the parsed label map.
    """
    with open(LabelMapPath) as map_file:
        raw_text = map_file.read()

    parsed_map = string_int_label_map_pb2.StringIntLabelMap()
    text_format.Merge(raw_text, parsed_map)

    # The flag does not change between items, so choose the field once.
    # `== True` is kept as-is so non-bool arguments select the same field
    # they always did.
    label_field = 'display_name' if UseDisplayName == True else 'name'
    return {entry.id: getattr(entry, label_field) for entry in parsed_map.item}

# ==== Load the model ====
if not use_TPU:
    # CPU path: no delegate is attached to the interpreter.
    interpreter = Interpreter(
        model_path=r'tfliteModel/ForClass_SSD_ModileNetV2Quantized_COCO/detect.tflite'
    )
else:
    # Edge TPU path: the delegate loader is imported only when it is needed.
    from tflite_runtime.interpreter import load_delegate
    interpreter = Interpreter(
        model_path=r'tfliteModel/ForClass_SSD_ModileNetV2Edgetpu_COCO/detect_edgetpu.tflite',
        experimental_delegates=[load_delegate('libedgetpu.so.1.0')],
    )

# Allocate the model's tensors before any tensor is read or written.
interpreter.allocate_tensors()
# Input descriptors: used later for the input resolution, dtype and tensor index.
Input_Info = interpreter.get_input_details()
# Output descriptors: used later to look up the result tensors by index.
Output_Info = interpreter.get_output_details()

# ==== Load the label map ====
# UseDisplayName=False, so the dict values are each item's `name` field.
LabelmapDict = LoadLabelmap(
    r'tfliteModel/ForClass_SSD_ModileNetV2Quantized_COCO/labelmap.pbtxt',
    UseDisplayName=False,
)

# ==== Read frames and run detection ====
# Runs detection on a video file; change the argument to 0
# (cv2.VideoCapture(0)) to read from a camera instead.
MyCam = cv2.VideoCapture('ValidateVideo.mp4')
try:
    while MyCam.isOpened():
        ret, cvimg = MyCam.read()
        if not ret:  # end of the video, or the read failed
            break
        # OpenCV delivers BGR; convert to RGB channel order for the model.
        cvimgRGB = cv2.cvtColor(cvimg, cv2.COLOR_BGR2RGB)

        # ==== Shape the frame into the model's input format ====
        # The model takes a fixed-size input, so the frame is scaled to the
        # size reported by the interpreter. cv2.resize expects (width, height);
        # assumes the input shape is [batch, height, width, channels] -- TODO confirm.
        cvimgRGB_Resized = cv2.resize(
            cvimgRGB, (Input_Info[0]['shape'][2], Input_Info[0]['shape'][1])
        )
        # Add a leading batch axis: [height, width, RGB] -> [1, height, width, RGB].
        Input_data = np.expand_dims(cvimgRGB_Resized, axis=0)
        if Input_Info[0]['dtype'] == np.float32:
            # Float input: rescale pixel values from 0..255 to -1..1.
            Input_data = (np.float32(Input_data) - 127.5) / 127.5

        # ==== Run inference ====
        StartTime = time.time()
        interpreter.set_tensor(Input_Info[0]['index'], Input_data)
        interpreter.invoke()

        # ==== Fetch results (index [0] selects the only image in the batch) ====
        # Assumes the outputs are ordered boxes, classes, scores, count;
        # confirm this ordering when swapping in a different model.
        detection_boxes = interpreter.get_tensor(Output_Info[0]['index'])[0]
        detection_classes = interpreter.get_tensor(Output_Info[1]['index'])[0]
        detection_scores = interpreter.get_tensor(Output_Info[2]['index'])[0]
        num_detections = interpreter.get_tensor(Output_Info[3]['index'])[0]

        # ==== Draw results ====
        frame_height, frame_width = cvimgRGB.shape[0], cvimgRGB.shape[1]
        for i in range(int(num_detections)):
            # Model class IDs start at 0 while the label map starts at 1.
            ClassID = int(detection_classes[i] + 1)
            Score = float(detection_scores[i])
            if Score > 0.5:
                # Boxes are [y1, x1, y2, x2] fractions of the frame size.
                y1 = int(detection_boxes[i][0] * frame_height)
                x1 = int(detection_boxes[i][1] * frame_width)
                y2 = int(detection_boxes[i][2] * frame_height)
                x2 = int(detection_boxes[i][3] * frame_width)
                cv2.rectangle(cvimg, (x1, y1), (x2, y2), (0, 255, 0), thickness=3)

                # The label map may not define every ID the model can emit;
                # fall back to the numeric ID instead of raising KeyError
                # and aborting the whole video loop.
                info = 'Class:{} Score:{}'.format(
                    LabelmapDict.get(ClassID, ClassID), round(Score, 2)
                )
                # cv2.putText(image, text, (x, y), font, font scale, color (B, G, R), thickness)
                cv2.putText(cvimg, info, (x1 + 5, y1 + 30), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 0), 2)

        # Elapsed time spans set_tensor through the drawing loop above.
        EndTime = time.time()
        Dtime = EndTime - StartTime
        cv2.putText(cvimg, str(int(Dtime * 1000)) + 'ms', (0 + 5, 0 + 30), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 255), 1)
        cv2.imshow('Hello', cvimg)
        if cv2.waitKey(1) == ord('q'):  # press q to stop early
            break
finally:
    # Release the capture and close the window even if a frame raised.
    MyCam.release()
    cv2.destroyAllWindows()