# 匯入Tensorflow

In [1]:
import tensorflow
# The rest of the notebook is written against the TF1 graph/session API
# (tf.Session, tf.GraphDef, ...), so `tf` is bound to that API on both
# TensorFlow 1.x and 2.x installations.
if tensorflow.__version__.startswith('1.'):
    import tensorflow as tf
    from tensorflow.python.platform import gfile
else:
    # `v2` keeps a handle on the native TF2 namespace; it is not referenced
    # by the cells visible in this notebook.
    import tensorflow as v2
    import tensorflow.compat.v1 as tf
    # Must run before any graph is built so the compat.v1 API behaves like TF1.
    tf.disable_v2_behavior()
    import tensorflow.compat.v1.gfile as gfile
print("Tensorflow version:{}".format(tf.__version__))

Instructions for updating:
non-resource variables are not supported in the long term
Tensorflow version:2.5.0


# 匯入其他套件

In [2]:
import cv2,time
import numpy as np

# 影像串流初始化(本機電腦)

In [3]:
def video_init(source=0):
    """
    Open a video stream and report its frame size.

    :param source: where the frames come from:
        1. the number 0 for a USB camera or a laptop's built-in camera
        2. the path of a video file (.mp4 or .avi)
    :return: tuple (cap, height, width) - the cv2.VideoCapture object and the
        frame height and width reported by it, converted to int.
    """
    capture = cv2.VideoCapture(source)
    # Query height first, then width (the original note says a default
    # camera usually reports 480 and 640 - not verified here).
    frame_h, frame_w = [
        int(capture.get(prop))
        for prop in (cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FRAME_WIDTH)
    ]
    return capture, frame_h, frame_w

# 恢復PB檔案函數

In [4]:
def model_restore_from_pb(pb_path,node_dict,GPU_ratio=None):
    """
    Load a frozen graph (.pb) into a fresh graph/session and resolve tensors.

    :param pb_path: path of the serialized GraphDef file.
    :param node_dict: dict mapping a caller-chosen key to a tensor name
        (for example {'input': 'input:0'}).
    :param GPU_ratio: None lets GPU memory grow on demand; otherwise the value
        is used as the per-process GPU memory fraction.
    :return: tuple (session, tf_node_dict). tf_node_dict maps each key of
        node_dict to the resolved tensor; keys whose tensor name cannot be
        resolved are reported on stdout and left out of the dict.
    """
    tf_node_dict = dict()
    with tf.Graph().as_default():
        config = tf.ConfigProto(log_device_placement=True,
                                allow_soft_placement=True,
                                )
        if GPU_ratio is None:
            config.gpu_options.allow_growth = True
        else:
            config.gpu_options.per_process_gpu_memory_fraction = GPU_ratio

        # The session binds to the graph made default by the enclosing `with`,
        # so no further as_default() call is needed.
        sess_pb = tf.Session(config=config)
        try:
            # GFile replaces the deprecated FastGFile (same read interface).
            with gfile.GFile(pb_path, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())

            tf.import_graph_def(graph_def, name='')  # import the computation graph

            sess_pb.run(tf.global_variables_initializer())
        except Exception:
            # Do not leak the session (and its GPU memory) when loading fails.
            sess_pb.close()
            raise

        for key,value in node_dict.items():
            try:
                tf_node_dict[key] = sess_pb.graph.get_tensor_by_name(value)
            except (KeyError, ValueError, TypeError):
                # Best effort: an unknown or malformed tensor name is reported
                # and skipped; anything else (e.g. KeyboardInterrupt) propagates.
                print("節點名稱:{}不存在".format(key))
        return sess_pb,tf_node_dict

# 人臉偵測相關函數

In [5]:
def generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios, offset=0.5):
    '''
    Generate anchor boxes for every cell of every feature map.

    :param feature_map_sizes: list of list, for example: [[40,40], [20,20]]
    :param anchor_sizes: list of list, for example: [[0.05, 0.075], [0.1, 0.15]]
    :param anchor_ratios: list of list, for example: [[1, 0.5], [1, 0.5]]
    :param offset: position of the anchor centre inside a grid cell, in cell
        units; the default 0.5 places it at the middle of the cell.
    :return: numpy array of shape [total_anchors, 4]; each row is
        [xmin, ymin, xmax, ymax] in coordinates normalised by the feature map size.
    '''
    anchor_bboxes = []
    for idx, feature_size in enumerate(feature_map_sizes):
        # Normalised centre coordinates of the grid cells along each axis.
        cx = (np.linspace(0, feature_size[0] - 1, feature_size[0]) + offset) / feature_size[0]
        cy = (np.linspace(0, feature_size[1] - 1, feature_size[1]) + offset) / feature_size[1]
        cx_grid, cy_grid = np.meshgrid(cx, cy)
        cx_grid_expend = np.expand_dims(cx_grid, axis=-1)
        cy_grid_expend = np.expand_dims(cy_grid, axis=-1)
        center = np.concatenate((cx_grid_expend, cy_grid_expend), axis=-1)

        # Every scale with the first ratio, plus the first scale with each
        # remaining ratio.
        num_anchors = len(anchor_sizes[idx]) + len(anchor_ratios[idx]) - 1
        # Repeat (cx, cy) so each anchor gets [cx, cy, cx, cy] to be offset
        # into [xmin, ymin, xmax, ymax].
        center_tiled = np.tile(center, (1, 1, 2 * num_anchors))
        anchor_width_heights = []

        # different scales with the first aspect ratio
        first_ratio = anchor_ratios[idx][0]
        for scale in anchor_sizes[idx]:
            width = scale * np.sqrt(first_ratio)
            height = scale / np.sqrt(first_ratio)
            anchor_width_heights.extend([-width / 2.0, -height / 2.0, width / 2.0, height / 2.0])

        # the first scale, with different aspect ratios (except the first one)
        first_scale = anchor_sizes[idx][0]
        for ratio in anchor_ratios[idx][1:]:
            width = first_scale * np.sqrt(ratio)
            height = first_scale / np.sqrt(ratio)
            anchor_width_heights.extend([-width / 2.0, -height / 2.0, width / 2.0, height / 2.0])

        bbox_coords = center_tiled + np.array(anchor_width_heights)
        anchor_bboxes.append(bbox_coords.reshape((-1, 4)))
    return np.concatenate(anchor_bboxes, axis=0)

def decode_bbox(anchors, raw_outputs, variances=[0.1, 0.1, 0.2, 0.2]):
    '''
    Turn raw network regressions into boxes, relative to the given anchors.
    Anchors and results are laid out as [xmin, ymin, xmax, ymax].

    :param anchors: numpy array with shape [batch, num_anchors, 4]
    :param raw_outputs: numpy array with the same shape as anchors; the four
        channels are the centre-x, centre-y, width and height regressions
    :param variances: list of float, one scale factor per channel,
        default=[0.1, 0.1, 0.2, 0.2]
    :return: numpy array of decoded boxes, [xmin, ymin, xmax, ymax] on the last axis
    '''
    # Anchor corners, kept 3-D so every later operation broadcasts per box.
    x_lo, y_lo = anchors[:, :, 0:1], anchors[:, :, 1:2]
    x_hi, y_hi = anchors[:, :, 2:3], anchors[:, :, 3:]

    prior_cx = (x_lo + x_hi) / 2
    prior_cy = (y_lo + y_hi) / 2
    prior_w = x_hi - x_lo
    prior_h = y_hi - y_lo

    scaled = raw_outputs * np.array(variances)

    # Centres are shifted proportionally to the anchor size; sizes are
    # scaled exponentially.
    center_x = scaled[:, :, 0:1] * prior_w + prior_cx
    center_y = scaled[:, :, 1:2] * prior_h + prior_cy
    half_w = np.exp(scaled[:, :, 2:3]) * prior_w / 2
    half_h = np.exp(scaled[:, :, 3:]) * prior_h / 2

    corners = [center_x - half_w, center_y - half_h,
               center_x + half_w, center_y + half_h]
    return np.concatenate(corners, axis=-1)

def single_class_non_max_suppression(bboxes, confidences, conf_thresh=0.2, iou_thresh=0.5, keep_top_k=-1):
    '''
    Greedy non-maximum suppression for a single class.

    Boxes scoring at or below conf_thresh are discarded first. The remaining
    boxes are visited from the highest confidence downwards: the current best
    box is kept and every other box whose IoU with it exceeds iou_thresh is
    dropped, until no candidates remain (or keep_top_k boxes were kept).

    :param bboxes: numpy array of 2D, [num_bboxes, 4], rows are [xmin, ymin, xmax, ymax]
    :param confidences: numpy array of 1D, [num_bboxes]
    :param conf_thresh: minimum confidence (exclusive) for a box to take part
    :param iou_thresh: IoU above which a box is suppressed by a better one
    :param keep_top_k: maximum number of boxes to keep, -1 for no limit
    :return: indices into the original bboxes of the surviving boxes, best
        first; an empty list when bboxes is empty
    '''
    if len(bboxes) == 0:
        return []

    conf_keep_idx = np.where(confidences > conf_thresh)[0]
    boxes = bboxes[conf_keep_idx]
    scores = confidences[conf_keep_idx]

    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # The small epsilon keeps zero-sized boxes from producing a zero area.
    areas = (right - left + 1e-3) * (bottom - top + 1e-3)

    # Ascending sort: the best remaining candidate is always the last entry.
    order = np.argsort(scores)
    kept = []
    while len(order) > 0:
        best, rest = order[-1], order[:-1]
        kept.append(best)

        # keep top k
        if keep_top_k != -1 and len(kept) >= keep_top_k:
            break

        inter_w = np.maximum(0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]))
        inter_h = np.maximum(0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]))
        inter_area = inter_w * inter_h
        iou = inter_area / (areas[rest] + areas[best] - inter_area)

        # Drop the box just kept together with everything it suppresses.
        order = rest[~(iou > iou_thresh)]

    return conf_keep_idx[kept]

# 人臉偵測類別

In [6]:
class face_detection():
    """
    Face detector backed by a frozen TensorFlow graph.

    The constructor loads the graph, resolves its input/output tensors and
    pre-computes the anchor boxes; `infer` returns pixel-space face boxes for
    one image.
    """
    def __init__(self,face_pb_path,margin = 40,GPU_ratio=None):
        """
        :param face_pb_path: path of the frozen face-detection graph (.pb).
        :param margin: total number of pixels added around each detected box
            (half on every side).
        :param GPU_ratio: forwarded to model_restore_from_pb.
        :raises KeyError: when one of the required tensors is not in the graph.
        """
        node_dict = {'input':'data_1:0',
                     'detection_bboxes':'loc_branch_concat_1/concat:0',
                     'detection_scores':'cls_branch_concat_1/concat:0'}
        conf_thresh = 0.8
        iou_thresh = 0.7
        #----anchors config
        feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
        anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45], [0.64, 0.72]]
        anchor_ratios = [[1, 0.62, 0.42]] * 5

        #----generate anchors
        anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)
        # for inference , the batch size is 1, the model output shape is [1, N, 4],
        # so we expand dim for anchors to [1, anchor_num, 4]
        anchors_exp = np.expand_dims(anchors, axis=0)
        sess,face_node_dict = model_restore_from_pb(face_pb_path, node_dict,GPU_ratio=GPU_ratio)

        # Fail early, and release the session, if the graph lacks a tensor
        # this class depends on.
        missing = [key for key in node_dict if key not in face_node_dict]
        if missing:
            sess.close()
            raise KeyError("tensors not found in {}: {}".format(
                face_pb_path, [node_dict[key] for key in missing]))

        tf_input = face_node_dict['input']
        # as_list() yields plain ints/None regardless of the TensorShape
        # behaviour in effect. Presumably the layout is [batch, H, W, C] - TODO confirm.
        dims = tf_input.shape.as_list()
        model_shape = [None,dims[1],dims[2],dims[3]]

        self.margin = margin
        self.conf_thresh = conf_thresh
        self.iou_thresh = iou_thresh
        self.anchors_exp = anchors_exp
        self.model_shape = model_shape
        self.tf_input = tf_input
        self.sess = sess
        # Use the tensors resolved from the graph rather than their name strings.
        self.detection_bboxes = face_node_dict['detection_bboxes']
        self.detection_scores = face_node_dict['detection_scores']
    def infer(self,img):
        """
        Detect faces in one image.

        :param img: image array of shape [height, width, channels]; it is
            converted with COLOR_BGR2RGB, so BGR channel order is expected.
        :return: list of (xmin, ymin, xmax, ymax) pixel tuples, enlarged by the
            margin and clipped to the image; boxes that end up empty after
            clipping are omitted.
        """
        coors = list()
        height,width,_ = img.shape
        #----image processing
        img_resized = cv2.resize(img, (self.model_shape[2], self.model_shape[1]))
        img_resized = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
        img_resized = img_resized.astype('float32')
        img_resized /= 255

        #----face detection
        feed_dict={self.tf_input: np.expand_dims(img_resized, axis=0)}
        y_bboxes_output, y_cls_output = self.sess.run([self.detection_bboxes, self.detection_scores],
                                                 feed_dict=feed_dict)
        y_bboxes = decode_bbox(self.anchors_exp, y_bboxes_output)[0]
        y_cls = y_cls_output[0]
        # To speed up, do single class NMS, not multiple classes NMS.
        bbox_max_scores = np.max(y_cls, axis=1)

        # keep_idxs are the boxes that survive NMS.
        keep_idxs = single_class_non_max_suppression(y_bboxes,
                                                     bbox_max_scores,
                                                     conf_thresh=self.conf_thresh,
                                                     iou_thresh=self.iou_thresh,
                                                     )
        #----convert the normalised boxes to pixel coordinates
        half_margin = self.margin // 2
        for idx in keep_idxs:
            bbox = y_bboxes[idx]
            # clip the coordinate, avoid the value exceed the image boundary.
            xmin = max(0, int(bbox[0] * width - half_margin))
            ymin = max(0, int(bbox[1] * height - half_margin))
            xmax = min(int(bbox[2] * width + half_margin), width)
            ymax = min(int(bbox[3] * height + half_margin), height)
            # A box lying outside the image collapses after clipping; callers
            # crop with these coordinates, so an empty region is not reported.
            if xmax <= xmin or ymax <= ymin:
                continue
            coors.append((xmin,ymin,xmax,ymax))

        return coors

# 影像串流與口罩判斷的函數

In [9]:
def mask_or_not(mask_pb_path,video_source=0,margin=40):
    """
    Show a live video window in which every detected face is labelled as
    wearing a mask or not.

    :param mask_pb_path: path of the frozen mask-classification graph (.pb).
    :param video_source: camera index or video file path, passed to video_init.
    :param margin: pixels added around each detected face, passed to face_detection.
    :return: None. The loop ends when 'q' is pressed or a frame cannot be read;
        the capture, the display windows and both TensorFlow sessions are
        released even if an error occurs.
    """
    #----variables
    face_pb_path = "face_detection.pb"
    frame_count = 0
    FPS = "Initialing"
    GPU_ratio = 0.06
    nodename_dict = {
                    'input': 'input:0',
                    'keep_prob': 'keep_prob:0',
                    'prediction': 'prediction:0'
                     }
    label2classname_dict = {0:'no_mask',
                            1:"with_mask"}

    #----video stream initialisation
    cap, height, width = video_init(video_source)
    print("影像高度:",height)
    print("影像寬度:",width)

    find_face = None
    sess_infer = None
    try:
        #----face detector initialisation
        find_face = face_detection(face_pb_path,margin=margin,
                                   GPU_ratio=GPU_ratio)

        #----mask classifier initialisation
        sess_infer,tf_node_dict = model_restore_from_pb(mask_pb_path,
                                nodename_dict,GPU_ratio=GPU_ratio)

        #----tensors used for inference
        pb_prediction = tf_node_dict['prediction']
        pb_input = tf_node_dict['input']
        pb_keep_prob = tf_node_dict['keep_prob']

        # Start of the current 20-frame FPS window.
        t_start = time.time()

        #----frame loop
        while (cap.isOpened()):

            #----grab a frame
            ret, img = cap.read()
            if ret is not True:
                print("取圖失敗")
                break

            #----face detection
            for xmin,ymin,xmax,ymax in find_face.infer(img):
                #----crop the face region
                img_face = img[ymin:ymax,xmin:xmax,:].copy()
                # An empty crop would make cv2.resize raise; skip such a box.
                if img_face.size == 0:
                    continue
                #----resize to 80 x 80
                img_face = cv2.resize(img_face,(80,80))
                #----add the batch dimension (3-D -> 4-D)
                img_face = np.expand_dims(img_face,axis=0)
                #----uint8 -> float32
                img_face = img_face.astype(np.float32)
                #----normalisation to [0, 1]
                # NOTE(review): the crop stays in OpenCV's BGR order - confirm
                # the mask model was trained on BGR input.
                img_face /= 255

                #----mask classification (keep_prob 1 disables dropout at inference)
                predictions = sess_infer.run(pb_prediction,
                             feed_dict={pb_input:img_face, pb_keep_prob:1})

                #----label index -> class name
                arg_predictions = np.argmax(predictions,axis=1)
                classname = label2classname_dict[arg_predictions[0]]
                #----box colour depends on the class
                if classname == 'with_mask':# mask on: green box
                    color = (0, 255, 0)  # (B,G,R)
                else:# no mask: red box
                    color = (0, 0, 255)  # (B,G,R)
                #----draw the box
                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 2)
                #----write the class name
                # cv2.putText(image, text, origin, font, scale, colour, thickness, line type)
                cv2.putText(img, classname, (xmin + 2, ymin - 2),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, color)

            #----FPS: refreshed every 20 frames over a window that spans
            #    exactly those 20 loop iterations
            frame_count += 1
            if frame_count >= 20:
                t_stop = time.time()
                elapsed = t_stop - t_start
                if elapsed > 0:
                    FPS = "FPS={}".format(round(20 / elapsed))
                frame_count = 0
                t_start = t_stop

            # cv2.putText(image, text, origin, font, scale, colour, thickness, line type)
            cv2.putText(img, FPS, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

            #----show the frame
            cv2.imshow("demo by JohnnyAI", img)

            #----key handling
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        #----stop the stream and release every resource, also on errors
        cap.release()
        cv2.destroyAllWindows()
        if sess_infer is not None:
            sess_infer.close()
        if find_face is not None:
            find_face.sess.close()

# 函數的使用

In [8]:
# Run the demo: camera 0 as the source, the trained mask classifier graph,
# and a 40-pixel margin around each detected face.
video_source = 0 # a video file path (.mp4 or .avi) can be given instead
mask_pb_path = r"infer_acc_0.99.pb"
margin = 40
mask_or_not(mask_pb_path,video_source=video_source,margin=margin)

影像高度: 480
影像寬度: 640
Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1

Instructions for updating:
Use tf.gfile.GFile.
Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1

