In [1]:
# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import pickle
import time
import cv2
import os
import embedding
import resize
import facenet
import tensorflow as tf
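# NOTE: the argparse-based configuration below is disabled (commented out);
# this notebook takes its settings from the `args` dict defined in the next cell.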

# ap = argparse.ArgumentParser()
# ap.add_argument("-d", "--detector", required=True,
#     help="path to OpenCV's deep learning face detector")
# ap.add_argument("-m", "--embedding-model", required=True,
#     help="path to OpenCV's deep learning face embedding model")
# ap.add_argument("-r", "--recognizer", required=True,
#     help="path to model trained to recognize faces")
# ap.add_argument("-l", "--le", required=True,
#     help="path to label encoder")
# ap.add_argument("-c", "--confidence", type=float, default=0.5,
#     help="minimum probability to filter weak detections")
# args = vars(ap.parse_args())

In [2]:
# Notebook configuration: every model artifact path plus the detection
# threshold, gathered in one literal (replaces the disabled argparse block).
args = {
    # stage 0: OpenCV face detector directory and FaceNet embedding graph
    "detector": "./model/face_detection_model",
    "embedding_model": "./model/20180402-114759.pb",
    # stage 1: mask / no-mask classifier, its scaler and its label encoder
    "recognizer": "./model/mask_detector/face_detector_log.pkl",
    "recog_sc": "./model/mask_detector/log_sc.pkl",
    "le": "./model/mask_detector/le.pickle",
    # minimum detector confidence for a face to be processed
    "confidence": 0.5,
    # stage 2a: "is the mask worn correctly" classifier and scaler
    "wearc_model": "./model/wearing_correctness/correctness_nn.pkl",
    "wearc_sc": "./model/wearing_correctness/correct_sc.pkl",
    # stage 2b: identity classifier for unmasked faces, scaler and label encoder
    "face3_model": "./model/face_recognition/face3_logreg.pkl",
    "face3_sc": "./model/face_recognition/face3_sc.pkl",
    "face3_le": "./model/face_recognition/face3_le.pickle",
}

In [3]:
def _load_pickle(path):
    """Deserialize one pickled artifact and close its file handle afterwards.

    Args:
        path: filesystem path of the pickle file to read.

    Returns:
        The object stored in the file.
    """
    # NOTE(security): unpickling runs arbitrary code embedded in the file, so
    # these paths must only ever point at artifacts you produced or trust.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# load our serialized face detector from disk
print("[INFO] loading face detector...")
protoPath = os.path.join(args["detector"], "deploy.prototxt")
modelPath = os.path.join(args["detector"],
    "res10_300x300_ssd_iter_140000.caffemodel")
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# load the pickled classifiers, scalers and label encoders from disk
print("[INFO] loading face recognizer...")
# stage 1: scaler, classifier and label encoder used to name each face
recog_sc = _load_pickle(args["recog_sc"])
recognizer = _load_pickle(args["recognizer"])
le = _load_pickle(args["le"])

# stage 2a: mask-wearing-correctness classifier and its scaler
wearc_model = _load_pickle(args["wearc_model"])
wearc_sc = _load_pickle(args["wearc_sc"])

# stage 2b: identity classifier, its scaler and its label encoder
face3_model = _load_pickle(args["face3_model"])
face3_sc = _load_pickle(args["face3_sc"])
face3_le = _load_pickle(args["face3_le"])

[INFO] loading face detector...
[INFO] loading face recognizer...


In [8]:
# Live monitoring loop: detect faces in webcam frames, embed each face with
# the FaceNet graph, classify it with the pickled models and annotate the
# frame. Press `q` in the preview window to stop.

# consecutive empty camera reads tolerated before the loop gives up
MAX_MISSED_READS = 100

with tf.Graph().as_default():
    with tf.compat.v1.Session() as sess:
        np.random.seed(seed=666)
        # Load the model
        print('Loading feature extraction model')
        facenet.load_model(args["embedding_model"])

        # Get input and output tensors once: the lookups do not depend on the
        # frame, so there is no reason to repeat them for every detected face
        graph = tf.compat.v1.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

        # initialize the video stream
        print("[INFO] starting video stream...")
        vs = VideoStream(src=0).start()

        # start the FPS throughput estimator
        fps = FPS().start()
        missed_reads = 0

        try:
            # loop over frames from the video file stream
            while True:
                # grab the frame from the threaded video stream; it is None
                # while the camera has not delivered a frame (there is no
                # warm-up sleep) or when the device could not be opened
                frame = vs.read()
                if frame is None:
                    missed_reads += 1
                    if missed_reads > MAX_MISSED_READS:
                        print("[INFO] no frames received from the camera, stopping")
                        break
                    time.sleep(0.05)
                    continue
                missed_reads = 0

                # resize the frame to have a width of 600 pixels (while
                # maintaining the aspect ratio), and then grab the image
                # dimensions
                frame = imutils.resize(frame, width=600)
                (h, w) = frame.shape[:2]

                # construct a blob from the image
                imageBlob = cv2.dnn.blobFromImage(
                    cv2.resize(frame, (300, 300)), 1.0, (300, 300),
                    (104.0, 177.0, 123.0), swapRB=False, crop=False)

                # apply OpenCV's deep learning-based face detector to localize
                # faces in the input image
                detector.setInput(imageBlob)
                detections = detector.forward()

                # loop over the detections
                for i in range(0, detections.shape[2]):
                    # extract the confidence (i.e., probability) associated
                    # with the prediction and filter out weak detections
                    confidence = detections[0, 0, i, 2]
                    if confidence <= args["confidence"]:
                        continue

                    # compute the (x, y)-coordinates of the bounding box for
                    # the face
                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                    (startX, startY, endX, endY) = (int(v) for v in box.astype("int"))

                    # clamp to the frame: a negative coordinate would wrap
                    # around in the slice below and crop the wrong region
                    startX, startY = max(0, startX), max(0, startY)
                    endX, endY = min(w, endX), min(h, endY)

                    # extract the face ROI
                    face = frame[startY:endY, startX:endX]
                    (fH, fW) = face.shape[:2]

                    # ensure the face width and height are sufficiently large
                    if fW < 20 or fH < 20:
                        continue

                    img = resize.resize_addframe(face, 160, 160)

                    # Run forward pass to calculate embeddings
                    images = facenet.load_data(img, False, False, 160)
                    feed_dict = { images_placeholder:images, phase_train_placeholder:False }
                    vec = sess.run(embeddings, feed_dict=feed_dict)

                    # perform classification to recognize the face; predict()
                    # returns a one-element array, so take element 0 before
                    # int() (array-to-scalar conversion is deprecated in NumPy)
                    vec_nor = recog_sc.transform(vec)
                    pred_label = int(recognizer.predict(vec_nor)[0])
                    proba = recognizer.predict_proba(vec_nor)[0][pred_label]
                    name = le.classes_[pred_label]

                    # default message so `tip` is always bound for this face
                    # and never carried over from the previous detection
                    tip = str(name)
                    if name == "mask":
                        wearc_label = int(wearc_model.predict(wearc_sc.transform(vec))[0])
                        if wearc_label == 0:
                            tip = "wearing correctly"
                        elif wearc_label == 1:
                            tip = "please wear your mask correctly"
                    elif name == "face":
                        face3_label = int(face3_model.predict(face3_sc.transform(vec))[0])
                        person = face3_le.classes_[face3_label]
                        tip = person + ": please wear your mask"

                    # pick the annotation colour (BGR): green when worn
                    # correctly, yellow when worn incorrectly, red otherwise
                    if tip == "wearing correctly":
                        color = (0, 255, 0)
                    elif tip == "please wear your mask correctly":
                        color = (0, 255, 255)
                    else:
                        color = (0, 0, 255)

                    # draw the bounding box of the face along with the
                    # associated probability (the percentage shown is the
                    # confidence of the first-stage `recognizer`)
                    text = "{}: {:.2f}%".format(tip, float(proba) * 100)
                    y = startY - 10 if startY - 10 > 10 else startY + 10
                    cv2.rectangle(frame, (startX, startY), (endX, endY),
                        color, 2)
                    cv2.putText(frame, text, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)

                # update the FPS counter
                fps.update()

                # show the output frame
                cv2.imshow("Frame", frame)
                key = cv2.waitKey(1) & 0xFF

                # if the `q` key was pressed, break from the loop
                if key == ord("q"):
                    break
        finally:
            # stop the timer and release the camera and preview window even
            # when a frame fails mid-loop, so the device is not left locked
            fps.stop()
            vs.stop()
            cv2.destroyAllWindows()

            # display FPS information
            print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
            print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))


Loading feature extraction model
Model filename: ./model/20180402-114759.pb
[INFO] starting video stream...
[INFO] elasped time: 1750.27
[INFO] approx. FPS: 9.14
