In [1]:
#import packages for facial recognition
import cv2
import tensorflow as tf
import numpy as np
import time
from joblib import load
from keras.models import load_model

# import packages for blink detection
from collections import defaultdict
from scipy.spatial import distance as dist
from imutils.video import FileVideoStream
from imutils.video import VideoStream
from imutils import face_utils
import numpy as np
import argparse
import imutils
import time
import dlib
import cv2

# from PIL import Image

from sklearn.preprocessing import Normalizer

from src import detectors
from src import face_encoders

FaceEncoder = face_encoders.FaceEncoder
FaceDetector = detectors.FaceDetector

import warnings

Using TensorFlow backend.


In [2]:
# Paths to every on-disk artifact the pipeline loads: the FaceNet encoder
# weights, the joblib-serialised recognition artifacts and dlib's 68-point
# landmark model.
#
# Disabled alternatives kept for reference:
#   * "detector": ("DLIB",)
#   * the untuned encoder "src/encoders/facenet_keras.h5" together with the
#     matching "*_2.joblib" files under "trained-models/".
args = dict(
    facenet_model="src/encoders/tuned_facenet_keras.h5",
    face_classifier="trained-models/face_classifier_3.joblib",
    face_label_encoder="trained-models/face_label_encoder_3.joblib",
    tau="trained-models/tau_3.joblib",
    avg_embeddings="trained-models/avg_embeddings_3.joblib",
    shape_predictor="src/detectors/shape_predictor_68_face_landmarks.dat",
)

In [3]:
def eye_aspect_ratio(eye):
    """Return the eye aspect ratio (EAR) for one eye.

    Args:
        eye: indexable collection of at least six (x, y) landmark points.
            Points 1/5 and 2/4 are treated as the two vertical pairs and
            points 0/3 as the horizontal pair.

    Returns:
        The sum of the two vertical distances divided by twice the
        horizontal distance.
    """
    # the two vertical landmark pairs
    vertical_first = dist.euclidean(eye[1], eye[5])
    vertical_second = dist.euclidean(eye[2], eye[4])
    # the single horizontal landmark pair
    horizontal = dist.euclidean(eye[0], eye[3])
    return (vertical_first + vertical_second) / (2.0 * horizontal)

def cal_distance(emb1, emb2):
    """Return the squared Euclidean distance between two embeddings.

    The inputs are subtracted element-wise, so they must support ``-``
    (e.g. NumPy arrays of compatible shape). No square root is taken.
    """
    squared_diff = np.square(emb1 - emb2)
    return np.sum(squared_diff)

# Load the FaceNet encoder; the frame loop uses it (via get_embedding) to
# turn a face crop into an embedding.
print("[INFO] loading FaceNet model...")
encoder_model = FaceEncoder("facenet_keras", args["facenet_model"])
print("[INFO] Successfully loaded FaceNet model...")

# load the facial recognition artifacts: the classifier, its label encoder,
# the per-identity reference embeddings (presumably averages, going by the
# name) and the distance threshold `tau` above which a match is rejected.
# NOTE(review): joblib.load unpickles arbitrary objects -- only point these
# paths at files you trust.
print("[INFO] loading facial recognition model...")
face_clf = load(args["face_classifier"])
face_label_encoder = load(args["face_label_encoder"])
avg_embeddings = load(args["avg_embeddings"])
tau = load(args["tau"])
print("[INFO] Successfully loaded facial recognition model...")

# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor
print("[INFO] loading facial landmark predictor...")
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(args["shape_predictor"])
# grab the indexes of the facial landmarks for the left and
# right eye, respectively
(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

# NOTE: only the log line is emitted here; the camera itself is opened in
# the next cell (cv2.VideoCapture).
print("[INFO] starting video stream...")


# Unused alternative face detector (Haar cascade), kept for reference:
# face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# L2-normalises an embedding before it is handed to the classifier.
in_encoder = Normalizer(norm='l2')

[INFO] loading FaceNet model...




[INFO] Successfully loaded FaceNet model...
[INFO] loading facial recognition model...
[INFO] Successfully loaded facial recognition model...
[INFO] loading facial landmark predictor...
[INFO] starting video stream...


In [4]:
warnings.filterwarnings('ignore')

# Blink detection: an eye aspect ratio below EYE_AR_THRESH counts as a closed
# eye, and a run of at least EYE_AR_CONSEC_FRAMES closed frames followed by an
# open frame counts as one blink.
EYE_AR_THRESH = 0.20
EYE_AR_CONSEC_FRAMES = 1

# Index of the capture device handed to cv2.VideoCapture.
CAMERA_INDEX = 1
# Number of consecutive frames an identity may be missing from the picture
# before its blink record is discarded.
MAX_ABSENT_FRAMES = 15
# Label given to faces whose embedding is too far from the predicted identity.
UNKNOWN_IDENTITY = "unknown"

cap = cv2.VideoCapture(CAMERA_INDEX)

# identity -> [total blinks, consecutive closed-eye frames, consecutive absent frames]
blink = defaultdict(lambda: [0, 0, 0])

try:
    if not cap.isOpened():
        raise RuntimeError(f"could not open camera with index {CAMERA_INDEX}")

    # give the camera sensor time to warm up
    time.sleep(2.0)

    # loop over the frames from the video stream
    while True:
        ids_in_frame = set()
        ret, frame = cap.read()
        if not ret or frame is None:
            # camera unplugged / stream ended: stop instead of feeding None
            # to the detector
            print("[INFO] no frame received from the camera, stopping...")
            break

        # Converted once per frame, before any annotation is drawn, so every
        # face's landmarks are predicted on the clean image.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        rects = detector(frame, 0)
        for rect in rects:
            startX, startY = rect.left(), rect.top()
            endX, endY = rect.right(), rect.bottom()

            if startX < 0 or startY < 0 or endX > frame.shape[1] or endY > frame.shape[0]:
                # move on if part of the face is outside the frame
                continue

            # --- recognition ---
            face = frame[startY:endY, startX:endX]
            face_160 = cv2.resize(face, (160, 160))
            face_emb = encoder_model.get_embedding(face_160)
            pred = face_clf.predict(in_encoder.transform(np.expand_dims(face_emb, axis=0)))[0]
            # inverse_transform is given a one-element list rather than a bare
            # scalar (array-like input is the documented form).
            # NOTE(review): assumes face_label_encoder is a scikit-learn
            # LabelEncoder -- confirm.
            predict_identity = face_label_encoder.inverse_transform([pred])[0]

            # reject the prediction when the embedding is too far from the
            # reference embedding stored for that identity
            emb_dist = cal_distance(face_emb, avg_embeddings[predict_identity])
            if emb_dist >= tau:
                predict_identity = UNKNOWN_IDENTITY

            is_known = predict_identity != UNKNOWN_IDENTITY

            # --- blink detection (recognised faces only) ---
            if is_known:
                ids_in_frame.add(predict_identity)
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)
                # extract the left and right eye coordinates, then use the
                # coordinates to compute the eye aspect ratio for both eyes
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]
                leftEAR = eye_aspect_ratio(leftEye)
                rightEAR = eye_aspect_ratio(rightEye)

                ear = (leftEAR + rightEAR) / 2.0

                record = blink[predict_identity]
                if ear < EYE_AR_THRESH:
                    # eyes closed in this frame
                    record[1] += 1
                else:
                    # eyes open again: count a blink if they had been closed
                    # for enough consecutive frames, then reset the run
                    if record[1] >= EYE_AR_CONSEC_FRAMES:
                        record[0] += 1
                    record[1] = 0

            # --- annotation ---
            # str(round(...)) is kept on purpose: f-string formatting of a
            # NumPy float32 can print differently from str().
            label = predict_identity + ": " + str(round(emb_dist, 2))
            cv2.putText(frame, label, (startX, startY - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            if is_known:
                cv2.putText(frame, "blinked: " + str(blink[predict_identity][0]),
                            (startX, endY + 14),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 2)

        # Forget identities that have been out of the picture for
        # MAX_ABSENT_FRAMES consecutive frames; the counter is reset as soon
        # as the identity shows up again. Iterate over a copy because entries
        # are deleted inside the loop.
        for identity in list(blink):
            if identity in ids_in_frame:
                blink[identity][2] = 0
            else:
                blink[identity][2] += 1
            if blink[identity][2] >= MAX_ABSENT_FRAMES:
                del blink[identity]

        # show the output frame and wait for a key press
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
finally:
    # always close the window and release the camera, even when the loop
    # raised, so the device is not left locked by the notebook kernel
    cv2.destroyAllWindows()
    cap.release()
