In [1]:
# *************************************************************************************************
# Author: Andi Sama 
# Purpose: Face Liveness Detection
#   - Illustrates detection of human face liveness through blinking eyes
#     * Source: Video stream (Webcam/Video file)
#     * Process: Capture image in video, convert to grey then pass to a trained model for face recognition
#         & eyes recognition. Model (recognize eyes) is trained using keras (based on tensorflow)
#     * Output: Trained Face Recognized, Blinking Eyes Detected (open->close->open)
# Organization: Sinergi Wahana Gemilang
# Creation Date: April 1, 2020
# Changes history:
#   - April 6-10, 2020: Solving compatibility issues 
#       * convert scipy resize and reshape functions to numpy arrays (asm_eye_status.ipynb)
#   - April 10, 2020: Basic things done
#       * blinking eyes detection works, use a pre-trained model (94% accuracy)
#       * 'p' keystroke to pause while cv2 is showing frames with overlays
#   - April 11: Add additional video input 
#       * add a video file as input stream, in init() function
#   - April 12: Face recognition by name, using a trained model
#       * add 2 Korean artists in Netflix: "Chief of Staff" movie
#   - April 14-16: preparing for article in medium
#   - April 18-19: finalized
#       * User Selection for input source (Webcam/Default video file)
#       * Add section in this notebook to retrain keras model to recognize eyes 
#         Pass 'max number of epochs' to train & experiment with training/validation accuracies
#   - April 24-26: Exception handling, write processed video to file
#       * exception handling for empty frame in detect_and_display()
#       * write processed video (overlay w/ face recognition/blinking eyes detection) to a file
#       * add date & time overlay as header in detect_and_display()
#       * add image overlay (eye clipart) if blinking eye detected in detect_and_display()
#   - May 2: Final - exception handling, write processed video to file
#       * fix logic error in processing select_source() return code (str -> int)
#       * test existing code with camera input: face with mask (OK)
# References:
#   A fork from https://github.com/Guarouba/face_rec (2019)
#   See article in medium.com/@andisama
# *************************************************************************************************

In [2]:
import os, platform, sys, time
from datetime import date
print('OS name:', os.name, ', system:', platform.system(), ', release:', platform.release())
print("Anaconda version:")
!conda list anaconda
print("Python version:", sys.version)
print("Python version info: ", sys.version_info)
import cv2
print("OpenCV version:", cv2.__version__)
import numpy as np
print("numpy version:", np.__version__)
import tensorflow as tf
print("Keras, tensorflow version:", tf.keras.__version__, tf.__version__)
from tqdm import tqdm
from collections import defaultdict
from asm_eye_status import * 
import face_recognition
print("Face Recognition version:", face_recognition.__version__)
import imutils
from imutils.video import VideoStream

OS name: nt , system: Windows , release: 10
Anaconda version:
# packages in environment at C:\Users\andis\anaconda3:
#
# Name                    Version                   Build  Channel
_anaconda_depends         2019.03                  py37_0  
anaconda                  custom                   py37_1  
anaconda-client           1.7.2                    py37_0  
anaconda-navigator        1.9.12                   py37_0  
anaconda-project          0.8.4                      py_0  
Python version: 3.7.7 (default, Mar 23 2020, 23:19:08) [MSC v.1916 64 bit (AMD64)]
Python version info:  sys.version_info(major=3, minor=7, micro=7, releaselevel='final', serial=0)
OpenCV version: 4.2.0
numpy version: 1.18.1
Keras, tensorflow version: 2.2.4-tf 2.1.0


Using TensorFlow backend.


Face Recognition version: 1.2.3


In [3]:
# # Create a deep learning model to recognize open/closed eyes
# # April 18-19, 2020
# #   => asm_eye_status.ipynb 
# # The following needs to be done only once
# #  - Train the deep learning model to recognize open & closed eyes
# #  - Once the model has been generated, this whole cell can be marked all as comments
# #  - 2 model files will be generated in current directory
# #    * model.h5 (keras-based model)
# #    * model.json
# # in about 100 epochs, achieved quite a good combination of training & validation accuracy
# #    * Epoch 100/100
# #    * 118/118 - 2s 19ms/step - loss: 0.0104 - accuracy: 0.9979 - val_loss: 5.0244e-06 - val_accuracy: 0.9556
# # (in general, 20 epochs should be enough to achieve about 94-95% accuracy on training & validation)
# epoch = 100
# # collect images dataset from dataset/train (training) and dataset/val (validation) directories
# train_generator, val_generator = collect()
# # train & save the model (h5 and json file)
# train(train_generator, val_generator, epoch)

In [4]:
def _probe_resolution(source):
    """Open `source` with cv2.VideoCapture only long enough to read its frame size.

    @source: camera index or video file path, passed straight to cv2.VideoCapture
    Returns (width, height) in pixels as reported by OpenCV. The capture is always
    released before returning, so the source can be re-opened by another reader.
    """
    probe = cv2.VideoCapture(source)
    try:
        width = int(probe.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(probe.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        probe.release()
    return (width, height)


def init(video_source):
    """Load the detectors and the eye model, open the video source and list the face images.

    @video_source: 0 opens the webcam (device 0); any other value opens the default video file
    Returns a tuple:
        (model, face_detector, open_eyes_detector, left_eye_detector, right_eye_detector,
         video_capture, images, source_resolution)
    where video_capture is an already-started imutils VideoStream, images is the list of
    paths ending in "jpg" found under the 'faces' directory, and source_resolution is
    (width, height) in pixels.
    """
    face_cascPath = 'haarcascade_frontalface_alt.xml'
    # face_cascPath = 'lbpcascade_frontalface.xml'

    open_eye_cascPath = 'haarcascade_eye_tree_eyeglasses.xml'
    left_eye_cascPath = 'haarcascade_lefteye_2splits.xml'
    right_eye_cascPath = 'haarcascade_righteye_2splits.xml'
    dataset = 'faces'

    face_detector = cv2.CascadeClassifier(face_cascPath)
    open_eyes_detector = cv2.CascadeClassifier(open_eye_cascPath)
    left_eye_detector = cv2.CascadeClassifier(left_eye_cascPath)
    right_eye_detector = cv2.CascadeClassifier(right_eye_cascPath)

    # asama: modified to include input stream from a video file
    # cv2.VideoCapture is used only to query the resolution (reading frames through it was
    # found to be very slow); frames are then read through imutils VideoStream, which buffers.
    if video_source == 0:
        # 1. Integrated camera
        print("[LOG] Opening webcam...")
        print("[LOG] Getting Camera Resolution...")
        source_resolution = _probe_resolution(0)
        print('Camera resolution (width, height) in pixels:', source_resolution)
        video_capture = VideoStream(src=0).start()
    else:
        # 2. Video file (video_source other than 0)
        # Build the path with os.path.join: the former literal 'data\Chief...' depended on
        # an invalid escape sequence and on the Windows path separator.
        video_file = os.path.join('data', 'Chief of Staff 2 Ep 1 Trailer.mp4')
        # Join with the working directory so the logged path has a separator between them.
        print("[LOG] Opening default video file...", os.path.join(os.getcwd(), video_file))
        print("[LOG] Getting Video Resolution...")
        source_resolution = _probe_resolution(video_file)
        print('Video resolution (width, height) in pixels:', source_resolution)
        video_capture = VideoStream(src=video_file).start()

    # load_model() is not defined in this cell; presumably it comes from the
    # `from asm_eye_status import *` import - verify.
    model = load_model()

    print("[LOG] Collecting images...")
    images = []
    for direc, _, files in tqdm(os.walk(dataset)):
        for file in files:
            if file.endswith("jpg"):
                images.append(os.path.join(direc, file))
    # print(" >DEBUG (collected file names):", images)
    return (model, face_detector, open_eyes_detector, left_eye_detector, right_eye_detector,
            video_capture, images, source_resolution)

In [5]:
def process_and_encode(images):
    """Compute one face encoding per readable image that contains a detectable face.

    @images: iterable of image file paths; the name of each file's parent directory is
        used as the person's name
    Returns {"encodings": [...], "names": [...]}, two parallel lists. Images that cannot
    be read, or in which no face is found, contribute nothing to either list.
    """
    # initialize the list of known encodings and known names
    known_encodings = []
    known_names = []
    print("[LOG] Encoding faces...")

    for image_path in tqdm(images):
        # cv2.imread returns None (it does not raise) when the file is missing or not
        # decodable; skip such files instead of crashing in cvtColor below.
        image = cv2.imread(image_path)
        if image is None:
            print("[LOG] Skipping unreadable image:", image_path)
            continue

        # Convert it from BGR to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # detect faces in the image and get their locations (boxes coordinates)
        boxes = face_recognition.face_locations(image, model='hog')

        # Encode the faces into 128-d embedding vectors
        encoding = face_recognition.face_encodings(image, boxes)

        # Only the first detected face of each image is kept.
        if len(encoding) > 0:
            # the person's name is the name of the folder where the image comes from;
            # dirname/basename avoids an IndexError on paths with fewer than two components
            name = os.path.basename(os.path.dirname(image_path))
            known_encodings.append(encoding[0])
            known_names.append(name)

    return {"encodings": known_encodings, "names": known_names}


In [6]:
def isBlinking(history, maxFrames):
    """ Tell whether `history` contains an open -> closed -> open eye sequence.
        @history: A string of per-frame eye states. The pattern searched for is a '1',
         then a run of '0's, then a '1'; the code that builds the history in this file
         appends '1' for open eyes and '0' for closed eyes.
        @maxFrames: The longest run of '0's (1 up to maxFrames) still accepted as a blink """
    return any(
        ('1' + '0' * closed_run + '1') in history
        for closed_run in range(1, maxFrames + 1)
    )

In [7]:
def _mark_eyes(face_region, eyes, model):
    """Box every detected eye in `face_region` and report whether any is predicted closed.

    Each eye crop is passed to predict(); the box is green unless predict() returns
    'closed', in which case it is red. Boxes are drawn on `face_region` in place.
    """
    any_closed = False
    for (ex, ey, ew, eh) in eyes:
        color = (0, 255, 0)
        if predict(face_region[ey:ey+eh, ex:ex+ew], model) == 'closed':
            any_closed = True
            color = (0, 0, 255)
        cv2.rectangle(face_region, (ex, ey), (ex+ew, ey+eh), color, 2)
    return any_closed


def detect_and_display(model, video_capture, face_detector, open_eyes_detector, left_eye_detector, right_eye_detector, data, eyes_detected, source_resolution, img_overlay):
    """Read one frame, recognize faces, update each person's eye history and draw overlays.

    @model: eye open/closed model, passed through to predict()
    @video_capture: object whose read() returns a frame, or None when no frame is available
    @face_detector, open_eyes_detector, left_eye_detector, right_eye_detector: cascade
        classifiers used through detectMultiScale()
    @data: {"encodings": [...], "names": [...]} of the known faces
    @eyes_detected: mapping name -> eye-state history string; it is appended to in place
        ('1' = open, '0' = closed) and must tolerate `+=` on a name not seen before
    @source_resolution: (width, height) used to position the header and the blink icon
    @img_overlay: BGR image whose top-left 100x40 pixels are blended into the frame once
        a blink has been detected; may be None, in which case no icon is drawn
    Returns the annotated frame, or None when read() returned no frame.
    """
    #  ret, frame = video_capture.read() # OpenCV version, very slow
    frame = video_capture.read()  # imutils VideoStream version, much faster

    if frame is None:
        print('empty frame detected! - camera closed or end of file?, exiting...')
        return frame

    # video frame resize (scale 1.0 keeps the size; lower fx/fy, e.g. 0.6, to shrink)
    frame = cv2.resize(frame, (0, 0), fx=1.0, fy=1.0)

    frame = cv2.flip(frame, 1)  # flip horizontal
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Detect faces
    faces = face_detector.detectMultiScale(
        gray,
        scaleFactor=1.2,
        minNeighbors=5,
        minSize=(50, 50),
        flags=cv2.CASCADE_SCALE_IMAGE
    )

    blink_confirmed = False

    # for each detected face
    for (x, y, w, h) in faces:
        # Encode the face into a 128-d embeddings vector
        # (face_recognition expects the box as (top, right, bottom, left))
        encoding = face_recognition.face_encodings(rgb, [(y, x+w, y+h, x)])[0]

        # Compare the vector with all known faces encodings
        matches = face_recognition.compare_faces(data["encodings"], encoding)

        # For now we don't know the person name
        name = "Unknown"

        # If there is at least one match, pick the name with the largest number of votes
        if True in matches:
            counts = {}
            for i, matched in enumerate(matches):
                if matched:
                    candidate = data["names"][i]
                    counts[candidate] = counts.get(candidate, 0) + 1
            name = max(counts, key=counts.get)

        face = frame[y:y+h, x:x+w]
        gray_face = gray[y:y+h, x:x+w]

        # Eyes detection
        # check first if eyes are open (with glasses taking into account)
        open_eyes_glasses = open_eyes_detector.detectMultiScale(
            gray_face,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )
        # if open_eyes_glasses detects exactly two eyes then they are open
        if len(open_eyes_glasses) == 2:
            eyes_detected[name] += '1'
            for (ex, ey, ew, eh) in open_eyes_glasses:
                cv2.rectangle(face, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)

        # otherwise try detecting eyes using left and right_eye_detector
        # which can detect open and closed eyes
        else:
            # separate the face box into its two halves; the half with the larger x
            # coordinates goes to left_eye_detector, the other to right_eye_detector
            left_face = frame[y:y+h, x+int(w/2):x+w]
            left_face_gray = gray[y:y+h, x+int(w/2):x+w]

            right_face = frame[y:y+h, x:x+int(w/2)]
            right_face_gray = gray[y:y+h, x:x+int(w/2)]

            # Detect the left eye
            left_eye = left_eye_detector.detectMultiScale(
                left_face_gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(30, 30),
                flags=cv2.CASCADE_SCALE_IMAGE
            )

            # Detect the right eye
            right_eye = right_eye_detector.detectMultiScale(
                right_face_gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(30, 30),
                flags=cv2.CASCADE_SCALE_IMAGE
            )

            # The eyes are assumed open ('1'); if any detected eye is predicted closed
            # we conclude the eyes are closed ('0'). Right side is processed first.
            right_closed = _mark_eyes(right_face, right_eye, model)
            left_closed = _mark_eyes(left_face, left_eye, model)
            eyes_detected[name] += '0' if (right_closed or left_closed) else '1'

        # Each time, we check if the person has blinked
        # If yes, we display its name
        if isBlinking(eyes_detected[name], 3):
            blink_confirmed = True
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
            # Display name above the box, or below it when too close to the top edge
            label_y = y - 15 if y - 15 > 15 else y + 15
            cv2.putText(frame, name, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)

    # Header overlays are drawn once per frame, after all faces were processed, so that the
    # date & time is present even when no face is detected and never lands inside an eye
    # crop that is still to be classified.
    width = source_resolution[0]
    height = source_resolution[1]

    # current date & time
    c_datetime = str(date.today()) + ' ' + time.strftime("%H:%M:%S")
    x_hdr = int(width * 0.01)  # text origin x: 1% of the width from the left
    y_hdr = int(height * 0.1)  # text origin y: 10% of the height from the top
    cv2.putText(frame, c_datetime, (x_hdr, y_hdr), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2)

    if blink_confirmed and img_overlay is not None:
        # location to put image overlay "blinking eye", maintaining 1% distance from top right x & y
        # blinking eyes image resolution to be displayed is 100x40 (width x height)
        x2_hdr = int(width - (.01 * width) - 100)
        y2_hdr = int(height - (.99 * height))
        alpha = 0.4
        region = frame[y2_hdr:y2_hdr+40, x2_hdr:x2_hdr+100]
        icon = img_overlay[0:40, 0:100]
        # Blend only when the target region lies fully inside the frame and matches the icon
        if region.shape == icon.shape:
            frame[y2_hdr:y2_hdr+40, x2_hdr:x2_hdr+100] = cv2.addWeighted(region, alpha, icon, 1 - alpha, 0)

    return frame


In [8]:
def select_source():
    """Prompt on stdin until the user types '0' (webcam) or '1' (video file).

    Returns the accepted answer as a string ('0' or '1'); any other input
    simply causes the prompt to be shown again.
    """
    accepted = ('0', '1')
    # Each option line is indented by nine spaces in the displayed prompt.
    prompt = ("Please select source:\n"
              "         0: Webcam\n"
              "         1: Videofile\n")
    while True:
        answer = input(prompt)
        if answer in accepted:
            return answer

In [9]:
if __name__ == "__main__":
    # Entry point: pick a source, encode the known faces, then process frames until the
    # source is exhausted or 'q' is pressed, writing the annotated frames to a video file.
    print("[LOG] Initialization...")

    # overlay image (eye clipart) width: 100, height: 40
    overlay_path = 'data/icon_eye_100x40.png'
    img_overlay = cv2.imread(overlay_path)
    if img_overlay is None:
        # cv2.imread returns None instead of raising; without a usable image the blend
        # in detect_and_display() would fail at the first blink, so fall back to a blank icon.
        print("[LOG] WARNING: cannot read overlay image, using a blank placeholder:", overlay_path)
        img_overlay = np.zeros((40, 100, 3), dtype=np.uint8)

    # input in init(video_source); 0:WebCam, 1:VideoFile
    video_source = int(select_source())
    (model, face_detector, open_eyes_detector, left_eye_detector, right_eye_detector,
     video_capture, images, source_resolution) = init(video_source)

    out = None
    try:
        data = process_and_encode(images)

        # Define output filename; make sure the directory exists, otherwise the writer
        # cannot create the file and every frame is silently dropped.
        out_dir = 'output/'
        os.makedirs(out_dir, exist_ok=True)
        if video_source == 0:  # camera
            out_filename = out_dir + 'camera_face-blink_detect.mp4'
        else:  # video file
            out_filename = out_dir + 'video_face-blink_detect.mp4'

        # Define the codec and create VideoWriter object
        # ('mp4v' is the tag that matches the .mp4 container used for the output file)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        frame_rate = 5
        out = cv2.VideoWriter(out_filename, fourcc, frame_rate, source_resolution)
        if not out.isOpened():
            print("[LOG] WARNING: could not open output file for writing:", out_filename)

        # name -> eye-state history string; missing names start as ''
        eyes_detected = defaultdict(str)
        imshow_label = "Face Liveness Detector - Blinking Eyes (q-quit, p-pause)"
        print("[LOG] Detecting & Showing Images...")

        while True:
            frame = detect_and_display(model, video_capture, face_detector, open_eyes_detector,
                                       left_eye_detector, right_eye_detector, data, eyes_detected,
                                       source_resolution, img_overlay)
            if frame is None:
                break
            out.write(frame)
            cv2.imshow(imshow_label, frame)

            # asama: modified to include p=pause
            key_pressed = cv2.waitKey(1)
            if key_pressed & 0xFF == ord('q'):  # q=quit
                break
            elif key_pressed & 0xFF == ord('p'):  # p=pause, until any key is pressed
                cv2.waitKey(-1)

        print("[LOG] Writing output file...", out_filename)
    finally:
        # Release the capture thread, the writer and the windows even if processing failed
        video_capture.stop()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
    print("[LOG] All done.")

[LOG] Initialization...
Please select source:
         0: Webcam
         1: Videofile
1
[LOG] Opening default video file... C:\Users\andis\Code\FaceRecdata\Chief of Staff 2 Ep 1 Trailer.mp4
[LOG] Getting Video Resolution...
Video resolution (width, height) in pixels: (1280, 720)


4it [00:00, 1335.77it/s]
  0%|          | 0/82 [00:00<?, ?it/s]

[LOG] Collecting images...
[LOG] Encoding faces...


100%|██████████| 82/82 [00:47<00:00,  1.74it/s]


[LOG] Detecting & Showing Images...
empty frame detected! - camera closed or end of file?, exiting...
[LOG] Writing output file... output/video_face-blink_detect.mp4
[LOG] All done.
