# Facial recognition using random forests
Carl Ehrett, 2019-04-30

The following code block defines all the variables that the user may need to change.

In [None]:
# Number of samples to record (of each person)
n = 10
# An observation is recorded once every `everyFrame` frames
everyFrame = 24
# Path to the facial landmarks data file
shapePath = (
    'C:\\Users\\carle\\Documents\\Python\\'
    'Math 9810 Machine Learning\\'
    'shape_predictor_68_face_landmarks.dat'
)
# Video source; this should select the webcam, but the value may need to be
# changed on your system
camSrc = 0
# Names of the people being recognized
personNames = [['CARL'], ['ERIN']]

Import all needed libraries. Note that you need to have all these libraries installed in order to use this Jupyter notebook.

In [None]:
from imutils.video import VideoStream
from imutils import face_utils
import numpy as np
import imutils
import time
import dlib
import cv2 # This one is the OpenCV library.
import matplotlib.pyplot as plt ; plt.rcdefaults
from sklearn.ensemble import RandomForestClassifier

Define the functions which will be used, in conjunction with the facial landmarks, to define covariates. The `getArea()` function takes as input three points (in 2D space) and returns the area of the triangle those points define. Similarly, `getAngle()` returns the angle (in radians) formed at the second of its three input points. Finally, `getLength()` takes two points as input, and returns the distance between them.

In [None]:
def getArea(x,y,z):
    """Return the area of the triangle whose corners are the 2D points x, y, z."""
    # Edge vectors leaving the shared corner y
    edge1 = x - y
    edge2 = z - y
    # The 2D cross product of the edges is twice the signed triangle area
    cross = edge1[0] * edge2[1] - edge1[1] * edge2[0]
    return np.abs(cross) / 2

def getAngle(x,y,z):
    a = x-y
    b = z-y
    cosineAngle = np.dot(a,b) / ( np.linalg.norm(a) * np.linalg.norm(b) )
    angle = np.arccos(cosineAngle)
    return angle
    
def getLength(x,y):
    """Return the Euclidean distance between the points x and y."""
    delta = x - y
    return np.linalg.norm(delta)

Define function to gather facial covariates:

In [None]:
def facedat(scaledShape):
    """Return the list of 30 facial covariates computed from a set of landmarks.

    The covariates are, in order, 19 lengths, 8 areas and 3 angles, built
    with getLength(), getArea() and getAngle(). Where a measurement exists
    on both the left and the right side of the face, the two values are
    summed into a single covariate.

    Parameters
    ----------
    scaledShape : 2D NumPy array with one (x, y) row per facial landmark.
        Rows up to index 66 are read. Presumably this is the output of
        scaleFace() on dlib's 68-point landmarks -- confirm against caller.

    Returns
    -------
    A list of 30 numbers.
    """
    dat = [
        ### Lengths
        # Outer eyes width:
        getLength(scaledShape[45,],scaledShape[36,]),
        # Inner eyes width:
        getLength(scaledShape[42,],scaledShape[39,]),
        # Nose length:
        getLength(scaledShape[27,],scaledShape[33,]),
        # Nose width:
        getLength(scaledShape[35,],scaledShape[31,]),
        # Outer mouth width:
        getLength(scaledShape[54,],scaledShape[48,]),
        # Inner mouth width:
        getLength(scaledShape[64,],scaledShape[60,]),
        # Outer mouth height:
        getLength(scaledShape[51,],scaledShape[57,]),
        # Inner mouth height:
        getLength(scaledShape[62,],scaledShape[66,]),
        # Jaw to eye, left and right. Jaw endpoint 0 is paired with outer eye
        # corner 36 so that this mirrors the right-hand pair (16, 45); the
        # previous index 10 broke the left/right symmetry used by every
        # other paired covariate.
        getLength(scaledShape[36,],scaledShape[0,]) +
        getLength(scaledShape[16,],scaledShape[45,]),
        # Lower jaw width:
        getLength(scaledShape[12,],scaledShape[4,]),
        # Eye to mouth, left and right:
        getLength(scaledShape[39,],scaledShape[48,]) +
        getLength(scaledShape[42,],scaledShape[54,]),
        # Eyebrow widths, left and right:
        getLength(scaledShape[21,],scaledShape[17,]) +
        getLength(scaledShape[22,],scaledShape[26,]),
        # Nose to mouth:
        getLength(scaledShape[33,],scaledShape[51,]),
        # Outer eye to eyebrow, left and right:
        getLength(scaledShape[17,],scaledShape[36,]) +
        getLength(scaledShape[26,],scaledShape[45,]),
        # Inner eye to eyebrow, left and right:
        getLength(scaledShape[21,],scaledShape[39,]) +
        getLength(scaledShape[22,],scaledShape[42,]),
        # Mouth to lower jaw, left and right:
        getLength(scaledShape[4,],scaledShape[48,]) +
        getLength(scaledShape[12,],scaledShape[54,]),
        # Mouth to chin:
        getLength(scaledShape[57,],scaledShape[8,]),
        # Inner eyebrow width:
        getLength(scaledShape[21,],scaledShape[22,]),
        # Total jaw length (sum of the distances between consecutive jaw
        # landmarks 0..16):
        np.sum(np.linalg.norm(
                np.diff(scaledShape[:17,],axis=0),axis=1)),
        #
        ### Areas
        # Nose area:
        getArea(scaledShape[27,],scaledShape[31,],scaledShape[33,]),
        # Eye-nose area, left and right:
        getArea(scaledShape[39,],scaledShape[27,],scaledShape[31,]) +
        getArea(scaledShape[42,],scaledShape[27,],scaledShape[35,]),
        # Eye-mouth area, left and right:
        getArea(scaledShape[36,],scaledShape[39,],scaledShape[48,]) +
        getArea(scaledShape[42,],scaledShape[45,],scaledShape[54,]),
        # Nose-mouth area, left and right:
        getArea(scaledShape[31,],scaledShape[33,],scaledShape[51,]) +
        getArea(scaledShape[33,],scaledShape[51,],scaledShape[35,]),
        # Eyebrow-outer eye area, left and right:
        getArea(scaledShape[17,],scaledShape[21,],scaledShape[36,]) +
        getArea(scaledShape[22,],scaledShape[26,],scaledShape[45,]),
        # Eye-eyebrow-jaw area, left and right:
        getArea(scaledShape[0,],scaledShape[17,],scaledShape[36,]) +
        getArea(scaledShape[16,],scaledShape[26,],scaledShape[45,]),
        # Eye-mouth-jaw area, left and right:
        getArea(scaledShape[0,],scaledShape[36,],scaledShape[48,]) +
        getArea(scaledShape[16,],scaledShape[45,],scaledShape[54,]),
        # Mouth-lower jaw-chin area, left and right:
        getArea(scaledShape[48,],scaledShape[4,],scaledShape[8,]) +
        getArea(scaledShape[54,],scaledShape[12,],scaledShape[8,]),
        #
        ### Angles
        # Eye-nose-mouth angles, left and right
        getAngle(scaledShape[31,],scaledShape[39,],scaledShape[48,]) +
        getAngle(scaledShape[35,],scaledShape[42,],scaledShape[54,]),
        # Mouth-jaw-lower jaw angles, left and right
        getAngle(scaledShape[48,],scaledShape[0,],scaledShape[4,]) +
        getAngle(scaledShape[54,],scaledShape[16,],scaledShape[12,]),
        # Nose-eye-eyebrow angles, left and right
        getAngle(scaledShape[27,],scaledShape[39,],scaledShape[21,]) +
        getAngle(scaledShape[27,],scaledShape[42,],scaledShape[22,])
    ]
    return dat

Define a function that takes a set of facial landmarks, centers them on the chin, rotates them (so that the face is upright), and scales them with respect to the face's width and height.

In [None]:
def scaleFace(shape):
    """Center, level and normalize a set of facial landmarks.

    The landmarks are translated so that landmark 8 (the chin) is at the
    origin, rotated so that the line from landmark 0 to landmark 16 (the two
    jaw endpoints) is horizontal, and then divided by the face width
    (landmark 0 to landmark 16) and the face height (landmark 27 to the chin).

    Parameters
    ----------
    shape : 2D NumPy array with one (x, y) row per facial landmark; rows
        0, 8, 16 and 27 are used as reference points.

    Returns
    -------
    A float array of the same dimensions as `shape` holding the normalized
    landmark coordinates. If the face has zero width or height the result
    contains inf/nan.
    """
    chinPt = shape[8]
    scaledShape = shape - chinPt
    # Get and correct face angle. arctan2(dy, dx) is used rather than
    # arctan(dy/dx) so that a jaw line that is exactly vertical in the image
    # (dx == 0) does not cause a division by zero. Because the result is
    # later divided by the signed width and height, the normalized output is
    # otherwise the same as with arctan.
    faceAngle = np.arctan2(
            scaledShape[16,1]-scaledShape[0,1],
            scaledShape[16,0]-scaledShape[0,0])
    cosFA = np.cos(faceAngle)
    sinFA = np.sin(faceAngle)
    # Right-multiplying the row vectors by this matrix rotates by -faceAngle
    rotMat = [[cosFA, -sinFA],[sinFA, cosFA]]
    scaledShape = np.matmul(scaledShape,rotMat)
    faceWidth = scaledShape[16,0] - scaledShape[0,0]
    faceHeight = scaledShape[8,1] - scaledShape[27,1]
    scaledShape = scaledShape / [faceWidth, faceHeight]
    return scaledShape

Define some useful variables, allocate some space:

In [None]:
# Number of covariates recorded per observation
p = 30
# Data matrix: one row per observation (n for each of the two people) and
# one column per covariate
X = np.zeros((2 * n, p))

Initialize the dlib facial detector, and use the facial landmark data to make the facial landmark predictor. The predictor (`predictor`) is what will give us the 2D locations of the landmarks of faces that appear in the webcam stream.

In [None]:
# Face detector; called on a grayscale frame it returns the detected faces
detector = dlib.get_frontal_face_detector()
# Landmark predictor, built from the landmarks data file at shapePath
predictor = dlib.shape_predictor(shapePath)

Start the video stream thread:

In [None]:
# Open the video source selected by camSrc and start streaming from it
vs = VideoStream(src=camSrc).start()
time.sleep(1.0) # Just to make sure the camera has time to wake up

Now define a loop that continually reads and outputs the webcam input, and (every `everyFrame` frames) collects a sample of the face that appears in the video feed. For data collection, only one person should present their face to the webcam. After `n` samples are collected, the script will prompt the user to show a different face to the webcam for the second half of the data collection process.

If your webcam feed isn't updating, that is possibly because it's not detecting any faces; the webcam only outputs when a face is found. Make sure you have good lighting on your face. Also, the facial landmark detector does pretty poorly with eyeglasses, in my experience, so you may want to remove those.

In [None]:
# Data collection loop: show the webcam feed with an overlay of the measured
# landmarks, and store one row of covariates in X every `everyFrame` frames.
# After n samples the user is prompted to switch to the second person; the
# loop ends after 2*n samples or when `q` is pressed.
ii=0 # Counts how many times we've looped
samps=0 # Counts how many samples have been gathered

# Groups of landmark indices outlined on the video feed to help visualize the
# covariates being collected (triangles first, then line segments).
overlayIdx = [
    [27,33,31], [27,33,35], [51,33,31], [51,33,35],
    [27,39,31], [27,35,42], [22,45,42], [36,39,21],
    [21,27,22], [36,39,48], [42,45,54], [48,54,8],
    [4,48,8], [8,54,12], [4,48,0], [54,12,16],
    [0,36,17], [45,26,16],
    [17,21], [22,26], [57,8], [51,57], [62,66], [60,64],
]

try:
    while True:
        # Grab frames from the video stream, resize them and convert them to
        # grayscale until a frame containing at least one face is found.
        rects = []
        while len(rects) == 0:
            frame = vs.read()
            if frame is None:
                # Fail with a clear message instead of an obscure error
                # from inside the resize call.
                raise RuntimeError(
                    "No frame received from the video stream; check camSrc")
            frame = imutils.resize(frame, width=450)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # detect faces in the grayscale frame
            rects = detector(gray, 0)

        # Only the first detected face is used
        rect = rects[0]

        # Determine the facial landmarks for the face region, then convert
        # the landmark (x, y)-coordinates to a NumPy array. This is done on
        # every frame so that the overlay drawn below matches the frame
        # being displayed rather than the last sampled frame.
        shape = predictor(gray, rect)
        shape = face_utils.shape_to_np(shape)

        # Record an observation once every everyFrame frames, as long as
        # there is still room for it in X.
        if np.mod(ii,everyFrame)==0 and samps < 2*n:
            # get centered version scaled wrt face width and height
            scaledShape = scaleFace(shape)
            # Gather covariates
            X[samps,] = facedat(scaledShape)
            samps += 1 # Increment sample counter

        # Draw on the face in a way that helps visualize the covariates
        # being collected.
        cv2.drawContours(
            frame,
            [cv2.convexHull(shape[idx]) for idx in overlayIdx],
            -1, (0,255,0), 1)

        # show the frame along with how many samples have been recorded
        cv2.putText(frame, "SAMPLES: {}".format(samps), (275, 30),
            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.imshow("Frame", frame)
        #cv2.imwrite('images\\'+str(ii).zfill(5)+'.png',frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, or all samples have been gathered,
        # break from the loop (>= so that n == 0 also terminates)
        if key == ord("q") or samps >= 2*n:
            break

        # if we've got n samples of the first person, prompt user to switch people
        if samps == n:
            input("Switch faces and press enter to continue...")
            ii = -1 # Start over, so the next frame is sampled immediately

        # increment counter
        ii+=1
finally:
    # do a bit of cleanup, even if the loop was interrupted by an error
    cv2.destroyAllWindows()
    vs.stop()

Make the response vector:

In [None]:
# Response vector: 0 for the first n rows, 1 for the last n rows
Y = np.vstack([np.zeros([n, 1]), np.ones([n, 1])])

Make and fit the classifier:

In [None]:
# Random forest of 100 trees; the out-of-bag score is computed during fitting
clf = RandomForestClassifier(oob_score=True, n_estimators=100)
# Fit on the collected covariates, with the responses flattened to 1D
clf.fit(X, np.ravel(Y))
# The training data is no longer needed once the model is fitted
del Y, X

Let's take a look at the OOB score, which estimates the generalization accuracy (without the need of a test set or cross-validation). Note that this is the OOB score -- it estimates how often the classifier gets it *right*, not how often it gets it *wrong*. For the OOB error, take `1-clf.oob_score_`.

In [None]:
# Out-of-bag score of the fitted classifier (an accuracy, so higher is better)
print(clf.oob_score_)

Make a bar plot of feature importances. 

In [None]:
# Covariate names, in the same order as the entries of the list returned by
# facedat()
objects = (
    'Outer eyes width', 'Inner eyes width', 'Nose length', 'Nose width',
    'Outer mouth width', 'Inner mouth width', 'Outer mouth height',
    'Inner mouth height', 'Jaw to eye', 'Lower jaw width',
    'Eye to mouth', 'Eyebrow widths', 'Nose to mouth',
    'Outer eye to brow', 'Inner eye to brow', 'Mouth to lower jaw',
    'Mouth to chin', 'Inner brow width', 'Total jaw length',
    'Nose area', 'Eye-nose area', 'Eye-mouth area', 'Nose-mouth area',
    'Brow-outer eye area', 'Eye-brow-jaw area', 'Eye-mouth-jaw area',
    'Mouth-low jaw-chin area', 'Eye-nose-mouth angle',
    'Mouth-jaw-low jaw angle', 'Nose-eye-brow angle',
)
ypos = np.arange(len(objects))
# argsort orders the features from least to most important
idxs = np.argsort(clf.feature_importances_)
plt.figure(figsize=(4, 6))
plt.barh(ypos, clf.feature_importances_[idxs], align='center', alpha=0.5)
# Label each bar with the name of the feature it represents
plt.yticks(ypos, [objects[j] for j in idxs])
plt.title('Feature importances')
plt.xlabel('Importance')
plt.show()

Now we use the facial recognition classifier. This is a loop similar to the one used to gather data, but instead of gathering training data, we are now applying the classifier to each frame of the webcam input, and annotating the image appropriately. Press `q` to exit.

In [None]:
# Recognition loop: classify every face found in each webcam frame and write
# the predicted person's name under the face. Press `q` to exit.

# First, start the videostream again:
vs = VideoStream(src=camSrc).start()
time.sleep(1.0)

ii=0 # Keep track of how many times we've looped
try:
    while True:
        # grab the frame from the video stream, resize it, and convert it
        # to grayscale
        frame = vs.read()
        if frame is None:
            # Fail with a clear message instead of an obscure error from
            # inside the resize call.
            raise RuntimeError(
                "No frame received from the video stream; check camSrc")
        frame = imutils.resize(frame, width=450)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        #
        # detect faces in the grayscale frame
        rects = detector(gray, 0)

        # loop over the face detections
        for rect in rects:
            # determine the facial landmarks for the face region, then
            # convert the facial landmark (x, y)-coordinates to a NumPy
            # array
            shape = predictor(gray, rect)
            shape = face_utils.shape_to_np(shape)

            # get centered version scaled wrt face width and height
            scaledShape = scaleFace(shape)
            chinPt = shape[8] # We'll use this to place text
            faceWidth = shape[16,0] - shape[0,0] # And this too

            # Gather covariates
            faceInput = facedat(scaledShape)

            # Figure out to whom the face belongs. predict() returns an
            # array with one entry per input row, so take its only element
            # explicitly before converting to int.
            ident = int(clf.predict(np.array(faceInput).reshape(1,-1))[0])
            txtcol = [(0,0,255),(0,255,0)][ident]
            ident = personNames[ident]

            # Annotate the image with the identity, just below the chin.
            # Both coordinates are converted to plain Python ints.
            cv2.putText(frame, ident[0],
                (int(chinPt[0]-faceWidth/4), int(chinPt[1])+20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, txtcol, 2)

        # show the frame
        cv2.imshow("Frame", frame)
        #cv2.imwrite('images\\'+str(ii).zfill(5)+'.png',frame) # Uncomment this to save each frame as .png
        ii+=1 # Increment loop count
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
finally:
    # do a bit of cleanup, even if the loop was interrupted by an error
    cv2.destroyAllWindows()
    vs.stop()