In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from math import hypot
import pyautogui
import dlib

## Initialize the Deep Neural Network Module
Here we load a pretrained model: its weights file and its architecture definition.

In [None]:
# Path to the weights file
# (relative path: resolved against the kernel's current working directory)
model_weights =  "model/res10_300x300_ssd_iter_140000_fp16.caffemodel"

# Path to the architecture file
model_arch = "model/deploy.prototxt.txt"

# Load the caffe model
# `net` stays at module level: face_detector() reads it as a global.
net = cv2.dnn.readNetFromCaffe(model_arch, model_weights)


## Built architecture

In [None]:
def face_detector(image, threshold=0.7):
    """Find the most confident face in ``image`` with the Caffe SSD network.

    Uses the module-level ``net`` created by ``cv2.dnn.readNetFromCaffe``.

    Args:
        image: Image array; ``image.shape[:2]`` is read as (height, width).
        threshold: Minimum confidence a detection needs to be accepted.

    Returns:
        A 4-tuple ``(annotated_frame, box, found, confidence)``.
        When a face is accepted: a copy of ``image`` with a red rectangle,
        ``(x1, y1, x2, y2)`` as plain Python ints, ``True`` and the confidence.
        Otherwise: ``(image, (), False, 0)`` -- the input image itself, not a
        copy, and an empty box.
    """
    height, width = image.shape[:2]

    # Apply mean subtraction, and create 4D blob from image
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 117.0, 123.0))
    net.setInput(blob)

    faces = net.forward()

    # np.argmax raises on an empty array, so bail out early when the
    # network produced no detection rows at all.
    if faces.shape[2] == 0:
        return (image, (), False, 0)

    # Column 2 of every detection row is its confidence value
    prediction_scores = faces[0, 0, :, 2]

    # Keep only the detection with the highest confidence
    index = int(np.argmax(prediction_scores))
    face = faces[0, 0, index]
    confidence = float(face[2])

    if confidence <= threshold:
        return (image, (), False, 0)

    # Columns 3 to 6 hold the top-left and bottom-right corners scaled to
    # the range 0-1; multiply by the image size to get pixel coordinates.
    box = face[3:7] * np.array([width, height, width, height])

    # Pixel coordinates must be integers; plain Python ints (truncated like
    # astype("int")) are accepted by every downstream cv2/dlib call.
    x1, y1, x2, y2 = (int(value) for value in box)

    annotated_frame = cv2.rectangle(image.copy(), (x1, y1), (x2, y2), (0, 0, 255), 2)
    return (annotated_frame, (x1, y1, x2, y2), True, confidence)

## our model

In [None]:
# Lazily created dlib detector, shared by every detect_faces() call so the
# detector is not rebuilt for each video frame.
_DLIB_FRONTAL_DETECTOR = None


def detect_faces(image):
    """Return the bounding boxes dlib's frontal face detector finds in ``image``.

    Args:
        image: Image passed straight to the dlib detector.

    Returns:
        A list of ``(left, top, right, bottom)`` tuples, one per detected
        face; the list is empty when nothing is detected.
    """
    global _DLIB_FRONTAL_DETECTOR
    if _DLIB_FRONTAL_DETECTOR is None:
        _DLIB_FRONTAL_DETECTOR = dlib.get_frontal_face_detector()

    # Second argument is forwarded unchanged from the original call.
    # NOTE(review): in dlib this is the upsample count -- confirm 1 is intended.
    detected_faces = _DLIB_FRONTAL_DETECTOR(image, 1)

    return [(rect.left(), rect.top(), rect.right(), rect.bottom())
            for rect in detected_faces]

In [None]:
import dlib
from PIL import Image
from skimage import io
import matplotlib.pyplot as plt
import cv2

def Face_detection(frame):
    """Detect faces with ``detect_faces`` and outline each one in red.

    Args:
        frame: Image to analyse; it is copied, never drawn on directly.

    Returns:
        ``(annotated_frame, coords)`` where ``annotated_frame`` is a copy of
        ``frame`` with one rectangle per detected face and ``coords`` is the
        ``(x1, y1, x2, y2)`` box of the last detected face, or ``()`` when no
        face was found.
    """
    detected_faces = detect_faces(frame)

    # Start from defaults so the function also returns cleanly when the
    # detector finds nothing (previously this raised UnboundLocalError).
    annotated_frame = frame.copy()
    coords = ()

    for face_rect in detected_faces:
        topleft = (face_rect[0], face_rect[1])
        bottomright = (face_rect[2], face_rect[3])
        # Draw on the same copy so every face stays visible, not just the last
        annotated_frame = cv2.rectangle(annotated_frame, topleft, bottomright, (0, 0, 255), 2)
        coords = (face_rect[0], face_rect[1], face_rect[2], face_rect[3])

    return annotated_frame, coords

## webcam

In [None]:
# import the necessary packages
from collections import namedtuple
import numpy as np
import cv2
# Record type for one evaluated frame: where the frame image was saved,
# plus the reference ("gt") and predicted ("pred") bounding boxes.
Detection = namedtuple("Detection", "image_path gt pred")

In [None]:
# Collects one Detection record per frame; filled by the webcam capture loop
# and read back by the IoU loop. Re-running this cell empties the list.
arrayofIOU = []

In [None]:
# Folder the annotated frames are written to.
# NOTE(review): machine-specific absolute path -- point it at an existing
# local folder; cv2.imwrite is not expected to create missing directories.
image_dir = '/home/mora/Downloads/FinalCV/imageiou'

# Get the video feed from webcam
cap = cv2.VideoCapture(0)

# Set the window to a normal one so we can adjust it
cv2.namedWindow('face Detection', cv2.WINDOW_NORMAL)

counter = 0

# try/finally guarantees the camera and window are released even when a
# detector raises inside the loop.
try:
    while True:
        # Read the frames
        ret, frame = cap.read()

        # Break if frame is not returned
        if not ret:
            break

        frame = cv2.flip(frame, 1)

        # Reference ("ground-truth") box from the OpenCV DNN detector
        annotated_frame, coords, status, conf = face_detector(frame)

        # Box from the dlib-based detector ("our model"), drawn on top of
        # the frame that already carries the reference rectangle
        face, cooords = Face_detection(annotated_frame)

        # Display the frame
        cv2.imshow('face Detection', face)

        # An IoU only exists when both detectors found a face, so frames
        # with a missing box are shown but neither saved nor recorded.
        if status and len(cooords) == 4:
            filename = '{}/image{}.jpg'.format(image_dir, counter)
            cv2.imwrite(filename, face)
            counter += 1

            # gt = DNN reference box, pred = dlib box
            arrayofIOU.append(Detection(filename, coords, cooords))

        # Break the loop if 'q' key pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything is done, release the capture and destroy the window
    cap.release()
    cv2.destroyAllWindows()

## IOU

In [None]:
# Show the collected Detection records (image path, gt box, pred box)
arrayofIOU

In [None]:
def bb_intersection_over_union(boxA, boxB):
    """Compute the intersection-over-union of two axis-aligned boxes.

    Boxes are ``(x1, y1, x2, y2)`` sequences. Widths and heights use the
    inclusive-pixel convention (``+ 1``), exactly as before.

    Args:
        boxA: First box.
        boxB: Second box.

    Returns:
        The IoU as a float. ``0.0`` is returned when either box is ``None``
        or has fewer than four coordinates (for example the empty tuple the
        detectors return when no face was found), or when the union area is
        not positive.
    """
    if boxA is None or boxB is None or len(boxA) < 4 or len(boxB) < 4:
        return 0.0

    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # area of the intersection rectangle (zero when the boxes do not overlap)
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    # areas of the two input rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # union = sum of both areas minus the part counted twice
    unionArea = float(boxAArea + boxBArea - interArea)
    if unionArea <= 0:
        return 0.0

    return interArea / unionArea

In [None]:
# Stamp every saved frame with the IoU of its two bounding boxes
skipped = 0

for detection in arrayofIOU:
    # A record without two complete boxes has no IoU to report
    if len(detection.gt) != 4 or len(detection.pred) != 4:
        skipped += 1
        continue

    # load the image; cv2.imread gives None for a missing/unreadable file
    image = cv2.imread(detection.image_path)
    if image is None:
        skipped += 1
        continue

    iou = bb_intersection_over_union(detection.gt, detection.pred)
    cv2.putText(image, "IoU: {:.4f}".format(iou), (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # Overwrite the saved frame with its annotated version.
    # Note: re-running this cell draws the text onto the file again.
    cv2.imwrite(detection.image_path, image)

if skipped:
    print("Skipped {} detection(s) without a usable image or box".format(skipped))

## Landmarks Detection
In this part we use the pretrained model from the dlib library, which detects 68 keypoints of the face, because the one we built has very low accuracy.

In [None]:
# Load dlib's pretrained 68-point landmark predictor once at module level;
# detect_landmarks() reads `predictor` as a global.
# (relative path: the .dat file is looked up under ./model)
predictor = dlib.shape_predictor("model/shape_predictor_68_face_landmarks.dat")

First we define the shape_to_np function, which creates an np array of shape (68, 2) for storing the landmark coordinates.

In [None]:
def shape_to_np(shape):
    """Turn a landmark result into a ``(68, 2)`` integer array.

    Args:
        shape: Object exposing ``part(i)`` for ``i`` in ``0..67``, where each
            part has ``x`` and ``y`` attributes.

    Returns:
        ``np.ndarray`` of dtype ``int`` whose row ``i`` is ``(x, y)`` of part ``i``.
    """
    # Fetch the 68 parts first, then pack their coordinates in one go
    parts = [shape.part(idx) for idx in range(68)]
    return np.array([(pt.x, pt.y) for pt in parts], dtype="int")

Then we create the function that detects the facial landmarks. It takes the box of the face detected by the face_detector() function and the image as inputs, and returns the image with the landmarks drawn on it together with an np array of their positions.

In [None]:
def detect_landmarks(box, image):
    """Predict the facial landmarks inside ``box`` and mark them on ``image``.

    Args:
        box: ``(x1, y1, x2, y2)`` face rectangle; must unpack to four values.
        image: BGR image. ``cv2.circle`` is called on this very object, not
            on a copy.

    Returns:
        ``(annotated_image, landmarks)`` -- the value returned by the last
        ``cv2.circle`` call and the landmark array from ``shape_to_np``.
    """
    # The predictor is fed a grayscale version of the image
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    left, top, right, bottom = box
    face_region = dlib.rectangle(left, top, right, bottom)
    landmarks = shape_to_np(predictor(grey, face_region))

    # One small filled dot per landmark
    for point in landmarks:
        px, py = point
        annotated_image = cv2.circle(image, (px, py), 2, (0, 127, 255), -1)

    return annotated_image, landmarks

Testing the detect_landmarks() function

In [None]:
# Get the video feed from webcam and set the window to normal
cap = cv2.VideoCapture(0)
cv2.namedWindow('Landmark Detection', cv2.WINDOW_NORMAL)

# try/finally releases the camera and window even if a detector raises
try:
    while True:
        ret, frame = cap.read()

        # Break if frame is not returned
        if not ret:
            break

        # If frame is returned, flip and detect the face
        frame = cv2.flip(frame, 1)
        face_image, box_coords, status, conf = face_detector(frame)

        # Show the plain frame when no face is found; previously this was a
        # NameError on the first face-less frame and a stale image afterwards.
        landmark_image = frame

        # Get the landmarks of the face detected
        if status:
            landmark_image, landmarks = detect_landmarks(box_coords, frame)

        cv2.imshow('Landmark Detection', landmark_image)

        # Break the loop if 'q' key pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


## Jump Control mechanism

**Determine whether the mouth is open by calculating the aspect ratio between its length and width; if the ratio is bigger than a certain threshold, the mouth is open.**

In [None]:
def if_open(landmarks, threshold = 0.7):
    """Decide whether the mouth is open from its aspect ratio.

    The ratio is ``(d1 + d2) / (2 * d3)``, where ``d1`` and ``d2`` are the
    distances between landmark pairs 50/58 and 52/56 and ``d3`` is the
    distance between landmarks 48 and 54.

    Args:
        landmarks: Indexable collection of ``(x, y)`` points (indices up to 58 are read).
        threshold: Ratio above which the mouth counts as open.

    Returns:
        ``(is_open, ratio)`` with ``is_open`` a bool and ``ratio`` the computed value.
    """
    def _distance(first, second):
        # Euclidean distance between two landmark points
        p, q = landmarks[first], landmarks[second]
        return hypot(p[0] - q[0], p[1] - q[1])

    opening = _distance(50, 58) + _distance(52, 56)
    span = _distance(48, 54)

    # Calculate the mouth aspect ratio
    ratio = opening / (2.0 * span)

    return bool(ratio > threshold), ratio

In [None]:
# Get the video feed from webcam
cap = cv2.VideoCapture(0)

# Set the window to a normal one so we can adjust it
cv2.namedWindow('Mouth Status', cv2.WINDOW_NORMAL)

# try/finally releases the camera and window even if a detector raises
try:
    while True:
        # Read the frames
        ret, frame = cap.read()

        # Break if frame is not returned
        if not ret:
            break

        # Flip the frame
        frame = cv2.flip(frame, 1)

        # Detect face in the frame
        face_image, box_coords, status, conf = face_detector(frame)

        if status:
            # Get the landmarks for the face region in the frame; the call
            # also draws them onto `frame`, which is what gets displayed
            landmark_image, landmarks = detect_landmarks(box_coords, frame)

            # Adjust the threshold and make sure it's working for you.
            mouth_status, _ = if_open(landmarks, 0.6)

            # Display the mouth status
            cv2.putText(frame,'Is Mouth Open: {}'.format(mouth_status),(20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 127, 255),2)

        # Display the frame
        cv2.imshow('Mouth Status', frame)

        # Break the loop if 'q' key pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything done, release the capture and destroy the window
    cap.release()
    cv2.destroyAllWindows()

## Crouch Control Mechanism

**Determine how close the face is to the camera, in order to capture movements of the face towards the camera, which are later translated into the down button in the game.**

In [None]:
def face_proximity(bounding_box, image, threshold=325):
    """Report whether the face box is large enough to count as "close".

    The face counts as close when the diagonal of its bounding box exceeds
    ``threshold``. A yellow guide rectangle whose diagonal equals
    ``threshold`` (same aspect ratio and centre as the face box) is drawn
    on ``image`` so the user can see how far they have to lean in.

    Args:
        bounding_box: ``(x1, y1, x2, y2)`` face rectangle.
        image: Image handed to ``cv2.rectangle`` for the guide rectangle.
        threshold: Diagonal length above which the face is "close".

    Returns:
        ``(is_close, diagonal)`` -- a bool and the diagonal of the face box.
    """
    # Get the height and width of the face bounding box
    f_width = bounding_box[2] - bounding_box[0]
    f_height = bounding_box[3] - bounding_box[1]

    # Angle of the face-box diagonal. arctan2 gives the same angle as
    # arctan(height / width) for positive sizes but does not divide by
    # zero on a degenerate (zero-width) box.
    theta = np.arctan2(f_height, f_width)

    # Guide rectangle: same angle, diagonal equal to the threshold
    guide_height = np.sin(theta) * threshold
    guide_width = np.cos(theta) * threshold

    # Calculate the mid-point of the guide rectangle/face bounding box
    mid_x = (bounding_box[2] + bounding_box[0]) / 2
    mid_y = (bounding_box[3] + bounding_box[1]) / 2

    # Calculate the coordinates of top-left and bottom-right corners
    topleft = int(mid_x - (guide_width / 2)), int(mid_y - (guide_height / 2))
    bottomright = int(mid_x + (guide_width / 2)), int(mid_y + (guide_height / 2))

    # Draw the guide rectangle
    cv2.rectangle(image, topleft, bottomright, (0, 255, 255), 2)

    # Diagonal of the FACE box. The previous code measured the guide
    # rectangle instead, whose diagonal is always equal to the threshold,
    # so the comparison below could never reflect the real face size.
    diagonal = hypot(f_width, f_height)

    return bool(diagonal > threshold), diagonal


In [None]:
# Get the video feed from webcam
cap = cv2.VideoCapture(0)

# Set the window to a normal one so we can adjust it
cv2.namedWindow('Face proximity', cv2.WINDOW_NORMAL)

# try/finally releases the camera and window even if a detector raises
try:
    while True:
        # Read the frames
        ret, frame = cap.read()

        # Break if frame is not returned
        if not ret:
            break

        # Flip the frame
        frame = cv2.flip(frame, 1)

        # Detect face in the frame
        face_image, box_coords, status, conf = face_detector(frame)

        if status:
            # Check if face is closer than the defined threshold
            is_face_close, _ = face_proximity(box_coords, face_image, 300)

            # Display the proximity status
            cv2.putText(face_image,'Is Face Close: {}'.format(is_face_close),(20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 127, 255),2)

        # Display the frame
        cv2.imshow('Face proximity', face_image)

        # Break the loop if 'q' key pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything done, release the capture and destroy the window
    cap.release()
    cv2.destroyAllWindows()


## Calibration

In [None]:
# Fallback thresholds, equal to the default arguments of if_open() and
# face_proximity(). They keep the later cells runnable when no face is
# seen during calibration; every frame with a face overwrites them.
ar_threshold = 0.7
proximity_threshold = 325

# Get the video feed from webcam
cap = cv2.VideoCapture(0)

# Set the window to a normal one so we can adjust it
cv2.namedWindow('Calibration', cv2.WINDOW_NORMAL)

# try/finally releases the camera and window even if a detector raises
try:
    while True:
        # Read the frames
        ret, frame = cap.read()

        # Break if frame is not returned
        if not ret:
            break

        # Flip the frame
        frame = cv2.flip(frame, 1)

        # Detect face in the frame
        face_image, box_coords, status, conf = face_detector(frame)

        if status:
            # Detect landmarks if a face is found
            landmark_image, landmarks = detect_landmarks(box_coords, frame)

            # Get the current mouth aspect ratio
            _, mouth_ar = if_open(landmarks)

            # Get the current face proximity
            _, proximity = face_proximity(box_coords, face_image)

            # Thresholds sit a margin above the resting values
            # (40% for the mouth ratio, 30% for the proximity)
            ar_threshold = mouth_ar*1.4
            proximity_threshold = proximity*1.3

            # Display the threshold values
            cv2.putText(frame, 'Aspect ratio threshold: {:.2f} '.format(ar_threshold),(20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 127, 255),2)
            cv2.putText(frame,'Proximity threshold: {:.2f}'.format(proximity_threshold),(20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 127, 255),2)

        # Display the frame
        cv2.imshow('Calibration', frame)

        # Break the loop if 'q' key pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything is done, release the capture and destroy the window
    cap.release()
    cv2.destroyAllWindows()

## Keyboard Automation

In [None]:
# Demo of the pyautogui calls used later. Every call below sends a real
# mouse or keyboard event to whichever window currently has focus, so the
# effects described in the comments depend on where the cursor/focus is.

# This will open a context menu
pyautogui.click(button='right')

# Press space bar. This will scroll down the page in some browsers
pyautogui.press('space')

# This will move the focus to the next cell in the notebook
# NOTE(review): a list argument is presumably pressed key by key (shift,
# then enter) rather than as a Shift+Enter chord -- confirm the intended effect.
pyautogui.press(['shift','enter'])

# Hold down the shift key
pyautogui.keyDown('shift')

# Press enter while the shift key is down, this will run the next code cell
pyautogui.press('enter')

# Release the shift key
pyautogui.keyUp('shift')

# This will run automatically after running the two code cells above
print('I ran')


## Build The Final Application

In [None]:
# Turn off pyautogui's fail-safe, i.e. the exception it raises when the
# mouse is moved to a corner of the screen. With it disabled there is no
# mouse-based way to abort the automation; use the 'q' key in the app window.
pyautogui.FAILSAFE = False

In [None]:
# Requires `ar_threshold` and `proximity_threshold` from the calibration cell.

# Get the video feed from webcam
cap = cv2.VideoCapture(0)

# Set the window to a normal one so we can adjust it
cv2.namedWindow('Dino with OpenCV', cv2.WINDOW_NORMAL)

# By default each key press is followed by a 0.1 second pause
pyautogui.PAUSE = 0.0

try:
    while True:
        # Read the frames
        ret, frame = cap.read()

        # Break if frame is not returned
        if not ret:
            break

        # Flip the frame
        frame = cv2.flip(frame, 1)

        # Detect face in the frame
        face_image, box_coords, status, conf = face_detector(frame)

        if status:
            # Detect landmarks if a face is found
            landmark_image, landmarks = detect_landmarks(box_coords, frame)

            # Check if mouth is open
            is_open, _ = if_open(landmarks, ar_threshold)

            # If the mouth is open trigger space key Down event to jump
            if is_open:
                pyautogui.keyDown('space')
                mouth_status = 'Open'
            else:
                # Else the space key is Up
                pyautogui.keyUp('space')
                mouth_status = 'Closed'

            # Display the mouth status on the frame
            cv2.putText(frame,'Mouth: {}'.format(mouth_status),(20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 127, 255),2)

            # Check the proximity of the face
            is_closer, _ = face_proximity(box_coords, frame, proximity_threshold)

            # If face is closer press the down key
            if is_closer:
                pyautogui.keyDown('down')
            else:
                pyautogui.keyUp('down')
        else:
            # Face lost: let go of both keys so the game does not keep
            # jumping/crouching on the last known state
            pyautogui.keyUp('space')
            pyautogui.keyUp('down')

        # Display the frame
        cv2.imshow('Dino with OpenCV', frame)

        # Break the loop if 'q' key pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Never leave a key held down at OS level after the app stops
    pyautogui.keyUp('space')
    pyautogui.keyUp('down')

    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()

## Evaluation

In [None]:
# Create the folder for the extracted frames. Unlike `!mkdir`, this does
# not fail when the folder already exists, so the cell can be re-run.
import os

os.makedirs('video_frames', exist_ok=True)

In [None]:
import cv2

# Split the recorded evaluation video into numbered JPEG frames
# (frame0.jpg, frame1.jpg, ...) inside ./video_frames
vidcap = cv2.VideoCapture('IMG_7492.MOV')
count = 0
try:
    success, image = vidcap.read()
    while success:
        cv2.imwrite("./video_frames/frame%d.jpg" % count, image)     # save frame as JPEG file
        count += 1
        success, image = vidcap.read()
finally:
    # Release the video file handle even if writing a frame fails
    vidcap.release()

# One summary line instead of one line per frame
print('Extracted {} frames'.format(count))

## Calculating the second metric (frames per second)

In [None]:
import glob
import re
import time


def _frame_index(path):
    """Return the number embedded in a frame file name ('frame12.jpg' -> 12)."""
    match = re.search(r'(\d+)\.\w+$', path)
    return int(match.group(1)) if match else -1


# glob gives no ordering guarantee, and a plain string sort would put
# frame10 before frame2. Sort by frame number so that list position equals
# the position in the video (the labels built later rely on that).
frames = sorted(glob.glob('video_frames/*'), key=_frame_index)
number_of_frames = len(frames)

print(f'the number of Frames are {number_of_frames}')

# One prediction per frame, in video order
predict_mouth = []

# Time only the per-frame pipeline (read, detect, landmarks, mouth check)
start = time.time()

for frame_path in frames:
    frame = cv2.imread(frame_path)

    # Default used when the file is unreadable or no face is found, so the
    # prediction list always stays aligned with the frame list
    is_open = False

    if frame is not None:
        # Flip the frame
        frame = cv2.flip(frame, 1)

        # Detect face in the frame
        face_image, box_coords, status, conf = face_detector(frame)

        if status:
            # Detect landmarks if a face is found
            landmark_image, landmarks = detect_landmarks(box_coords, frame)

            # Check if mouth is open (threshold comes from the calibration cell)
            is_open, _ = if_open(landmarks, ar_threshold)

    predict_mouth.append(is_open)

done = time.time()
elapsed = done - start

frame_per_sec = number_of_frames / elapsed if elapsed > 0 else 0.0
frame_per_sec

In [None]:
# Report the measured throughput; the hardware description in the message
# is fixed text, not something measured by the notebook.
print(f'the model work with framerate {frame_per_sec} frame per second in cpu with 8 gb ram')

In [None]:
# 0 means the mouth is closed , 1 means the mouth is open
# Build a new list so predict_mouth keeps its original booleans
y_pred = [int(bool(is_open)) for is_open in predict_mouth]

# the first half of frames the mouth is closed, the second half the mouth is open
# (strict `<` so an even number of frames is split exactly in half)
n_frames = len(y_pred)
true_label = [0 if i < n_frames / 2 else 1 for i in range(n_frames)]

In [None]:
from sklearn.metrics import confusion_matrix
# labels=[0, 1] forces a 2x2 matrix even when only one class occurs in the
# data, so unpacking into four counts cannot fail.
tn, fp, fn, tp = confusion_matrix(true_label, y_pred, labels=[0, 1]).ravel()

print(f'The True Positive are  {tp}')
print(f'The False Negative are {fn}')
print(f'The False positive are {fp}')
print(f'The true negative are  {tn}')

In [None]:
# Display the raw confusion matrix of true labels vs. predictions
confusion_matrix(true_label, y_pred)

In [None]:
# Heat map of the confusion matrix, drawn with matplotlib only: the alias
# `sb` used before is never imported in this notebook, so the old cell
# stopped with a NameError.
cm = confusion_matrix(true_label, y_pred, labels=[0, 1])

fig, ax = plt.subplots(figsize=(10, 8))
heatmap = ax.imshow(cm, cmap="magma")
fig.colorbar(heatmap, ax=ax)

# One tick per class (0 = closed, 1 = open)
ax.set_xticks([0, 1])
ax.set_yticks([0, 1])
ax.set_xticklabels([0, 1], rotation=45)
ax.set_yticklabels([0, 1])

ax.set_ylabel("True label", ha="right", rotation=0, color="blue")
ax.set_xlabel("Predicted label", color="blue")
ax.set_title("Test Data Confusion Matrix");