In [1]:
import cv2 as cv
import mediapipe as mp
import time
import utils, math
import numpy as np
# variables 
frame_counter = 0
CEF_COUNTER = 0
TOTAL_BLINKS = 0
# constants
CLOSED_EYES_FRAME = 3
FONTS =cv.FONT_HERSHEY_COMPLEX

# Left eyes indices 
LEFT_EYE = [ 362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385,384, 398 ]
LEFT_EYEBROW = [ 336, 296, 334, 293, 300, 276, 283, 282, 295, 285 ]

# right eyes indices
RIGHT_EYE = [ 33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161 , 246 ]  
RIGHT_EYEBROW = [ 70, 63, 105, 66, 107, 55, 65, 52, 53, 46 ]

In [2]:
# landmark detection function 
def landmarksDetection(img, results, draw=False):
    img_height, img_width= img.shape[:2]
    # list[(x,y), (x,y)....]
    mesh_coord = [(int(point.x * img_width), int(point.y * img_height)) for point in results.multi_face_landmarks[0].landmark]
    if draw :
        [cv.circle(img, p, 2, (0,255,0), -1) for p in mesh_coord]

    # returning the list of tuples for each landmarks 
    return mesh_coord

In [3]:
# Euclaidean distance 
def euclaideanDistance(point, point1):
    x, y = point
    x1, y1 = point1
    distance = math.sqrt((x1 - x)**2 + (y1 - y)**2)
    return distance

In [4]:
# Blinking Ratio
def blinkRatio(img, landmarks, right_indices, left_indices):
    # Right eyes 
    # horizontal line 
    rh_right = landmarks[right_indices[0]]
    rh_left = landmarks[right_indices[8]]
    # vertical line 
    rv_top = landmarks[right_indices[12]]
    rv_bottom = landmarks[right_indices[4]]
    # draw lines on right eyes 
    # cv.line(img, rh_right, rh_left, utils.GREEN, 2)
    # cv.line(img, rv_top, rv_bottom, utils.WHITE, 2)

    # LEFT_EYE 
    # horizontal line 
    lh_right = landmarks[left_indices[0]]
    lh_left = landmarks[left_indices[8]]

    # vertical line 
    lv_top = landmarks[left_indices[12]]
    lv_bottom = landmarks[left_indices[4]]

    rhDistance = euclaideanDistance(rh_right, rh_left)
    rvDistance = euclaideanDistance(rv_top, rv_bottom)

    lvDistance = euclaideanDistance(lv_top, lv_bottom)
    lhDistance = euclaideanDistance(lh_right, lh_left)

    reRatio = rhDistance/rvDistance
    leRatio = lhDistance/lvDistance

    ratio = (reRatio+leRatio)/2
    return ratio 

In [5]:
face_casecade = cv.CascadeClassifier('data/haarcascade_frontalface_default.xml')
net = cv.dnn.readNet('yolov4-tiny-custom_best.weights', 'yolov4-tiny-custom.cfg')
classes = []
with open('obj.names','r') as f:
    classes = f.read().splitlines()

boxes = [] # bounding boxes
confidences = [] # sotring the confidence
class_ids = [] # predicted classes

In [6]:
def predict(img):
    height, width, _ = img.shape
    blob = cv.dnn.blobFromImage(img, 1/255, (416,416), (0,0,0), swapRB = True, crop = False)
    net.setInput(blob) # settnig the input to the network
    output_layers_names = net.getUnconnectedOutLayersNames() # Get the output layers name
    layerOutputs = net.forward(output_layers_names) # forward pass and get the out from fthe output layer

    boxes = [] # bounding boxes
    confidences = [] # sotring the confidence
    class_ids = [] # predicted classes

    for output in layerOutputs: # extract all the info from the layerOutputs
        for detection in output: # extract info from each of the output
            scores = detection[5:] # extract all the classes starting from the 6 element because the first 5 elements have the sizes and the coordinates of the bounding box
            class_id = np.argmax(scores) # extract highest score location
            confidence = scores[class_id] # extracting the highest score and assigning to confidence

            if confidence > 0.2: #threash hold 
                center_x = int(detection[0]*width)
                center_y = int(detection[1]*height)
                w = int(detection[2]*width)
                h = int(detection[3]*height)

                x = int(center_x - w/2)
                y = int(center_y - w/2) # getting the possitions upper left corner

                boxes.append([x,y,w,h])
                confidences.append((float(confidence)))
                class_ids.append(class_id)


    indexes = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4) # 0.4 maximum supressions/ NMS - Only getting the highest accuracy boxes

    font = cv.FONT_HERSHEY_PLAIN
    colours = np.random.uniform(0, 255, size=(len(boxes), 3))
    return class_ids, confidences, indexes, font

In [9]:
map_face_mesh = mp.solutions.face_mesh
# camera object 
camera = cv.VideoCapture(0)
image = []
i = 0
with map_face_mesh.FaceMesh(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as face_mesh:

    # starting time here 
    start_time = time.time()
    # starting Video loop here.
    while True:
        frame_counter += 1 # frame counter
        ret, frame = camera.read() # getting frame from camera 
        if not ret: 
            break # no more frames break
        #  resizing frame
        
        frame = cv.resize(frame, None, fx = 1.5, fy = 1.5, interpolation = cv.INTER_CUBIC)
        frame_height, frame_width= frame.shape[:2]
        rgb_frame = cv.cvtColor(frame, cv.COLOR_RGB2BGR)
        results  = face_mesh.process(rgb_frame)
        if results.multi_face_landmarks:
            mesh_coords = landmarksDetection(frame, results, False)
            ratio = blinkRatio(frame, mesh_coords, RIGHT_EYE, LEFT_EYE)
            cv.putText(frame, f'ratio {ratio}', (100, 100), FONTS, 1.0, (0,255,0), 2)
            # utils.colorBackgroundText(frame,  f'Ratio : {round(ratio,2)}', FONTS, 0.7, (30,100),2, utils.PINK, utils.YELLOW)

            if ratio > 3.5:
                CEF_COUNTER += 1
                cv.putText(frame, 'Blink', (200, 50), FONTS, 1.3, (0,255,0), 2)
                # utils.colorBackgroundText(frame,  f'Blink', FONTS, 1.7, (int(frame_height/2), 100), 2, utils.YELLOW, pad_x=6, pad_y=6, )

            else:
                if CEF_COUNTER>CLOSED_EYES_FRAME:
                    TOTAL_BLINKS += 1
                    CEF_COUNTER = 0
            cv.putText(frame, f'Total Blinks: {TOTAL_BLINKS}', (100, 150), FONTS, 0.6, (0,255,0), 2)
            
            
            # cv.polylines(frame,  [np.array([mesh_coords[p] for p in LEFT_EYE ], dtype=np.int32)], True, (0,255,0), 1, cv.LINE_AA)
            # cv.polylines(frame,  [np.array([mesh_coords[p] for p in RIGHT_EYE ], dtype=np.int32)], True, (0,255,0), 1, cv.LINE_AA)


        # frame =utils.textWithBackground(frame,f'FPS: {round(fps,1)}',FONTS, 1.0, (30, 50), bgOpacity=0.9, textThickness=2)
        # writing image for thumbnail drawing shape
        # cv.imwrite(f'img/frame_{frame_counter}.png', frame)
        
        faces = face_casecade.detectMultiScale(frame)
        key = cv.waitKey(2)
        if key==ord('c') or TOTAL_BLINKS == 5:
            cv.putText(frame, "Take your pic", (25, 40), cv.FONT_HERSHEY_PLAIN, 2, (255,255,255), 2)
            faces = face_casecade.detectMultiScale(frame)
            for x,y,w,h in faces:
                face = frame[y:y+h, x:x+w, :]
                cv.imwrite('sample.jpg', face)
        
        img = cv.imread('sample.jpg')
        class_ids, confidences, indexes, font = predict(img)
        print(indexes)
        if len(indexes) > 0:
            for i in indexes.flatten():
                # x, y, w, h = boxes[i]
                print(class_ids[i])
                label = str(classes[class_ids[i]])
                print(label)
                confidence = str(round(confidences[i], 2))
                cv.putText(frame, label + "" + confidence, (25, 40), font, 2, (255,255,255), 2)

        cv.imshow('frame', frame)
        key = cv.waitKey(2)
        if key==ord('q') or key == ord('Q'):
            TOTAL_BLINKS = 0
            break
    cv.destroyAllWindows()
    camera.release()

[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
0
David
[3]
