# python app - original gojek hackathon

In [1]:
import torch
import cv2
import numpy as np
import pandas as pd
from glob import glob
import csv
from collections import OrderedDict
import sys
sys.path.append('./RF')
from models.retinaface import RetinaFace
from data import cfg_mnet
from test_retina import infer
import os
from tqdm import tqdm
import glob
import argparse
from torch.utils.data import DataLoader
import torch.distributed as dist
import matplotlib.pyplot as plt
import random
from pathlib import Path
from scipy.special import softmax
from tqdm.contrib.concurrent import process_map
from torchvision.transforms import Compose, ToTensor
import matplotlib.patches as patches
from eye_net import *

device=torch.device('cpu')

# generate mouth boxes
def get_mouth_box(face_landmarks,height_multiplier):
    """Return an integer ``[left, top, right, bottom]`` box around the mouth.

    Args:
        face_landmarks: flat sequence of landmark coordinates; entries 6-7
            are read as the left mouth corner (x, y) and entries 8-9 as the
            right mouth corner (x, y).
        height_multiplier: box height expressed as a multiple of the
            corner-to-corner mouth width.

    Returns:
        ``[l, t, r, b]`` truncated to ints. The box spans the two mouth
        corners horizontally; the top is half a box-height above the left
        corner and the bottom is half a box-height below the right corner.
    """
    left_x, left_y = face_landmarks[6:8]
    right_x, right_y = face_landmarks[8:10]

    # Height is derived from the mouth width, then split evenly above/below.
    half_height = (right_x - left_x) * height_multiplier / 2

    corners = (left_x, left_y - half_height, right_x, right_y + half_height)
    return [int(value) for value in corners]

# generate eye boxes
def get_eye_boxes(face_landmarks,eye_multiplier):
    """Return square integer boxes centred on the two eye landmarks.

    Args:
        face_landmarks: ten landmark coordinates (reshaped here to 5 points
            of (x, y)); point 0 is used as the left eye and point 1 as the
            right eye.
        eye_multiplier: divisor applied to the horizontal distance between
            the eyes to obtain the side length of each box.

    Returns:
        ``(left_eye_bbox, right_eye_bbox)``, each ``[l, t, r, b]`` truncated
        to ints.
    """
    points = np.array(face_landmarks).reshape(5, 2)
    left_cx, left_cy = points[0]
    right_cx, right_cy = points[1]

    # Both boxes are squares whose side scales with the inter-eye distance.
    side = (right_cx - left_cx) / eye_multiplier
    half_side = side * 0.5

    def box_around(cx, cy):
        corners = (cx - half_side, cy - half_side, cx + half_side, cy + half_side)
        return [int(value) for value in corners]

    return box_around(left_cx, left_cy), box_around(right_cx, right_cy)

# load image function for eye blink model
def _crop_eye_tensor(image, bbox, expend_ratio=0):
    """Crop ``bbox`` from ``image``, resize to 80x80 and convert to a tensor.

    The box is optionally widened by ``expend_ratio`` and then clamped to the
    image. Slice ends are exclusive, so the right/bottom edges are clamped to
    the image width/height themselves (clamping to ``size - 1`` would drop the
    last column/row whenever a box touches the border).

    Raises:
        ValueError: if the clamped box has no area (box lies outside the
            image or is degenerate).
    """
    img_h, img_w = image.shape[:2]
    l, t, r, b = bbox
    w, h = r - l, b - t
    pad_x = int(expend_ratio * w / 2)
    pad_y = int(expend_ratio * h / 2)
    l, r = max(0, l - pad_x), min(img_w, r + pad_x)
    t, b = max(0, t - pad_y), min(img_h, b + pad_y)
    if r <= l or b <= t:
        # Fail with a readable message instead of an opaque cv2.resize assertion.
        raise ValueError(f"eye box {list(bbox)} does not overlap the {img_w}x{img_h} image")

    crop = cv2.resize(image[t:b, l:r], (80, 80))
    input_transform = Compose([
        ToTensor(),
    ])
    # Pixel values are scaled to [0, 1] here, before ToTensor is applied.
    return input_transform(np.array(crop, np.float32) / 255)


def load_image_eyes(image,left_bbox,right_bbox):
    """Build the eye-model inputs for the left and right eye boxes.

    Args:
        image: image array indexed as ``image[row, col]`` (``shape[0]`` is
            used as height, ``shape[1]`` as width).
            NOTE(review): an earlier version of this loader read the image
            from disk and converted BGR to RGB; callers now pass the array
            directly, so confirm the channel order the model expects.
        left_bbox: ``[l, t, r, b]`` integer box for the left eye.
        right_bbox: ``[l, t, r, b]`` integer box for the right eye.

    Returns:
        ``(left_image, right_image)``: the two 80x80 crops, scaled to [0, 1]
        and passed through ``ToTensor``.

    Raises:
        ValueError: if either box does not overlap the image.
    """
    expend_ratio = 0  # 0 keeps the boxes exactly as given (no widening)
    left_image = _crop_eye_tensor(image, left_bbox, expend_ratio)
    right_image = _crop_eye_tensor(image, right_bbox, expend_ratio)
    return left_image,right_image

def load_mouth_tensor(image,bbox):
    """Crop the mouth box from ``image`` and return it as a model-ready tensor.

    Args:
        image: image array indexed as ``image[row, col]``.
        bbox: ``[l, t, r, b]`` integer box for the mouth.

    Returns:
        The crop resized to 80x80, scaled to [0, 1] and passed through
        ``ToTensor``.

    Raises:
        ValueError: if the box does not overlap the image.
    """
    img_h, img_w = image.shape[:2]
    l,t,r,b=bbox
    # Clamp to the image, as load_image_eyes does. Without this a negative
    # left/top coordinate is interpreted as a from-the-end index and yields
    # the wrong (usually empty) region.
    l, t = max(0, l), max(0, t)
    r, b = min(img_w, r), min(img_h, b)
    if r <= l or b <= t:
        raise ValueError(f"mouth box {list(bbox)} does not overlap the {img_w}x{img_h} image")

    image = cv2.resize(image[t:b, l:r], (80, 80))
    input_transform = Compose([
        ToTensor(),
    ])
    # Pixel values are scaled to [0, 1] here, before ToTensor is applied.
    image = input_transform(np.array(image, np.float32) / 255)
    return image

# Build the RetinaFace face detector from the `cfg_mnet` config and restore its weights.
cfg = cfg_mnet
mode = 'test'

face_detector_model = RetinaFace(cfg=cfg, phase=mode)
model_weight_path = '/Users/haidiazaman/Desktop/secure-face-capture-python-app/haidi_gojek_hackathon_codes/RF/weights/old_weights/mobilenet0.25_best.pth'

state_dict = torch.load(model_weight_path, map_location=device)

# Checkpoint keys may carry a leading `module.` (presumably left by a
# DataParallel-style wrapper); strip it so the keys match the bare model.
new_state_dict = OrderedDict()
for key, weights in state_dict.items():
    stripped_key = key[7:] if key.startswith('module.') else key
    new_state_dict[stripped_key] = weights

face_detector_model.load_state_dict(new_state_dict)

face_detector_model = face_detector_model.to(device)
_ = face_detector_model.eval()

# Inference settings are read from the detector config.
confidence_threshold = cfg['infer_confidence_threshold']
nms_threshold = cfg['infer_nms_threshold']  # 0.4 according to the original note
iou_thresh = cfg['infer_iou_thresh']  # 0.5 according to the original note
img_dim = cfg['infer_image_size']

# Set up the eye model (3 output classes) that is used to flag blocked eyes.
eye_blink_model_path='/Users/haidiazaman/Desktop/secure-face-capture-python-app/haidi_gojek_hackathon_codes/weights/eyeblink_model_epoch908.pt'
eye_blink_model = Eye_Net(
    'mobilenetv3_small_050',
    in_channel=3,
    num_classes=3,
)
_ = eye_blink_model.to(device)

# Restore the trained weights (mapped onto `device`), then call eval().
eye_blink_state = torch.load(eye_blink_model_path, map_location=device)
eye_blink_model.load_state_dict(eye_blink_state)
_ = eye_blink_model.eval()

# Set up the mouth model (2 output classes) that is used to flag a blocked mouth.
# It reuses the Eye_Net architecture with a different checkpoint.
mouth_block_model_path='/Users/haidiazaman/Desktop/secure-face-capture-python-app/haidi_gojek_hackathon_codes/weights/mouthblock_model_epoch910.pt'
mouth_block_model = Eye_Net(
    'mobilenetv3_small_050',
    in_channel=3,
    num_classes=2,
)
_ = mouth_block_model.to(device)

# Restore the trained weights (mapped onto `device`), then call eval().
mouth_block_state = torch.load(mouth_block_model_path, map_location=device)
mouth_block_model.load_state_dict(mouth_block_state)
_ = mouth_block_model.eval()

In [None]:
import cv2
import time
from collections import deque

# Live webcam demo: detect faces, then score each eye and the mouth with the
# block models and draw a green (clear) or red (blocked) box around each region.
# Press 'q' in the preview window to quit.

height_multiplier=0.9   # mouth box height as a multiple of the mouth width
eye_multiplier=2        # eye box side = inter-eye distance / eye_multiplier
block_threshold = 0.8   # smoothed score above which a region is drawn as blocked
SMOOTHING_WINDOW=20     # number of most recent scores averaged per region

# Overlay colours; OpenCV drawing functions take BGR tuples.
_WHITE = (255, 255, 255)
_BLUE = (255, 0, 0)
_GREEN = (0, 255, 0)
_RED = (0, 0, 255)


def _put_text(image, text, org, font_scale, color, thickness):
    """Draw ``text`` at ``org`` with the demo's font and return the image."""
    return cv2.putText(image, text, org, cv2.FONT_HERSHEY_SIMPLEX, font_scale,
                       color, thickness, cv2.LINE_AA, False)


def _smoothed_block_score(model, crop_tensor, store):
    """Score one crop, record it in ``store`` and return the window mean.

    The score is the softmax value of the model's last output class, which
    is the value compared against ``block_threshold``.
    """
    with torch.no_grad():  # inference only; no autograd graph needed
        logits = model(crop_tensor.to(device).unsqueeze(0)).cpu().numpy()
    store.append(softmax(logits)[0][-1])
    return np.mean(list(store))


def _draw_region(image, bbox, score, score_text, caption, caption_org):
    """Draw a red box plus warning texts if ``score`` exceeds the threshold,
    otherwise a green box. Returns the annotated image."""
    l, t, r, b = bbox
    if score > block_threshold:
        cv2.rectangle(image, (l, t), (r, b), _RED, 2)
        # score label slightly above the region box
        image = _put_text(image, score_text, (l, t - 10), 0.5, _WHITE, 1)
        # caption telling the user to remove the blocking item
        image = _put_text(image, caption, caption_org, 1, _RED, 2)
    else:
        cv2.rectangle(image, (l, t), (r, b), _GREEN, 2)
    return image


# Score histories must outlive a single frame, otherwise the smoothing window
# never holds more than the current frame's scores. deque(maxlen=...) keeps
# only the most recent SMOOTHING_WINDOW entries.
# NOTE(review): all detected faces share these histories, as in the original.
left_eye_store = deque(maxlen=SMOOTHING_WINDOW)
right_eye_store = deque(maxlen=SMOOTHING_WINDOW)
mouth_store = deque(maxlen=SMOOTHING_WINDOW)

cap=cv2.VideoCapture(0)
try:
    while True:
        start_time=time.time()
        # Read a frame from the webcam (BGR)
        ret, image = cap.read()
        if not ret or image is None:
            print("Failed to read a frame from the webcam")
            break
        image=image[100:650,300:900]
        if image.size == 0:
            print("Webcam frame is smaller than the fixed crop region")
            break

        # Crops for the models are taken from an untouched copy so that boxes
        # and captions drawn on `image` never leak into the model inputs.
        # NOTE(review): the original computed an RGB copy of the frame but
        # never used it; the models are fed BGR here exactly as before.
        # Confirm which channel order the models were trained on.
        clean_frame = image.copy()

        try:
            # face detector - face box and face landmarks
            dets, img_raw = infer(face_detector_model, image, img_dim, device, cfg, confidence_threshold, nms_threshold)
            for det in dets:
                l,t,r,b=(int(v) for v in det[:4])
                face_score=det[4]
                face_landmarks=det[5:]

                # face box with its detection score slightly above it
                cv2.rectangle(image, (l, t), (r, b), _BLUE, 2)
                image = _put_text(image, f"Score: {face_score:.2f}", (l, t - 10), 0.5, _WHITE, 1)

                # eyes: green box if clear, red box and caption if blocked
                left_eye_bbox,right_eye_bbox = get_eye_boxes(face_landmarks,eye_multiplier)
                left_image,right_image=load_image_eyes(clean_frame,left_eye_bbox,right_eye_bbox)

                left_block_score=_smoothed_block_score(eye_blink_model, left_image, left_eye_store)
                right_block_score=_smoothed_block_score(eye_blink_model, right_image, right_eye_store)

                image = _draw_region(image, left_eye_bbox, left_block_score,
                                     f"Left Block Score: {left_block_score:.2f}",
                                     f"Left eye blocked!", (20, 100))
                image = _draw_region(image, right_eye_bbox, right_block_score,
                                     f"Right Block Score: {right_block_score:.2f}",
                                     f"Right eye blocked!", (300, 100))

                # mouth: same scheme with the mouth block model
                mouth_box=get_mouth_box(face_landmarks,height_multiplier)
                mouth_image=load_mouth_tensor(clean_frame,mouth_box)
                mouth_block_score=_smoothed_block_score(mouth_block_model, mouth_image, mouth_store)
                image = _draw_region(image, mouth_box, mouth_block_score,
                                     f"Mouth Block Score: {mouth_block_score:.2f}",
                                     f"Mouth blocked!", (250,450))
        except Exception as e:
            # Best effort: report the problem and still show the frame below,
            # so the window stays responsive and 'q' keeps working.
            print(e)

        # Show every frame, including frames without any detection.
        end_time=time.time()-start_time
        image = _put_text(image, f"Inference time: {end_time:.2f} seconds", (40, 40), 0.75, _WHITE, 2)
        cv2.imshow('Face Detection', image)

        # Break the loop when 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()