In [31]:
import tensorflow as tf
from tensorflow_docs.vis import embed
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
        Convolution2D,
        LocallyConnected2D,
        MaxPooling2D,
        Flatten,
        Dense,
        Dropout,
    )
from tensorflow.keras.preprocessing import image

from imutils import paths
from datetime import date
from datetime import datetime

import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import imageio
import cv2
import os
import glob
import time
import IPython.display as ipd
from tqdm.notebook import tqdm
#from tqdm import tqdm
import subprocess
import math
from PIL import Image
import pickle


import deepface
from deepface import DeepFace


import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

In [32]:
# Notebook-wide configuration.
# NOTE(review): MAX_SEQ_LENGTH, NUM_FEATURES, IMAGE_SIZE, EPOCHS, batch_size and
# feature_model are not referenced by any cell visible in this notebook export;
# confirm whether they are still needed.
MAX_SEQ_LENGTH = 20
NUM_FEATURES = 512
# Side length (pixels) of the square face crops; used to build the
# (IMG_SIZE, IMG_SIZE) target size passed to extract_faces.
IMG_SIZE = 160
IMAGE_SIZE = (160, 160)

EPOCHS = 5
batch_size = 5

# NOTE(review): the extractor actually loaded later in the notebook is
# facenet_model.Facenet512, not plain 'Facenet' -- confirm this label.
feature_model = 'Facenet'

In [33]:
def findCosineDistance(source_representation, test_representation):
    """Return the cosine distance (1 - cosine similarity) between two vectors.

    Args:
        source_representation: first vector (array-like).
        test_representation: second vector (array-like).

    Returns:
        ``1 - (s . t) / (||s|| * ||t||)``.
    """
    dot_product = np.matmul(np.transpose(source_representation), test_representation)
    source_sq_norm = np.sum(np.multiply(source_representation, source_representation))
    test_sq_norm = np.sum(np.multiply(test_representation, test_representation))
    norm_product = np.sqrt(source_sq_norm) * np.sqrt(test_sq_norm)
    return 1 - (dot_product / norm_product)


def findEuclideanDistance(source_representation, test_representation):
    """Return the Euclidean (L2) distance between two vectors.

    Plain Python lists are converted to numpy arrays first; any other input
    type is used exactly as passed.
    """
    if isinstance(source_representation, list):
        source_representation = np.array(source_representation)
    if isinstance(test_representation, list):
        test_representation = np.array(test_representation)

    delta = source_representation - test_representation
    squared_sum = np.sum(np.multiply(delta, delta))
    return np.sqrt(squared_sum)


def l2_normalize(x):
    """Scale ``x`` by the inverse of its L2 norm (square root of the sum of squares)."""
    squared_sum = np.sum(np.multiply(x, x))
    return x / np.sqrt(squared_sum)

def normalize(img):
    """Standardize ``img``: subtract its mean and divide by its standard deviation."""
    centered = img - img.mean()
    return centered / img.std()

def findThreshold(model_name, distance_metric):
    """Look up the verification threshold for a model / distance-metric pair.

    Unknown models fall back to a generic per-metric table; an unknown metric
    falls back to 0.4.
    """
    # Used when ``model_name`` is not one of the models listed below.
    fallback_by_metric = {"cosine": 0.40, "euclidean": 0.55, "euclidean_l2": 0.75}

    per_model = {
        "VGG-Face": {"cosine": 0.40, "euclidean": 0.60, "euclidean_l2": 0.86},
        "Facenet": {"cosine": 0.40, "euclidean": 10, "euclidean_l2": 0.80},
        "Facenet512": {"cosine": 0.30, "euclidean": 23.56, "euclidean_l2": 1.04},
        "ArcFace": {"cosine": 0.68, "euclidean": 4.15, "euclidean_l2": 1.13},
        "Dlib": {"cosine": 0.07, "euclidean": 0.6, "euclidean_l2": 0.4},
        "SFace": {"cosine": 0.593, "euclidean": 10.734, "euclidean_l2": 1.055},
        "OpenFace": {"cosine": 0.10, "euclidean": 0.55, "euclidean_l2": 0.55},
        "DeepFace": {"cosine": 0.23, "euclidean": 64, "euclidean_l2": 0.64},
        "DeepID": {"cosine": 0.015, "euclidean": 45, "euclidean_l2": 0.17},
    }

    metric_table = per_model.get(model_name, fallback_by_metric)
    return metric_table.get(distance_metric, 0.4)

In [34]:
def alignment_procedure(img, left_eye, right_eye):
    """Align the face in ``img`` by rotating it according to the eye positions.

    Args:
        img: image array to rotate.
        left_eye: (x, y) coordinates of the left eye.
        right_eye: (x, y) coordinates of the right eye.

    Returns:
        The rotated image as a numpy array, or ``img`` unchanged when the
        triangle built from the eyes is degenerate (a zero-length edge).
    """
    lx, ly = left_eye
    rx, ry = right_eye

    # Third corner of the right triangle spanned by the eyes, plus the
    # rotation direction (-1: same direction as the clock, 1: inverse).
    if ly > ry:
        corner, direction = (rx, ly), -1
    else:
        corner, direction = (lx, ry), 1

    # Edge lengths of the triangle.
    side_a = findEuclideanDistance(np.array(left_eye), np.array(corner))
    side_b = findEuclideanDistance(np.array(right_eye), np.array(corner))
    side_c = findEuclideanDistance(np.array(right_eye), np.array(left_eye))

    # A zero edge would cause a division by zero in the cosine rule below.
    if side_b == 0 or side_c == 0:
        return img

    # Cosine rule, then radians -> degrees.
    cos_a = (side_b * side_b + side_c * side_c - side_a * side_a) / (2 * side_b * side_c)
    angle = np.arccos(cos_a)
    angle = (angle * 180) / math.pi

    if direction == -1:
        angle = 90 - angle

    rotated = Image.fromarray(img).rotate(direction * angle)
    return np.array(rotated)

In [35]:
def build_detector_model():
    """Create a MediaPipe ``FaceDetection`` object with min_detection_confidence=0.7."""
    # Imported locally: mediapipe is treated as an optional dependency here.
    import mediapipe as mp

    return mp.solutions.face_detection.FaceDetection(min_detection_confidence=0.7)

def detect_faces(face_detector, img, align=True):
    """Run ``face_detector`` on ``img`` and collect the detected face crops.

    Args:
        face_detector: object exposing ``process(img)`` whose result has a
            ``detections`` attribute (as built by ``build_detector_model``).
        img: image array; ``img.shape[0]`` is the height, ``img.shape[1]`` the width.
        align: when True, each crop is passed through ``alignment_procedure``.

    Returns:
        list of ``(face_crop, [x, y, w, h], confidence)`` tuples; empty when
        the detector reports no detections.
    """
    img_height, img_width = img.shape[0], img.shape[1]

    results = face_detector.process(img)
    if results.detections is None:
        return []

    faces = []
    for detection in results.detections:
        (confidence,) = detection.score

        box = detection.location_data.relative_bounding_box
        keypoints = detection.location_data.relative_keypoints

        # Relative coordinates -> pixel coordinates.
        x = int(box.xmin * img_width)
        w = int(box.width * img_width)
        y = int(box.ymin * img_height)
        h = int(box.height * img_height)

        # Keypoints 0 and 1 are used as the eyes.
        left_eye = (int(keypoints[0].x * img_width), int(keypoints[0].y * img_height))
        right_eye = (int(keypoints[1].x * img_width), int(keypoints[1].y * img_height))

        # Only boxes whose top-left corner is strictly inside the image are kept.
        if not (x > 0 and y > 0):
            continue

        face_crop = img[y : y + h, x : x + w]
        if align:
            face_crop = alignment_procedure(face_crop, left_eye, right_eye)

        faces.append((face_crop, [x, y, w, h], confidence))

    return faces

In [36]:
def extract_faces(
    img,
    target_size=(160, 160),
    detector_backend="mediapipe",
    grayscale=False,
    enforce_detection=True,
    align=True,
):
    """Detect faces in an image and return them resized, padded and scaled by 1/255.

    Args:
        img: image as a numpy array (path / url / base64 loading is not
            performed by this notebook version).
        target_size (tuple, optional): (height, width) of every returned face.
            Defaults to (160, 160).
        detector_backend (str, optional): kept for interface compatibility
            only; it is not read, the MediaPipe detector from
            ``build_detector_model`` is always used. Defaults to "mediapipe".
        grayscale (bool, optional): whether to convert the extracted faces to
            grayscale. Defaults to False.
        enforce_detection (bool, optional): when True, raise if no face is
            found; when False, fall back to treating the whole image as the
            face (with confidence 0). Defaults to True.
        align (bool, optional): whether to align the extracted faces.
            Defaults to True.

    Raises:
        ValueError: if no face could be detected (or every detected crop was
            empty) and enforce_detection is True.

    Returns:
        list: one ``[img_pixels, region_obj, confidence]`` entry per face.
        ``img_pixels`` is the face array with a leading batch axis, divided by
        255; ``region_obj`` is a dict with integer ``x``, ``y``, ``w``, ``h``.
    """
    # Normalise to a tuple so the shape comparison below also works for lists.
    target_size = tuple(target_size)

    extracted_faces = []

    # Region describing the whole image, used by the no-detection fallback.
    img_region = [0, 0, img.shape[1], img.shape[0]]

    face_detector = build_detector_model()
    face_objs = detect_faces(face_detector, img, align)

    # in case of no face found
    if len(face_objs) == 0 and enforce_detection is True:
        raise ValueError(
            "Face could not be detected. Please confirm that the picture is a face photo "
            + "or consider to set enforce_detection param to False."
        )

    if len(face_objs) == 0 and enforce_detection is False:
        face_objs = [(img, img_region, 0)]

    for current_img, current_region, confidence in face_objs:
        # Skip empty crops (e.g. a bounding box that fell outside the image).
        if current_img.shape[0] <= 0 or current_img.shape[1] <= 0:
            continue

        if grayscale is True:
            # NOTE(review): COLOR_BGR2GRAY assumes BGR channel order, while the
            # callers in this notebook pass RGB-ordered frames -- confirm.
            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)

        # Resize keeping the aspect ratio so the face fits inside target_size.
        factor_0 = target_size[0] / current_img.shape[0]
        factor_1 = target_size[1] / current_img.shape[1]
        factor = min(factor_0, factor_1)

        # cv2.resize expects (width, height); clamp to 1 px so that very thin
        # crops do not produce a zero-sized destination.
        dsize = (
            max(1, int(current_img.shape[1] * factor)),
            max(1, int(current_img.shape[0] * factor)),
        )
        current_img = cv2.resize(current_img, dsize)

        # Pad with zeros so the resized face sits in the middle of the target.
        diff_0 = target_size[0] - current_img.shape[0]
        diff_1 = target_size[1] - current_img.shape[1]
        pad_width = [
            (diff_0 // 2, diff_0 - diff_0 // 2),
            (diff_1 // 2, diff_1 - diff_1 // 2),
        ]
        # Leave any trailing (channel) axis unpadded.
        pad_width += [(0, 0)] * (current_img.ndim - 2)
        current_img = np.pad(current_img, pad_width, "constant")

        # Safety net: force the exact target size. target_size is
        # (height, width) while cv2.resize takes (width, height).
        if current_img.shape[0:2] != target_size:
            current_img = cv2.resize(current_img, target_size[::-1])

        # Convert to a float array, add the batch axis, scale into [0, 1].
        img_pixels = image.img_to_array(current_img)
        img_pixels = np.expand_dims(img_pixels, axis=0)
        img_pixels /= 255

        # int cast is for the exception - object of type 'float32' is not JSON serializable
        region_obj = {
            "x": int(current_region[0]),
            "y": int(current_region[1]),
            "w": int(current_region[2]),
            "h": int(current_region[3]),
        }

        extracted_faces.append([img_pixels, region_obj, confidence])

    if len(extracted_faces) == 0 and enforce_detection == True:
        raise ValueError(
            f"Detected face shape is {img.shape}. Consider to set enforce_detection arg to False."
        )

    return extracted_faces

In [37]:
# Pass a single face image taken from the result of extract_faces, for example:
# face_obj = extract_faces(img, target_size = target_frame_size, enforce_detection = False)

def get_embedding(img):
    """Compute the embedding of one face image with the global ``feature_extractor``.

    Args:
        img: array whose first three axes are read as the image dimensions;
            it is reshaped to a batch of one before being fed to the model.

    Returns:
        The model output converted to a numpy array.
    """
    dim_0, dim_1, dim_2 = img.shape[0], img.shape[1], img.shape[2]
    batch = np.reshape(img, (1, dim_0, dim_1, dim_2))
    return np.array(feature_extractor([batch]))
    

In [51]:
def find_target_identity(img, embeddings, threshold=0.4):
    """Find the stored identity whose embedding is closest to the face in ``img``.

    Args:
        img: a single face image, as accepted by ``get_embedding``.
        embeddings: mapping of identity -> stored embedding. ``None`` or an
            empty mapping is treated as "nobody enrolled".
        threshold: maximum cosine distance accepted as a match. Defaults to
            0.4, the value that was previously hard-coded here.

    Returns:
        tuple: ``(identity, distance)``. ``identity`` is ``'Not Found!!'``
        when the best distance exceeds ``threshold``; ``distance`` is the
        smallest cosine distance seen (100 when nothing was compared).
    """
    target_emb = np.squeeze(get_embedding(img))

    best_identity = ''
    best_dist = 100  # sentinel, larger than any distance compared below

    # load_representations returns None on failure; do not crash on that.
    for key, known_emb in (embeddings or {}).items():
        dist = findCosineDistance(known_emb, target_emb)
        if dist < best_dist:
            best_dist = dist
            best_identity = key

    if best_dist > threshold:
        best_identity = 'Not Found!!'

    return best_identity, best_dist

In [39]:
# Build the embedding model used by get_embedding (it reads the global
# `feature_extractor` defined here).
# NOTE(review): `facenet_model` is presumably a project-local module; the
# weights path below is relative to the notebook's working directory.
import facenet_model
facenet_path = 'models/feature_extractors/facenet512_weights.h5'
feature_extractor = facenet_model.Facenet512(dimension = 512, weights_path = facenet_path)

In [40]:
def make_representations(database_path, representations_path):
    """Build one face embedding per student folder and pickle them to disk.

    Each sub-directory of ``database_path`` is treated as one student (the
    directory name becomes the dictionary key). The first readable image in
    that directory, in sorted filename order, is used for the embedding.

    Args:
        database_path: directory containing one sub-directory per student.
        representations_path: directory in which ``representations.pkl`` is
            written; it is created if missing.

    Entries that are not directories, and student folders without a readable
    image, are skipped instead of aborting the whole run.
    """
    embeddings = {}

    for student in os.listdir(database_path):
        dir_path = os.path.join(database_path, student)
        if not os.path.isdir(dir_path):
            # Stray files (e.g. .DS_Store) are not student folders.
            continue

        # cv2.imread returns None for files it cannot decode; try the next one.
        img = None
        for img_name in sorted(os.listdir(dir_path)):
            img = cv2.imread(os.path.join(dir_path, img_name))
            if img is not None:
                break

        if img is None:
            print(f'Skipping {student}: no readable image found')
            continue

        img = img[:, :, [2, 1, 0]]  # reverse the channel order (BGR -> RGB)
        face_obj = extract_faces(
            img, target_size=(IMG_SIZE, IMG_SIZE), enforce_detection=False
        )
        # face_obj[0][0] is the batched pixel array of the first face; [0] drops the batch axis.
        embeddings[student] = np.squeeze(get_embedding(face_obj[0][0][0]))

    # save dictionary
    os.makedirs(representations_path, exist_ok=True)
    with open(os.path.join(representations_path, 'representations.pkl'), 'wb') as fp:
        pickle.dump(embeddings, fp)
    print('Face embeddings saved successfully to file')

In [41]:
def load_representations(representations_path):
    """Load the pickled identity -> embedding dictionary.

    Args:
        representations_path: directory containing ``representations.pkl``.

    Returns:
        The unpickled object, or ``None`` when the file does not exist or
        cannot be read (a message is printed in the latter case).
    """
    pkl_path = os.path.join(representations_path, 'representations.pkl')
    try:
        with open(pkl_path, 'rb') as fp:
            # NOTE: unpickling can execute arbitrary code; only load files
            # written by this notebook.
            return pickle.load(fp)
    except FileNotFoundError:
        # Nothing has been saved yet.
        return None
    except Exception as err:
        # Unpickling a damaged file can raise many exception types; report it
        # instead of hiding it, but keep the "return None" contract. Unlike a
        # bare except, this does not swallow KeyboardInterrupt / SystemExit.
        print(f'Could not load representations from {pkl_path}: {err}')
        return None

In [42]:
def add_representation(identity, new_embedding, representations_path = 'representations'):
    """Add (or replace) one identity's embedding and re-save the pickle file.

    Args:
        identity: dictionary key for the embedding.
            NOTE(review): make_representations uses directory names (str) as
            keys while new_entry_to_database passes an int -- confirm that
            mixed key types are intended.
        new_embedding: embedding to store under ``identity``.
        representations_path: directory holding ``representations.pkl``;
            created if missing.

    Returns:
        dict: the updated identity -> embedding dictionary.

    Raises:
        RuntimeError: if the representations file exists but could not be
            loaded (it is left untouched rather than overwritten).
    """
    pkl_path = os.path.join(representations_path, 'representations.pkl')

    embeddings = load_representations(representations_path)
    if embeddings is None:
        if os.path.exists(pkl_path):
            # The file is there but unreadable: refuse to overwrite it.
            raise RuntimeError(
                f'{pkl_path} exists but could not be loaded; not overwriting it.'
            )
        # First entry ever: start from an empty dictionary.
        embeddings = {}

    embeddings[identity] = new_embedding

    # save dictionary
    os.makedirs(representations_path, exist_ok=True)
    with open(pkl_path, 'wb') as fp:
        pickle.dump(embeddings, fp)
    print('Face embeddings saved successfully to file')

    return embeddings

In [43]:
# First-time setup: build embeddings for every student folder under 'database'
# and write them to 'representations/representations.pkl'.
make_representations('database','representations')

Face embeddings saved successfully to file


In [62]:
def inference(video_path, database_path = 'database', representations_path = 'representations'):
    """Mark students present by recognising faces in a video or camera stream.

    Reads the global ``student_df`` roster, marks every student 'Absent',
    then flips to 'Present' each identity recognised in the stream. The
    stream is shown in a window; press 'q' to stop. The result is written to
    ``Attendances/<timestamp>_attendance.xlsx``.

    Args:
        video_path: path of a video file; a falsy value opens camera 0.
        database_path: not used by this function; kept for interface
            compatibility.
        representations_path: directory holding ``representations.pkl``.

    Raises:
        FileNotFoundError: if the stored representations cannot be loaded.
        RuntimeError: if the video source cannot be opened.

    NOTE(review): only the first face returned by extract_faces is processed
    per frame -- confirm whether several faces per frame should be handled.
    """
    attendance_df = student_df.copy()
    attendance_df['Attendance'] = 'Absent'

    embeddings = load_representations(representations_path)
    if embeddings is None:
        raise FileNotFoundError(
            f'No face representations could be loaded from {representations_path!r}.'
        )

    cap = cv2.VideoCapture(video_path if video_path else 0)
    target_frame_size = (IMG_SIZE, IMG_SIZE)

    try:
        if not cap.isOpened():
            raise RuntimeError('Could not open the video source.')

        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                # End of the video file or the camera stopped delivering frames.
                break

            # The detector / extractor are fed a channel-reversed copy (BGR -> RGB);
            # the original BGR frame is kept for drawing and display.
            rgb_frame = frame[:, :, [2, 1, 0]]
            face_obj = extract_faces(
                rgb_frame, target_size=target_frame_size, enforce_detection=False
            )

            if face_obj:
                face_pixels, bbox, _confidence = face_obj[0]
                identity, distance = find_target_identity(face_pixels[0], embeddings)

                student_id = None
                if identity != 'Not Found!!':
                    try:
                        student_id = int(identity)
                    except (TypeError, ValueError):
                        # Non-numeric key: cannot be matched to the roster index.
                        student_id = None

                if student_id is not None:
                    attendance_df.loc[student_id, 'Attendance'] = 'Present'
                    name = attendance_df.loc[student_id, 'Name']

                    x = int(bbox['x'])
                    y = int(bbox['y'])
                    w = int(bbox['w'])
                    h = int(bbox['h'])
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3)
                    cv2.putText(frame, f'ID : {str(student_id)}', (x, y - 40),
                                cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255), 1)
                    cv2.putText(frame, f'Name : {str(name)}', (x, y - 20),
                                cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255), 1)

            # Always refresh the window and poll the keyboard, even when nobody
            # was recognised; otherwise the window freezes and 'q' is ignored.
            cv2.imshow("Frame", frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break

    finally:
        cap.release()
        cv2.destroyAllWindows()

    # dd-Mon-YYYY_HH-MM-SS
    timestamp = datetime.now().strftime("%d-%b-%Y_%H-%M-%S")
    os.makedirs('Attendances', exist_ok=True)
    attendance_df.to_excel('Attendances/' + timestamp + '_attendance.xlsx')

In [63]:
# Load the student roster indexed by the 'ID' column; inference() reads it
# through the global name `student_df`.
student_df = pd.read_excel('Students_information.xlsx').set_index('ID')
# A falsy video_path makes inference() open camera 0 instead of a video file.
inference(None)

In [67]:
# Manually add (or overwrite) roster row 990 and write the sheet back in place.
student_df = pd.read_excel('Students_information.xlsx').set_index('ID')
# Assigning a scalar to a row sets every column of that row to the value.
# NOTE(review): presumably the sheet has a single 'Name' column -- verify.
student_df.loc[990] = "John Doe"
student_df.to_excel('Students_information.xlsx')

In [81]:
# NOTE(review): `keyboard` is not referenced by any code visible in this
# notebook export -- confirm whether this import is still needed.
import keyboard
# Roster used by new_entry_to_database through the global name `students_df`
# (note: a different name from the `student_df` read by inference()).
students_df = pd.read_excel('Students_information.xlsx').set_index('ID')

def new_entry_to_database(video_path, database_path = 'database', representations_path = 'representations'):
    """Enrol a new student: capture a frame, store it, and save its embedding.

    Prompts for the student ID and name, shows the video/camera stream until
    'q' is pressed (or the stream ends), saves the last displayed frame under
    ``<database_path>/<ID>/``, computes its face embedding, adds it to the
    stored representations and writes the updated global ``students_df``
    roster to 'Students_information.xlsx'.

    Args:
        video_path: path of a video file; a falsy value opens camera 0.
        database_path: root directory of the per-student image folders.
        representations_path: directory holding ``representations.pkl``.

    Raises:
        RuntimeError: if no frame could be read from the video source.
        ValueError: if no face data could be extracted from the captured frame.
    """
    # Confirmation: repeat until the operator confirms the entered data.
    while True:
        try:
            identity = int(input('Enter Student ID : '))
        except ValueError:
            print('Student ID must be an integer. Please try again.')
            continue
        name = input('Enter Student Name: ')
        print(f'Student ID {identity}, name {name} . Press \'y\' to confirm')
        if input() == 'y':
            break

    cap = cv2.VideoCapture(video_path if video_path else 0)

    # Last frame that was successfully read and displayed.
    captured_frame = None
    try:
        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                # Stream ended or failed; keep the last good frame, if any.
                break
            captured_frame = frame

            cv2.imshow("Playing", frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

    if captured_frame is None:
        raise RuntimeError('No frame could be captured from the video source.')

    save_dir = os.path.join(database_path, str(identity))
    os.makedirs(save_dir, exist_ok=True)
    cv2.imwrite(os.path.join(save_dir, f'ID_{str(identity)}_{name}.jpg'), captured_frame)

    rgb_frame = captured_frame[:, :, [2, 1, 0]]  # reverse the channel order (BGR -> RGB)
    face_obj = extract_faces(
        rgb_frame, target_size=(IMG_SIZE, IMG_SIZE), enforce_detection=False
    )
    if not face_obj:
        raise ValueError('No usable face region could be extracted from the captured frame.')
    emb = np.squeeze(get_embedding(face_obj[0][0][0]))

    # Honour the representations_path argument instead of the callee's default.
    add_representation(identity, emb, representations_path)

    # Update the roster only after the capture and embedding succeeded.
    students_df.loc[identity] = name
    students_df.to_excel('Students_information.xlsx')

In [82]:
# Enrol a student from the default camera (a falsy video_path opens camera 0).
new_entry_to_database(None)

Enter Student ID : 1002
Enter Student Name: Anya Joya
Student name 1002 . Press 'y' to confirm
y
Face embeddings saved successfully to file
