<a href="https://colab.research.google.com/github/cmarley3-14/EE443-Final-Project/blob/main/EE443_Final_Project_Face_Verification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Christopher Marley  
EE 443 - Prof. Hwang  
Final Project, Track 2: Face Verification  
June 5, 2023
  
  

**References**  
Training Data Source - https://www.kaggle.com/datasets/nhatdealin/casiawebface-dataset-crop  
Training Folder - https://drive.google.com/file/d/1PRiUyW2MzQPfBYlO4-LNJjmOnVHa1lCR/view?usp=share_link  
Validation/Testing Folder - https://drive.google.com/drive/folders/1FYVdAI74PVnJIYEqy5yPdrYA_4co6E3z  
Model and Weights Source - https://github.com/serengil/deepface/blob/ce4e4f664b66c05e682de8c0913798da0420dae1/deepface/DeepFace.py  
Siamese Network Source - https://www.kaggle.com/code/peremartramanonellas/how-to-create-a-siamese-network-to-compare-images#Notebooks-in-the-Tensorflow-Serie.

In [None]:
import cv2
from google.colab.patches import cv2_imshow

import gdown
import os
import time

from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow.keras.preprocessing import image

# Fetch the validation and test archives from Google Drive
# (the first link is saved as val.zip, the second as test.zip).
!gdown --fuzzy https://drive.google.com/file/d/1tUKri0ObB6c9ODCGE0Ks_xm9Lr4mcvyp/view?usp=drive_link
!gdown --fuzzy https://drive.google.com/file/d/1CgYs86z9YB2yvCdLq9ra1v4RTX8T7ijT/view?usp=drive_link

# Unpack both archives; -q silences the listing, -o overwrites existing files.
!unzip -q -o "val.zip" # quiet overwrite
!unzip -q -o "test.zip" # quiet overwrite

Downloading...
From: https://drive.google.com/uc?id=1tUKri0ObB6c9ODCGE0Ks_xm9Lr4mcvyp
To: /content/val.zip
100% 1.86M/1.86M [00:00<00:00, 41.8MB/s]
Downloading...
From: https://drive.google.com/uc?id=1CgYs86z9YB2yvCdLq9ra1v4RTX8T7ijT
To: /content/test.zip
100% 5.09M/5.09M [00:00<00:00, 14.5MB/s]


### Helper Functions

In [None]:
def get_opencv_path():
    """Return the ``data/`` directory that sits next to the imported cv2 module.

    The path is derived from ``cv2.__file__``: the last path component (the
    module file itself) is dropped, the remaining components are joined with
    forward slashes and ``/data/`` is appended.
    """
    *parent_parts, _module_file = cv2.__file__.split(os.path.sep)
    return "/".join(parent_parts) + "/data/"

def load_image(img):
    """Return an image as a numpy array.

    Args:
        img: either an image that is already a numpy array, or a path that
            ``cv2.imread`` can read.

    Returns:
        np.ndarray: a copy of the given array (the caller's array is never
        modified), or the image read from disk.

    Raises:
        ValueError: if ``cv2.imread`` could not read an image from the path.
    """
    # isinstance also accepts ndarray subclasses (e.g. masked arrays), which a
    # comparison on the type's module name does not.
    if isinstance(img, np.ndarray):
        return img.copy()

    loaded = cv2.imread(img)
    if loaded is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than later on `.shape`.
        raise ValueError(f"Could not read an image from {img!r}")
    return loaded

def findEuclideanDistance(source_representation, test_representation):
    """Return the Euclidean (L2) distance between two array-like vectors."""
    difference = np.array(source_representation) - np.array(test_representation)
    return np.sqrt(np.square(difference).sum())

def findCosineDistance(source_representation, test_representation):
    """Return the cosine distance (one minus cosine similarity) of two vectors."""
    dot_product = np.matmul(np.transpose(source_representation), test_representation)
    source_sq_norm = np.sum(np.multiply(source_representation, source_representation))
    test_sq_norm = np.sum(np.multiply(test_representation, test_representation))

    similarity = dot_product / (np.sqrt(source_sq_norm) * np.sqrt(test_sq_norm))
    return 1 - similarity

In [None]:

def build_cascade(model_name="haarcascade"):
    """Build an OpenCV Haar cascade classifier.

    Args:
        model_name (str): "haarcascade" for the frontal-face cascade or
            "haarcascade_eye" for the eye cascade.

    Returns:
        cv2.CascadeClassifier: classifier loaded from the cascade XML file
        found under the OpenCV data directory.

    Raises:
        ValueError: if ``model_name`` is not a supported name, or if the
            cascade XML file does not exist at the expected path.
    """
    cascade_files = {
        "haarcascade": "haarcascade_frontalface_default.xml",
        "haarcascade_eye": "haarcascade_eye.xml",
    }
    # Reject unknown names up front; otherwise no detector path is ever bound
    # and the failure surfaces as an unrelated UnboundLocalError.
    if model_name not in cascade_files:
        raise ValueError(
            f"Unknown cascade model '{model_name}'. Expected one of {sorted(cascade_files)}"
        )

    detector_path = get_opencv_path() + cascade_files[model_name]
    if not os.path.isfile(detector_path):
        raise ValueError(
            f"Confirm that opencv is installed on your environment! Expected path {detector_path}"
        )
    return cv2.CascadeClassifier(detector_path)

def build_detector_model():
    """Return a dict holding the face cascade and the eye cascade.

    Keys are "face_detector" and "eye_detector"; both values come from
    ``build_cascade``.
    """
    return {
        "face_detector": build_cascade("haarcascade"),
        "eye_detector": build_cascade("haarcascade_eye"),
    }

In [None]:
def alignment_procedure(img, left_eye, right_eye):
    """Rotate ``img`` based on the positions of the two eye centres.

    Args:
        img: image array accepted by ``PIL.Image.fromarray``.
        left_eye: (x, y) centre of the left eye.
        right_eye: (x, y) centre of the right eye.

    Returns:
        The rotated image as a numpy array, or ``img`` unchanged when the
        triangle built from the eyes is degenerate (a zero-length side).
    """
    lx, ly = left_eye
    rx, ry = right_eye

    # The third corner completes a right triangle with the two eyes; which
    # corner is used also fixes the rotation direction.
    if ly > ry:
        corner, direction = (rx, ly), -1  # rotate same direction to clock
    else:
        corner, direction = (lx, ry), 1  # rotate inverse direction of clock

    # triangle edge lengths
    side_a = findEuclideanDistance(np.array(left_eye), np.array(corner))
    side_b = findEuclideanDistance(np.array(right_eye), np.array(corner))
    side_c = findEuclideanDistance(np.array(right_eye), np.array(left_eye))

    # a zero-length side would divide by zero below: leave the image as is
    if side_b == 0 or side_c == 0:
        return img

    # law of cosines, then radians to degrees
    cos_a = (side_b * side_b + side_c * side_c - side_a * side_a) / (2 * side_b * side_c)
    angle = (np.arccos(cos_a) * 180) / np.pi
    if direction == -1:
        angle = 90 - angle

    return np.array(Image.fromarray(img).rotate(direction * angle))

def align_face(eye_detector, img):
    """Align a face crop using the two largest eye detections.

    Args:
        eye_detector: object exposing ``detectMultiScale`` (the eye cascade).
        img: BGR face image.

    Returns:
        The aligned image, or ``img`` unchanged when fewer than two eye
        candidates are detected.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    candidates = eye_detector.detectMultiScale(gray, 1.1, 10)

    # The eye cascade is noisy, so only the two largest boxes (by area) are used.
    by_area = sorted(candidates, key=lambda box: abs(box[2] * box[3]), reverse=True)
    if len(by_area) < 2:
        return img  # nothing to align against

    first, second = by_area[0], by_area[1]
    # boxes are (x, y, w, h); the box with the smaller x is treated as the left eye
    if first[0] < second[0]:
        left_box, right_box = first, second
    else:
        left_box, right_box = second, first

    def box_center(box):
        # centre of an (x, y, w, h) box, truncated to integers
        return (int(box[0] + (box[2] / 2)), int(box[1] + (box[3] / 2)))

    return alignment_procedure(img, box_center(left_box), box_center(right_box))

def detect_faces(detector, img, align=True):
    """Run the face cascade on ``img`` and return one entry per detection.

    Args:
        detector: dict with a "face_detector" entry (and an "eye_detector"
            entry, used only when ``align`` is true).
        img: image array to search.
        align (bool): whether each crop is passed through ``align_face``.

    Returns:
        list of ``(face_crop, [x, y, w, h], confidence)`` tuples; empty when
        nothing is detected or when the detector raises.
    """
    try:
        faces, _, scores = detector["face_detector"].detectMultiScale3(
            img, 1.1, 10, outputRejectLevels=True
        )
    except Exception:
        # best effort: a failing detector is treated as "no faces found"
        return []

    found = []
    for (x, y, w, h), confidence in zip(faces, scores):
        crop = img[int(y) : int(y + h), int(x) : int(x + w)]
        if align:
            crop = align_face(detector["eye_detector"], crop)
        found.append((crop, [x, y, w, h], confidence))

    return found

In [None]:
def extract_faces(img, align=True):
    """Detect faces in an image and return them as 224x224 model inputs.

    Args:
        img: a path or numpy array.
        align (bool, optional): whether to align the extracted faces. Defaults to True.

    Returns:
        list: one ``[pixels, region, confidence]`` entry per face. ``pixels``
        has a leading axis of size 1 and is divided by 255, ``region`` is a
        dict with integer "x", "y", "w", "h" values, and ``confidence`` is
        the detector score. When no face is detected the whole image is used
        with confidence 0.
    """
    target_size = (224, 224)

    img = load_image(img)
    whole_image_region = [0, 0, img.shape[1], img.shape[0]]

    face_objs = detect_faces(build_detector_model(), img, align)
    if len(face_objs) == 0:
        # fall back to treating the full image as the face
        face_objs = [(img, whole_image_region, 0)]

    extracted_faces = []
    for face_img, face_region, confidence in face_objs:
        # skip degenerate crops with an empty side
        if not (face_img.shape[0] > 0 and face_img.shape[1] > 0):
            continue

        # Scale so the crop fits inside the target while keeping its aspect ratio.
        scale = min(
            target_size[0] / face_img.shape[0],
            target_size[1] / face_img.shape[1],
        )
        new_size = (int(face_img.shape[1] * scale), int(face_img.shape[0] * scale))
        face_img = cv2.resize(face_img, new_size)

        # Zero-pad the shorter side so the resized crop sits in the middle.
        pad_rows = target_size[0] - face_img.shape[0]
        pad_cols = target_size[1] - face_img.shape[1]
        face_img = np.pad(
            face_img,
            (
                (pad_rows // 2, pad_rows - pad_rows // 2),
                (pad_cols // 2, pad_cols - pad_cols // 2),
                (0, 0),
            ),
            "constant",
        )

        # Safety net in case the padded crop still misses the target size.
        if face_img.shape[0:2] != target_size:
            face_img = cv2.resize(face_img, target_size)

        # add the leading axis and normalize input in [0, 1]
        img_pixels = image.img_to_array(face_img)
        img_pixels = np.expand_dims(img_pixels, axis=0)
        img_pixels /= 255

        # int cast is for the exception - object of type 'float32' is not JSON serializable
        region_obj = {
            key: int(value) for key, value in zip(("x", "y", "w", "h"), face_region)
        }

        extracted_faces.append([img_pixels, region_obj, confidence])

    return extracted_faces

In [None]:
def represent(img_path, model, align):
    """
    This function represents a facial image as a vector through a CNN model.

    Parameters:
            img_path (string or numpy array): image path, or an image array
                    with 3 dimensions or with a leading axis of size 1 in front.
            model: object exposing a Keras-style ``predict(batch, verbose=...)``.
            align (boolean): not used by this function; kept so existing
                    callers keep working.

    Returns:
            list: the first prediction of ``model`` for the image, converted
            with ``tolist()``.
    """
    img = load_image(img_path)

    if len(img.shape) == 4:
        img = img[0]  # e.g. (1, 224, 224, 3) to (224, 224, 3)

    if len(img.shape) == 3:
        # resize to the model input size, then restore the leading batch axis
        img = cv2.resize(img, (224, 224))
        img = np.expand_dims(img, axis=0)

    return model.predict(img, verbose=0)[0].tolist()

### Model

In [None]:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
    Convolution2D,
    ZeroPadding2D,
    MaxPooling2D,
    Flatten,
    Dropout,
    Activation,
)

# ---------------------------------------

# ReLU written with tf.math.maximum; passed as the activation of the conv layers below.
relu = lambda x: tf.math.maximum(x, 0.0)
def baseModel():
    """Build the sequential CNN whose weights are loaded by ``loadModel``.

    The network takes 224x224x3 inputs and stacks five groups of
    zero-padded 3x3 convolutions (64, 128, 256, 512, 512 filters), each
    followed by 2x2 max pooling, then two 4096-filter convolutions with
    dropout, a 2622-filter 1x1 convolution, a flatten and a softmax.

    Every convolution except the final 2622-filter one is created with
    ``trainable=False``, so only that last convolution can be updated by
    training.

    NOTE(review): the layer order presumably has to match the layout of
    vgg_face_weights.h5 for ``load_weights`` to succeed - confirm before
    reordering anything here.

    Returns:
        Sequential: the uncompiled model, without weights loaded.
    """
    model = Sequential(name = "base")
    # group 1: two 64-filter convolutions
    model.add(ZeroPadding2D((1, 1), input_shape=(224, 224, 3), trainable=False))
    model.add(Convolution2D(64, (3, 3), activation=relu, trainable=False))
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(64, (3, 3), activation=relu, trainable=False))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), trainable=False))

    # group 2: two 128-filter convolutions
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(128, (3, 3), activation=relu, trainable=False))
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(128, (3, 3), activation=relu, trainable=False))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), trainable=False))

    # group 3: three 256-filter convolutions
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(256, (3, 3), activation=relu, trainable=False))
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(256, (3, 3), activation=relu, trainable=False))
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(256, (3, 3), activation=relu, trainable=False))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), trainable=False))

    # group 4: three 512-filter convolutions
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(512, (3, 3), activation=relu, trainable=False))
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(512, (3, 3), activation=relu, trainable=False))
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(512, (3, 3), activation=relu, trainable=False))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), trainable=False))

    # group 5: three 512-filter convolutions
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(512, (3, 3), activation=relu, trainable=False))
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(512, (3, 3), activation=relu, trainable=False))
    model.add(ZeroPadding2D((1, 1), trainable=False))
    model.add(Convolution2D(512, (3, 3), activation=relu, trainable=False))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # head: 7x7 and 1x1 convolutions with dropout in between
    model.add(Convolution2D(4096, (7, 7), activation=relu, trainable=False))
    model.add(Dropout(0.5))
    model.add(Convolution2D(4096, (1, 1), activation=relu, trainable=False))
    model.add(Dropout(0.5))
    # the only convolution left trainable; loadModel uses the flattened
    # output of this layer (layers[-2]) as the model output
    model.add(Convolution2D(2622, (1, 1)))
    model.add(Flatten())
    model.add(Activation("softmax"))

    return model


# url = 'https://drive.google.com/uc?id=1CPSeum3HpopfomUEK1gybeuIVoeJT_Eo'


def loadModel():
    """Build the base network, load its weights and return the descriptor model.

    The weights file ``vgg_face_weights.h5`` is downloaded into the working
    directory when it is not already there. The returned model maps the base
    network's input to the output of its second-to-last layer, i.e. the
    final softmax activation is cut off.
    """
    weights_file = "vgg_face_weights.h5"
    weights_url = "https://github.com/serengil/deepface_models/releases/download/v1.0/vgg_face_weights.h5"

    classifier = baseModel()

    if not os.path.isfile(weights_file):
        print("vgg_face_weights.h5 will be downloaded...")
        gdown.download(weights_url, weights_file, quiet=False)

    classifier.load_weights(weights_file)

    # layers[-2] is the layer just before the closing softmax
    return Model(inputs=classifier.layers[0].input, outputs=classifier.layers[-2].output)

### Pre-Verification

In [None]:
def verify(img1_path, img2_path, model, distance_metric="cosine", align=True):
    """
    This function verifies whether an image pair shows the same person or different persons.

    Parameters:
            img1_path, img2_path: image path or numpy array
            model: CNN to use for representation
            distance_metric (string): "cosine" or "euclidean"
            align (boolean): alignment according to the eye positions.

    Returns:
            dict with keys "verified", "distance", "threshold",
            "similarity_metric", "facial_areas" and "time" (seconds, rounded
            to two decimals). The reported distance is the smallest one over
            all face pairs found in the two images.

    Raises:
            ValueError: if ``distance_metric`` is not a supported metric.
    """
    # Decision threshold per supported metric. Validate before doing any
    # expensive work so a typo fails immediately with a clear message.
    thresholds = {"cosine": 0.4, "euclidean": 0.6}
    if distance_metric not in thresholds:
        raise ValueError(
            f"Unsupported distance_metric '{distance_metric}'. Expected one of {sorted(thresholds)}"
        )
    threshold = thresholds[distance_metric]

    tic = time.time()

    # img pairs might have many faces
    img1_objs = extract_faces(img1_path, align)
    img2_objs = extract_faces(img2_path, align)

    # Embed every face exactly once instead of once per pair.
    img1_faces = [(represent(content, model, align), region) for content, region, _ in img1_objs]
    img2_faces = [(represent(content, model, align), region) for content, region, _ in img2_objs]

    # --------------------------------
    distances = []
    regions = []
    # now we will find the face pair with minimum distance
    for img1_vector, img1_region in img1_faces:
        for img2_vector, img2_region in img2_faces:
            if distance_metric == "cosine":
                distance = findCosineDistance(img1_vector, img2_vector)
            else:
                distance = findEuclideanDistance(img1_vector, img2_vector)

            distances.append(distance)
            regions.append((img1_region, img2_region))

    # -------------------------------
    distance = min(distances)  # best distance
    facial_areas = regions[np.argmin(distances)]

    toc = time.time()

    resp_obj = {
        "verified": distance <= threshold,
        "distance": distance,
        "threshold": threshold,
        "similarity_metric": distance_metric,
        "facial_areas": {"img1": facial_areas[0], "img2": facial_areas[1]},
        "time": round(toc - tic, 2),
    }

    return resp_obj

In [None]:
# cv2_imshow(cv2.imread("val/val_020_1.jpg"))

In [None]:
# Score the descriptor on the validation pairs and count correct decisions.
if 'model' not in globals():
    model = loadModel()

# The first line of val.txt is skipped; every other line is "<pair id>, <label>".
with open("val.txt") as infile:
    answers = infile.read().strip().split("\n")[1:]

correct = 0
for i, answer in enumerate(answers):
    v = verify(f"val/val_{i:03}_0.jpg", f"val/val_{i:03}_1.jpg", model, distance_metric="cosine", align=False)
    print(answer, v['verified'])
    expected = int(answer.split(", ")[1])
    # a pair counts as correct when the label equals the 0/1 decision
    correct += int(expected == int(v['verified']))

print(correct)

vgg_face_weights.h5 will be downloaded...


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/vgg_face_weights.h5
To: /content/vgg_face_weights.h5
100%|██████████| 580M/580M [00:22<00:00, 26.1MB/s]


0, 1 True
1, 1 True
2, 1 True
3, 1 True
4, 1 True
5, 1 True
6, 1 True
7, 1 True
8, 1 True
9, 1 True
10, 1 True
11, 1 True
12, 1 True
13, 1 True
14, 1 True
15, 1 True
16, 1 True
17, 1 True
18, 1 True
19, 1 True
20, 0 False
21, 0 False
22, 0 False
23, 0 True
24, 0 False
25, 0 False
26, 0 True
27, 0 False
28, 0 False
29, 0 False
30, 0 False
31, 0 True
32, 0 False
33, 0 True
34, 0 False
35, 0 False
36, 0 False
37, 0 False
38, 0 True
39, 0 True
40, 0 True
41, 0 True
42, 0 False
43, 0 False
44, 0 True
45, 0 True
46, 0 False
47, 0 True
48, 0 False
49, 0 True
50, 1 True
51, 1 True
52, 1 True
53, 1 True
54, 1 True
55, 1 True
56, 1 True
57, 1 True
58, 1 True
59, 1 True
60, 1 True
61, 1 True
62, 1 True
63, 1 True
64, 1 True
65, 1 True
66, 1 False
67, 1 True
68, 1 True
69, 1 True
70, 1 True
71, 1 True
72, 1 False
73, 1 True
74, 1 True
75, 0 False
76, 0 True
77, 0 False
78, 0 False
79, 0 False
80, 0 False
81, 0 False
82, 0 False
83, 0 False
84, 0 False
85, 0 False
86, 0 True
87, 0 False
88, 0 False

### Retraining Weights with Validation Data

In [None]:
from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import RMSprop

def crop_face(img_path, align=True):
    """Return the preprocessed pixels of the most confident face in an image.

    Args:
        img_path: image path or numpy array, forwarded to ``extract_faces``.
        align (bool): whether faces are aligned during extraction.

    Returns:
        The pixel array (first element) of the extracted face with the
        highest detector confidence.
    """
    # img pairs might have many faces
    # img_objs is List[Tuple]: [(img_data, {coordinates: int}, confidence)]
    img_objs = extract_faces(img_path, align)
    confidences = [confidence for _, _, confidence in img_objs]
    # argmax, not argmin: with several detections the strongest one is the
    # face to keep; the weakest is the most likely false positive.
    return img_objs[np.argmax(confidences)][0]

def euclidean_keras(vects):
    """Row-wise Euclidean distance between two tensors given as a pair.

    The squared distance is summed over axis 1 (keeping that axis) and
    clamped to at least ``K.epsilon()`` before the square root is taken.
    """
    left, right = vects
    squared_distance = K.sum(K.square(left - right), axis=1, keepdims=True)
    return K.sqrt(K.maximum(squared_distance, K.epsilon()))

def shapes_keras(shapes):
    """Return ``(d, 1)`` where ``d`` is the first dimension of the second shape.

    ``shapes`` must hold exactly two shapes; the first one is ignored.
    """
    _first_shape, second_shape = shapes
    leading_dim = second_shape[0]
    return (leading_dim, 1)

def contrastive_loss_with_margin(margin):
    """Return a contrastive loss function closed over ``margin``.

    For a label of 1 the loss is the squared predicted distance; for a label
    of 0 it is the squared amount by which the distance falls short of
    ``margin`` (zero once the distance reaches the margin).
    """
    def contrastive_loss(y_true, y_pred):
        labels = tf.cast(y_true, tf.float32)
        label_one_term = K.square(y_pred)
        label_zero_term = K.square(K.maximum(margin - y_pred, 0))
        return (labels * label_one_term + (1 - labels) * label_zero_term)
    return contrastive_loss


In [None]:
# Read the labels first: the first line of val.txt is skipped, every other
# line is "<pair id>, <label>".
with open("val.txt") as infile:
    img_ids = infile.read().strip().split("\n")[1:]
img_labels = np.array([int(idsim.split(", ")[1]) for idsim in img_ids])

# Build exactly one image pair per label row so pairs and labels cannot get
# out of sync with the contents of val.txt.
# NOTE(review): crop_face aligns faces by default while the evaluation cells
# call verify(..., align=False) - confirm this difference is intended.
img_pairs = np.array([
    (crop_face(f"val/val_{i:03}_0.jpg"), crop_face(f"val/val_{i:03}_1.jpg"))
    for i in range(len(img_ids))
])

# The first 80 pairs are used for training, the remainder for validation.
n_train = 80
training_pairs = img_pairs[:n_train]
training_labels = img_labels[:n_train]
val_pairs = img_pairs[n_train:]
val_labels = img_labels[n_train:]

In [None]:
# Build a two-input network around the descriptor and fit it on the labelled pairs.
if 'model' not in globals():
    model = loadModel()

# Both inputs go through the same `model` object, so the two branches share weights.
input0 = Input(shape=(224, 224, 3), name="input0")
input1 = Input(shape=(224, 224, 3), name="input1")
embed0, embed1 = model(input0), model(input1)

# The network output is the distance between the two embeddings.
output = Lambda(euclidean_keras, output_shape=shapes_keras, name="euclid_out")([embed0, embed1])
mid_model = Model([input0, input1], output)
tf.keras.utils.plot_model(mid_model, to_file='siamese_model.png', show_shapes=True)

mid_model.compile(
    optimizer=RMSprop(),
    loss=contrastive_loss_with_margin(margin=1),
)

# Each pair element carries an extra axis of size 1 that np.squeeze removes.
train_inputs = [np.squeeze(training_pairs[:, 0]), np.squeeze(training_pairs[:, 1])]
val_inputs = [np.squeeze(val_pairs[:, 0]), np.squeeze(val_pairs[:, 1])]

history = mid_model.fit(
    train_inputs,
    training_labels,
    epochs=20,
    batch_size=64,
    validation_data=(val_inputs, val_labels),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Score the descriptor on the validation pairs again, also printing each distance.
if 'model' not in globals():
    model = loadModel()

# The first line of val.txt is skipped; every other line is "<pair id>, <label>".
with open("val.txt") as infile:
    answers = infile.read().strip().split("\n")[1:]

correct = 0
for i, answer in enumerate(answers):
    v = verify(f"val/val_{i:03}_0.jpg", f"val/val_{i:03}_1.jpg", model, distance_metric="cosine", align=False)
    print(answer, v['verified'], v['distance'])
    expected = int(answer.split(", ")[1])
    # a pair counts as correct when the label equals the 0/1 decision
    correct += int(expected == int(v['verified']))

print(correct)

0, 1 True 0.10612048058846335
1, 1 True 0.09022813451348677
2, 1 True 0.02842296541724809
3, 1 True 0.04665698535445495
4, 1 True 0.06781651796969279
5, 1 True 0.03867433198398473
6, 1 True 0.07634177450742186
7, 1 True 0.06307627239636737
8, 1 True 0.03742126882621666
9, 1 True 0.05105123854955651
10, 1 True 0.046806632000560566
11, 1 True 0.07120320262740798
12, 1 True 0.1101698222494294
13, 1 True 0.11009617330063404
14, 1 True 0.12352728473308772
15, 1 True 0.03258806442445494
16, 1 True 0.05639799905941878
17, 1 True 0.12223715231029098
18, 1 True 0.3024325570797669
19, 1 True 0.04593615172043963
20, 0 False 0.7630613439449696
21, 0 False 0.6535077391523803
22, 0 False 1.1117871796980747
23, 0 False 0.6978726778053775
24, 0 False 0.8446446484380623
25, 0 False 0.9090173306172559
26, 0 True 0.1291814750727115
27, 0 False 0.494910175353523
28, 0 False 0.8547774798310384
29, 0 False 0.808231091154152
30, 0 False 0.8273721495121137
31, 0 False 0.5385029000776087
32, 0 False 1.09664660

### Applying Model to Test Data

In [None]:
# Verify every test pair and write one 0/1 decision per line to result.txt.
if 'model' not in globals():
    model = loadModel()

# Open the file once in write mode: re-running this cell then replaces
# result.txt instead of appending a second copy of the results to it.
with open("result.txt", "w") as outfile:
    for i in range(400):
        v = verify(f"test/test_{i:03}_0.jpg", f"test/test_{i:03}_1.jpg", model, distance_metric="cosine", align=False)
        print(i, v['verified'], v['distance'])

        outfile.write(f"{int(v['verified'])}\n")
        # flush so the decisions written so far survive an interrupted run
        outfile.flush()

0 True 0.28108660355756687
1 False 0.7809562056718098
2 True 0.21566995978678805
3 True 0.189715168469495
4 False 0.5381993141349342
5 False 0.5813889293780288
6 False 0.44480609499784174
7 True 0.33819726589560384
8 False 0.9183240119841972
9 False 0.6191177304298314
10 True 0.3314927797812539
11 False 0.6500023264836368
12 True 0.3201386285750618
13 True 0.16684884631477903
14 False 0.7054669070450205
15 False 0.45948782141527944
16 False 0.622553309127847
17 True 0.27672186831164747
18 False 0.9152508347160806
19 False 0.4914070007399258
20 False 0.4360480402587542
21 True 0.2959182917948966
22 False 0.6923066888623808
23 True 0.06694563391624031
24 False 0.5926736139248804
25 False 0.5360922530183878
26 True 0.21778783956875036
27 False 0.7017400872183661
28 False 0.5335548149209344
29 False 0.8834837983727355
30 False 0.8233996693496491
31 False 0.7033516875375547
32 False 0.7019913269975362
33 False 0.8752670284763298
34 True 0.3345130811501217
35 True 0.2904642024022749
36 True 