# Objectives

In this notebook, we use the Siamese Network we trained in `Pipeline2 Siamese_Network.ipynb` to build a CLI application that can recognize faces in real time.



- Setup verification images/ Enrollment process

- Set up Login process


# 1. Import all dependencies, needed custom functions 

In [None]:
import cv2 # OpenCV
import os  # For file operations
import random
import numpy as np
from matplotlib import pyplot as plt # For plotting graphs
from mtcnn.mtcnn import MTCNN
from numpy import savez_compressed
from PIL import Image

# Import TensorFlow dependencies
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, MaxPooling2D,  Dense, MaxPool2D, Flatten, Input
import tensorflow as tf


In [None]:
# ID of the camera device; it is passed to cv2.VideoCapture in the enrollment and login cells
CAM_ID = 0 # 0 is typically the default camera; change it to select another device (e.g. an IR camera)

The Siamese model needs some custom functions and a custom layer that we defined in the building phase. They are not stored inside the saved model itself, so we need to define them again here. They are copied as-is from `Pipeline2 Siamese_Network.ipynb` and `Pipeline2 Data_Preparation.ipynb`.

In [None]:
class L1Dist(Layer):
    """Custom Keras layer that outputs the element-wise L1 distance of two embeddings.

    The class has to be supplied through ``custom_objects`` when the saved
    Siamese model is reloaded, because it is not stored in the model file.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, input_embedding, validation_embedding):
        """Return ``|input_embedding - validation_embedding|`` element-wise.

        Both arguments are converted to tensors and have their leading axis
        removed before the subtraction.
        """
        # Plain Python lists cannot be subtracted
        # ("unsupported operand type(s) for -: 'List' and 'List'"), so convert first.
        as_tensors = [
            tf.convert_to_tensor(embedding)
            for embedding in (input_embedding, validation_embedding)
        ]

        # Drop the leading axis of each embedding; tf.squeeze with axis=0
        # requires that axis to have size 1.
        anchor, candidate = [tf.squeeze(tensor, axis=0) for tensor in as_tensors]

        return tf.math.abs(anchor - candidate)
    

In [None]:
# Reload the trained Siamese network from disk (tensorflow is already imported as `tf` in the import cell).
# - custom_objects maps the custom names referenced by the saved file back to Python objects
#   (L1Dist is the layer class defined above).
# - compile=False skips restoring the training configuration, which is not needed for inference.
#   Without it Keras emits: "WARNING:absl:No training configuration found in the save file,
#   so the model was *not* compiled. Compile it manually."
model = tf.keras.models.load_model(
    'model_saved/fully_siamese_network.h5',
    custom_objects={'L1Dist': L1Dist, 'BinaryCrossentropy': tf.losses.BinaryCrossentropy},
    compile=False,
)


In [None]:
def gaussian_blur(image, kernel_size=(3,3), sigma=0.1):
    """
    Apply a Gaussian blur to an image (or a batch of images) using TensorFlow.

    Args:
    - image: Image as a tensor or array-like, shaped [height, width, channels]
      or [batch, height, width, channels]. It is cast to float32.
    - kernel_size: Size of the Gaussian kernel as (height, width).
    - sigma: Standard deviation of the Gaussian in pixels, must be > 0.
      With the defaults (3x3 kernel, sigma=0.1) the centre weight is ~1 and
      the neighbours ~0, so the filter is practically an identity.

    Returns:
    - Smoothed float32 image with the same rank as the input: a 3D input
      gives a 3D output, a 4D batch stays 4D (also when the batch size is 1).

    Raises:
    - ValueError: if sigma is not positive or a kernel dimension is < 1.
    """
    kernel_height, kernel_width = (int(k) for k in kernel_size)
    if kernel_height < 1 or kernel_width < 1:
        raise ValueError(f"kernel_size must contain positive integers, got {kernel_size}")
    if sigma <= 0:
        raise ValueError(f"sigma must be positive, got {sigma}")

    # Ensure the image is a tensor
    if not isinstance(image, tf.Tensor):
        image = tf.convert_to_tensor(image)

    # depthwise_conv2d needs a floating-point input whose dtype matches the float32 kernel
    image = tf.cast(image, tf.float32)

    # Ensure 4D tensor [batch, height, width, channels]; remember whether the
    # batch axis was added here so that only an added axis is removed again
    added_batch_dim = len(image.shape) == 3
    if added_batch_dim:
        image = image[tf.newaxis, :, :, :]

    def create_gaussian_kernel(height, width, sigma):
        """Generate a normalized 2D Gaussian kernel of shape (height, width)"""
        y, x = np.mgrid[-height//2 + 1:height//2 + 1, -width//2 + 1:width//2 + 1]
        g = np.exp(-((x**2 + y**2)/(2.0*sigma**2)))
        return g / g.sum()

    kernel = create_gaussian_kernel(kernel_height, kernel_width, sigma)

    # depthwise_conv2d expects a filter of shape
    # [filter_height, filter_width, in_channels, channel_multiplier];
    # the same 2D kernel is used for every channel
    num_channels = image.shape[-1]
    kernel_4d = np.repeat(kernel[:, :, np.newaxis, np.newaxis], num_channels, axis=2)
    kernel_tensor = tf.convert_to_tensor(kernel_4d, dtype=tf.float32)

    # Apply convolution
    blurred = tf.nn.depthwise_conv2d(
        input=image,
        filter=kernel_tensor,
        strides=[1, 1, 1, 1],
        padding='SAME'
    )

    # Remove the batch dimension only if it was added above
    return blurred[0] if added_batch_dim else blurred

In [None]:
def preprocess(input_data):
    """
    Turn an image given in one of several formats into the tensor fed to the model.

    Args:
    input_data: A file path (str or bytes), a string tensor holding a path,
        a numpy array / TensorFlow tensor of pixel values, or a PIL Image.

    Returns:
    A float32 tensor resized to 100x100, smoothed with gaussian_blur and divided
    by 255 (so 0-255 pixel inputs end up in [0,1]).

    Raises:
    ValueError: if the decoded/converted image does not have exactly 3 dimensions.
    Every exception is reported with print() and then re-raised unchanged.
    """
    try:
        # A PIL image is turned into a plain array and then handled like any array
        if isinstance(input_data, Image.Image):
            input_data = np.array(input_data)

        holds_path = isinstance(input_data, (str, bytes)) or (
            isinstance(input_data, tf.Tensor) and input_data.dtype == tf.string
        )

        if not holds_path:
            # Numpy array or TensorFlow tensor of pixel values
            image = tf.convert_to_tensor(input_data)
        else:
            # Reduce the path to a Python str: string tensor -> bytes -> str
            if isinstance(input_data, tf.Tensor):
                input_data = input_data.numpy()
            if isinstance(input_data, bytes):
                input_data = input_data.decode('utf-8')

            # Read the file and decode it as a 3-channel JPEG
            image = tf.image.decode_jpeg(tf.io.read_file(input_data), channels=3)

        image = tf.cast(image, tf.float32)

        # Only single images (3 dimensions) are supported
        if len(image.shape) != 3:
            raise ValueError(f"Expected image with 3 dimensions, got shape {image.shape}")

        resized = tf.image.resize(image, (100, 100))
        smoothed = gaussian_blur(resized, kernel_size=(3,3), sigma=0.1)

        # Normalization keeps the model input in a small, consistent range.
        # The scaled values are meant as model input, not for direct display:
        # the smaller numerical range does not change the visual structure of
        # the image, but it has to be scaled back before it can be viewed or saved.
        return smoothed / 255.0

    except Exception as err:
        print(f"Error processing image: {str(err)}")
        print(f"Input type: {type(input_data)}")
        if isinstance(input_data, (str, bytes)):
            print(f"Input path: {input_data}")
        raise
# Note: preprocess returns a TensorFlow tensor, not a NumPy array, so convert the result
# with .numpy() before handing it to OpenCV functions.

# tf.py_function wrapper around preprocess; the enrollment cell uses it on the face crops taken from OpenCV frames
def preprocess_wrapper(input_data):
    """Run preprocess on input_data through tf.py_function and return its float32 output tensor."""
    return tf.py_function(func=preprocess, inp=[input_data], Tout=tf.float32)

# 2. Overall process


1. **Enrollment**

Collect the personal images of that person for verification/enrollment process through webcam. It is similar to when you first choose the sign-in option in Windows Hello, where you need to scan your face images for the first time. These images will be stored to compare with the input image each time you log in to the computer later.

2. **Verification/Login**

Access webcam -> retrieve the input image of the user when they want to log in (this input image is processed directly without being stored in any file) -> use this image to verify against a number of positive samples (these positive samples are the images already collected during the enrollment process). We store the positive samples, also called validation images, inside the `application_data/validation_images` folder. Each user on the system has their own subfolder inside the `validation_images` folder.

The input image is compared in a loop against the positive images in the `validation_images/user_name` folder, for example 50 of them -> our verification function then outputs 50 predictions. Each comparison takes the input image plus one validation image (1 of the 50 images in the folder) and outputs a number between 0 and 1. We must choose a threshold to decide whether a single comparison is a match or not (**detection threshold**). After that, we have 50 match / no-match results. Then we choose a **verification threshold**: the proportion of matching images out of the 50 that is required for a valid authentication. For example, with a verification threshold of 0.8, the input image is accepted only if more than 80% of the 50 images match it.


![VerificationProcess](assets/images/VerificationProcess.drawio.png)

In [None]:
# Root folder holding everything the application stores on disk
base_dir = 'application_data'

# Folder that receives one sub-folder of validation images per enrolled user
validation_dir = os.path.join(base_dir, 'validation_images')

# os.makedirs also creates the missing parent (base_dir); exist_ok keeps the cell re-runnable
os.makedirs(validation_dir, exist_ok=True)

# Path read by the enrollment cell and by verify(). Defining it here lets the login
# cells run on a fresh kernel without executing the enrollment cell first.
VALIDATION_PATH = validation_dir



# 3. Enrollment process

In [None]:
# Ask who is enrolling; the answer becomes the name of that person's sub-folder
# inside the validation_images folder
name = input("Enter your name to store your personal verification data to system: ").strip()

# The name is used as a folder name, so refuse anything that is empty or that could
# point outside the validation folder (path separators, '.' or '..')
if not name or name in ('.', '..') or os.path.basename(name) != name:
    raise ValueError(f"Invalid name {name!r}: use a plain name without path separators")

print("enrollment process begins, please look at the camera, rotate your head to the left and right")
print("Press 'p' to capture the images and store to the validation_images folder")
print("Press 'q' to stop the enrollment process")


# Folder that contains one sub-folder per enrolled user
VALIDATION_PATH = os.path.join('application_data', 'validation_images')
# Create this user's sub-folder (exist_ok allows enrolling more images later)
new_dir_path = os.path.join(VALIDATION_PATH, name)
os.makedirs(new_dir_path, exist_ok=True)

import uuid # For generating unique image file names

def save_image(image, folder_path, img_name):
    """Write `image` to folder_path/img_name with OpenCV.

    Returns:
        bool: True when cv2.imwrite reports success, False otherwise.
    """
    img_path = os.path.join(folder_path, img_name)
    return bool(cv2.imwrite(img_path, image))

# Initialize the webcam
cap = cv2.VideoCapture(CAM_ID)
# Load the Haar Cascade Classifier for face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

if not cap.isOpened():
    print(f"Cannot open camera {CAM_ID}")

# try/finally guarantees that the camera and the preview window are released
# even when an exception is raised inside the loop
try:
    # Loop through every frame in the webcam feed
    while cap.isOpened():

        ret, frame = cap.read()
        if not ret:
            break

        # Display the frame
        cv2.imshow('Face enrollment Process, p for capture, q for quite', frame)

        # Check for key presses
        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break
        if key != ord('p'):
            continue

        # 'p' pressed: detect a face, crop it and resize the crop to 250x250
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5)
        if len(faces) == 0:
            print("No faces detected, look at the camera and cpature the image again")
            continue

        # Only the first detected face is enrolled
        (x, y, w, h) = faces[0]
        cropped_face = frame[y:y+h, x:x+w]
        resized_face = cv2.resize(cropped_face, (250, 250))

        # The image is preprocessed once here, at enrollment, so it does not have to be
        # preprocessed again at every login
        preprocessed_face = preprocess_wrapper(resized_face).numpy()

        # preprocess returns values divided by 255; scale back to 0-255 uint8 for saving
        face_to_save = np.clip(preprocessed_face * 255, 0, 255).astype(np.uint8)

        if save_image(face_to_save, new_dir_path, str(uuid.uuid1()) + ".jpg"):
            print("Image saved in ", new_dir_path)
        else:
            print("Could not save the image in ", new_dir_path)
finally:
    cap.release()
    cv2.destroyAllWindows()


# 4. Login process

Now we build the `verify` function, which takes the input image directly from the webcam and compares it with the images in the `validation_images/user_name` folder. The parameters of this function are:

- `frame`: the input image from the webcam (a frame object from OpenCV)
- `name`: the name of the user who is trying to log in
- `model`: the model we trained before, used to generate the predictions
- `detection_threshold`: the threshold that decides whether a single comparison is a match or not
- `verification_threshold` (spelled `verfication_threshold` in the function signature): the proportion of matching comparisons out of the total number of samples that is required for a valid authentication
- `LIMIT_IMAGES_TO_COMPARE`: the number of images in the `validation_images/user_name` folder to compare with the input image. In our tests, one comparison takes 170ms-200ms. Increasing this number therefore increases the comparison time but also the security; decreasing it makes the comparison faster but less secure.


In [None]:
def _is_plain_folder_name(name):
    """Return True when `name` is a non-empty string usable as a single folder name."""
    return (
        isinstance(name, str)
        and name.strip() != ""
        and name not in (".", "..")
        and os.path.basename(name) == name
    )


def verify(frame, name, model, detection_threshold, verfication_threshold, LIMIT_IMAGES_TO_COMPARE):
    """Check a face image from the webcam against the images enrolled for `name`.

    Args:
        frame: Face image from the webcam (OpenCV array); it is passed to preprocess().
        name: User name; selects the sub-folder of VALIDATION_PATH to compare with.
        model: Model whose predict([input_batch, validation_batch]) returns the match score.
        detection_threshold: A single comparison counts as a match when its
            prediction is above this value (e.g. prediction 0.7 with threshold 0.5).
        verfication_threshold: Proportion of matching comparisons that must be
            exceeded for the user to be verified (e.g. 30 matches out of 50 is 0.6).
        LIMIT_IMAGES_TO_COMPARE: Maximum number of enrolled images to compare
            with; None compares with all of them.

    Returns:
        tuple: (results, verification) where results is the list of model
        predictions, one per compared image, and verification is a bool.
        ([], False) is returned when the name is invalid or unknown, or when no
        enrolled image could be compared.
    """
    results = []

    # The name selects a folder: refuse empty names and anything containing path
    # components, otherwise a crafted name could point at an arbitrary folder
    if not _is_plain_folder_name(name):
        print("The name does not exist in the system")
        return results, False

    path_of_validation_subfolder = os.path.join(VALIDATION_PATH, name)
    if not os.path.isdir(path_of_validation_subfolder):
        print("The name does not exist in the system")
        return results, False

    print("Compare with images in foler:", path_of_validation_subfolder)

    # Preprocess the webcam image once; the same batch is reused for every comparison
    # NOTE(review): OpenCV images are BGR while preprocess() decodes files as RGB.
    # Input and validation images are both BGR here, so they agree with each other;
    # confirm that this channel order also matches the training data.
    input_img = preprocess(frame).numpy()
    input_batch = np.expand_dims(input_img, axis=0)

    # sorted() makes the selection of compared images deterministic
    for file_name in sorted(os.listdir(path_of_validation_subfolder)):
        if LIMIT_IMAGES_TO_COMPARE is not None and len(results) >= LIMIT_IMAGES_TO_COMPARE:
            break

        # Validation images were preprocessed at enrollment to keep the login fast,
        # so they only need to be loaded here
        validation_img = cv2.imread(os.path.join(path_of_validation_subfolder, file_name), cv2.IMREAD_COLOR)
        if validation_img is None:
            # Not a readable image (sub-folder, stray file, ...)
            continue

        # Enrollment stored the images as 0-255 uint8; bring them back to the
        # 0-1 float range that preprocess() produces for the input image
        validation_img = validation_img.astype(np.float32) / 255.0

        # Ensure both images have the same shape and number of channels
        if input_img.shape != validation_img.shape:
            print(f"Shape mismatch: input_img shape {input_img.shape}, validation_img shape {validation_img.shape}")
            continue

        # Each model input is a batch containing a single image
        result = model.predict([input_batch, np.expand_dims(validation_img, axis=0)])
        results.append(result)

    if not results:
        # Nothing could be compared: fail closed instead of dividing by zero
        print("No usable validation images found for this user")
        return results, False

    match_ratio = np.sum(np.array(results) > detection_threshold) / len(results)
    verification = bool(match_ratio > verfication_threshold)

    # Return the verification result for further processing
    return results, verification

# 5. Run the app

Combine everything together: take a single image from the webcam, then call the `verify` function.

In [None]:

# Parameters handed to verify()
DETECTION_THRESHOLD = 0.7      # a single comparison is a match above this prediction
VERIFICATION_THRESHOLD = 0.7   # proportion of matches required to accept the user
MAX_IMAGES_TO_COMPARE = 4      # number of validation images to compare with

# Ask who is trying to sign in
name = input("Who are you")

# Initialize the webcam
cap = cv2.VideoCapture(CAM_ID)
# Load the Haar Cascade Classifier for face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

if not cap.isOpened():
    print(f"Cannot open camera {CAM_ID}")

# try/finally guarantees that the camera and the preview window are released
# even when verification raises an exception
try:
    # Loop through every frame in the webcam feed
    while cap.isOpened():

        ret, frame = cap.read()
        if not ret:
            break

        # Display the frame
        cv2.imshow('Face Verification App, press v to capture, q to quit', frame)

        # Check for key presses
        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break
        if key != ord('v'):
            continue

        # 'v' pressed: detect a face, crop it and resize the crop to 250x250
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5)
        if len(faces) == 0:
            # Keep the camera running so that the user can really try again
            print("No faces detected, look at the camera and cpature the image again")
            continue

        # Only the first detected face is verified
        (x, y, w, h) = faces[0]
        cropped_face = frame[y:y+h, x:x+w]
        resized_face = cv2.resize(cropped_face, (250, 250))

        # Run verification: the input image comes directly from the webcam, the
        # validation images come from the validation_images/<name> folder
        results, verification = verify(
            resized_face, name, model,
            DETECTION_THRESHOLD, VERIFICATION_THRESHOLD, MAX_IMAGES_TO_COMPARE,
        )

        # Show the frame the face was taken from
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.title("Input Frame")
        plt.show()

        # Print out the result
        print("Verification Result:", verification)
        results = np.array(results).flatten().tolist()
        print("Model prediction of matching for each validation image:", results)

        # One verification attempt per run
        break
finally:
    cap.release()
    cv2.destroyAllWindows()
