In [8]:
import glob
import math
import os
import re
import shutil
import warnings

import cv2
import dlib
import numpy as np
from sklearn.model_selection import train_test_split

# 1. Parsing Face Dataset 

In [9]:
# Shared in-memory list of parsed samples; parse_face_dataset() clears and
# refills it in place.
face_images = list()
# Dataset root scanned by parse_face_dataset().
base_directory = 'Dataset/VISA_Face/VISA_Face'


def parse_face_dataset():
    """Load every dataset image into the module-level ``face_images`` list.

    Each entry directly under ``base_directory`` is matched against the
    pattern ``<label>_2017_001``; entries whose name does not match are
    skipped with a warning. Every file inside a matching entry is read as
    grayscale, resized with ``cv2.resize(image, (400, 300))`` and stored as
    ``[image, image_id, label]``. ``image_id`` counts the successfully
    loaded images of that entry, starting at 0.

    Returns:
        list: the module-level ``face_images`` list, cleared and refilled
        in place.
    """
    # Clear the face_images list before processing the dataset
    face_images.clear()

    # glob yields entries in filesystem order, which is not guaranteed to be
    # stable. Sorting keeps the image ids (and the list order everything
    # downstream depends on) reproducible between runs and machines.
    for path in sorted(glob.iglob(base_directory + '/*')):
        filename = os.path.basename(path)

        match = re.search(r"(.*?)_2017_001", filename)
        if not match:
            warnings.warn(f"No match found for filename: {filename}")
            continue  # Skip processing this entry

        label = match.group(1)
        image_id = 0

        for image_path in sorted(glob.iglob(path + '/*')):
            try:
                image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                # imread signals an unreadable file by returning None
                if image is None:
                    warnings.warn(f"Failed to load image: {image_path}")
                    continue
                # Resize image to reduce memory usage
                image = cv2.resize(image, (400, 300))
                face_images.append([image, image_id, label])
                image_id += 1
            except Exception as e:
                # Best effort: one bad file must not abort the whole scan
                warnings.warn(f"Error processing image: {image_path}\n{e}")

    print('Total Face Images Found: ' + str(len(face_images)))

    return face_images

# 2. Face Detection Function

In [10]:

def face_detection(face_images, display):
    """Detect faces with a Haar cascade, save each crop and return the crops.

    Args:
        face_images: iterable of ``[image, image_id, label]`` entries.
        display: currently unused; kept so existing callers keep working.

    Returns:
        list: one ``[face, image_id, label]`` entry per detected face.
        ``face`` is the cropped region of the input image and ``image_id``
        is the incoming id plus one (the value used in the file name).

    Raises:
        RuntimeError: if the cascade file could not be loaded.
    """
    pre_processed_images = []
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')
    # The constructor does not raise when the file is missing; an empty
    # classifier would only fail later, inside detectMultiScale.
    if face_cascade.empty():
        raise RuntimeError(
            "Could not load cascade file 'haarcascade_frontalface_alt2.xml'")

    # Output directory for storing the detected faces
    output_dir = os.path.join('Face_Output', 'Face_Output_Detection')

    # Remove files left over from a previous run (sub-directories are kept)
    if os.path.exists(output_dir):
        for filename in os.listdir(output_dir):
            file_path = os.path.join(output_dir, filename)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                print(f"Failed to delete {file_path}: {e}")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    for image, image_id, label in face_images:
        image_id += 1  # ids in file names and in the result are shifted by one
        faces = face_cascade.detectMultiScale(image, 1.1, 4)

        for face_index, (x, y, width, height) in enumerate(faces):
            face = image[y:y + height, x:x + width]

            # The first face keeps the original file name; further faces of
            # the same image get an index suffix so they no longer overwrite
            # each other on disk.
            suffix = '' if face_index == 0 else f'_{face_index}'
            output_path = os.path.join(
                output_dir, f'{label}_{image_id}{suffix}_Cropped.jpg')
            cv2.imwrite(output_path, face)

            pre_processed_images.append([face, image_id, label])

    # Previously the collected crops were built but never handed back.
    return pre_processed_images

# 3. Face Feature Extraction Function

In [11]:
def facial_feature_extraction(input_directory, output_dir):
    """Build a small landmark-based feature vector for every detected face.

    Every ``.jpg``/``.jpeg``/``.png`` file in ``input_directory`` is run
    through dlib's frontal face detector and the 68-landmark shape
    predictor. For each detected face a feature vector
    ``[eye_distance] + nose_shape + lips_contour + mouth_wrinkles`` is
    collected. A copy of the image with the helper lines drawn on it is
    written to ``<output_dir>/Face_Output_Feature_Extraction`` under the
    same file name.

    Args:
        input_directory: directory holding the images to analyse.
        output_dir: parent directory of the output folder; the output
            folder itself is deleted and recreated on every call.

    Returns:
        tuple: ``(features, labels)`` with one feature vector and one label
        per detected face. The label is the image file name.
    """
    # Initialize face detector and shape predictor
    detector = dlib.get_frontal_face_detector()
    predictor_path = 'Dependencies/shape_predictor_68_face_landmarks.dat'
    predictor = dlib.shape_predictor(predictor_path)

    output_dir = os.path.join(output_dir, 'Face_Output_Feature_Extraction')

    # Start from an empty output directory
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)  # Remove the directory and its contents
    os.makedirs(output_dir, exist_ok=True)

    # Initialize lists to store features and labels
    features = []
    labels = []

    # listdir order is arbitrary; sorting keeps the order of the returned
    # lists (and any later split of them) reproducible.
    for filename in sorted(os.listdir(input_directory)):
        if not filename.endswith(('.jpg', '.jpeg', '.png')):
            continue

        image_path = os.path.join(input_directory, filename)
        image = cv2.imread(image_path)
        if image is None:
            continue

        # Detect faces in the image (second argument: upsample once)
        dets = detector(image, 1)

        # Landmarks are always predicted on the untouched image. The overlay
        # goes onto a separate copy so lines drawn for one face cannot
        # disturb the landmark fit of the next face in the same image.
        annotated = image.copy()

        for d in dets:
            shape = predictor(image, d)

            # Horizontal pixel offset between landmarks 36 and 45 (x only,
            # not the Euclidean distance).
            eye_distance = shape.part(45).x - shape.part(36).x
            nose_shape = calculate_nose_shape(shape)
            lips_contour = calculate_lips_contour(shape)
            mouth_wrinkles = calculate_mouth_wrinkles(shape)

            feature_vector = [eye_distance] + \
                nose_shape + lips_contour + mouth_wrinkles

            # Add feature vector and filename as label
            features.append(feature_vector)
            labels.append(filename)

            draw_lines(annotated, shape)

        # Saved even when no face was found, so the output folder mirrors
        # the readable images of the input folder.
        output_path = os.path.join(output_dir, filename)
        cv2.imwrite(output_path, annotated)

    return features, labels


def calculate_eye_distance(shape):
    """Return the Euclidean distance between landmarks 36 and 45.

    Args:
        shape: object exposing ``part(i)`` whose result has ``x`` and ``y``
            attributes (for example a dlib full-object detection).

    Returns:
        float: straight-line distance between the two points, in the same
        units as the landmark coordinates.
    """
    left_eye_outer_corner = shape.part(36)
    right_eye_outer_corner = shape.part(45)
    # math.hypot(dx, dy) == sqrt(dx**2 + dy**2)
    return math.hypot(
        right_eye_outer_corner.x - left_eye_outer_corner.x,
        right_eye_outer_corner.y - left_eye_outer_corner.y)


def calculate_nose_shape(shape):
    """Return a two-value nose descriptor: ``[part(30).x, part(33).y]``.

    This is placeholder logic that mixes the x coordinate of one landmark
    with the y coordinate of another; replace it with a real shape measure
    if needed.
    """
    first = shape.part(30)
    # NOTE(review): the earlier comment called landmark 33 the nose bridge;
    # in the usual 68-point layout it sits at the bottom of the nose -
    # confirm which point was intended.
    second = shape.part(33)
    return [first.x, second.y]


def calculate_lips_contour(shape):
    """Return ``[part(48).x, part(54).x]`` as a minimal lips descriptor.

    Placeholder logic: only the x coordinates of the two landmarks are
    used.
    """
    corner_indices = (48, 54)
    return [shape.part(index).x for index in corner_indices]


def calculate_mouth_wrinkles(shape):
    """Return a one-element list: ``part(57).y - part(51).y``.

    Despite the name this is placeholder logic: it is simply the vertical
    offset between the two landmarks, not a wrinkle measurement.
    """
    top_y = shape.part(51).y
    bottom_y = shape.part(57).y
    return [bottom_y - top_y]


def draw_lines(image, shape):
    """Draw four fixed landmark-to-landmark segments onto ``image`` in place.

    Segments connect landmark pairs (30, 33), (48, 54), (48, 57) and
    (36, 45), each drawn with colour ``(255, 0, 0)`` and thickness 2.
    """
    colour = (255, 0, 0)
    thickness = 2
    for first, second in ((30, 33), (48, 54), (48, 57), (36, 45)):
        p = shape.part(first)
        q = shape.part(second)
        cv2.line(image, (p.x, p.y), (q.x, q.y), colour, thickness)

# 4. Face Landmarks Extraction Function

In [12]:
def extract_facial_landmarks(input_dir, output_dir):
    """Locate 68 facial landmarks per detected face and save annotated images.

    Every ``.jpg``/``.jpeg``/``.png`` file in ``input_dir`` is run through
    dlib's frontal face detector and shape predictor. For face ``i`` of a
    file ``<stem>.<ext>`` an image ``<stem>_landmarks_<i>.jpg`` is written
    to ``<output_dir>/Face_Output_Landmark_Extraction``; it shows the
    landmark dots of that face and of all earlier faces in the same file.

    Args:
        input_dir: directory holding the images to analyse.
        output_dir: parent directory of the output folder; the output
            folder itself is deleted and recreated on every call.

    Returns:
        list: one list of 68 ``(x, y)`` tuples per detected face, in
        processing order. Callers that ignore the return value are not
        affected.
    """
    # Initialize face detector and shape predictor
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(
        'Dependencies/shape_predictor_68_face_landmarks.dat')

    output_dir = os.path.join(output_dir, 'Face_Output_Landmark_Extraction')

    # Start from an empty output directory
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)  # Remove the directory and its contents
    os.makedirs(output_dir, exist_ok=True)

    all_landmarks = []

    # listdir order is arbitrary; sorting keeps the returned list stable.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith(('.jpg', '.jpeg', '.png')):
            continue

        image_path = os.path.join(input_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            continue

        # Detect faces in the image (second argument: upsample once)
        dets = detector(image, 1)

        # Predict on the untouched image and draw on a copy, so dots drawn
        # for one face cannot disturb the landmark fit of the next one.
        annotated = image.copy()
        stem = os.path.splitext(filename)[0]

        for face_index, d in enumerate(dets):
            shape = predictor(image, d)
            landmarks = [(shape.part(k).x, shape.part(k).y)
                         for k in range(68)]
            # Keep the coordinates instead of discarding them after drawing
            all_landmarks.append(landmarks)

            # Draw landmarks on the overlay (filled dots, radius 1)
            for (x, y) in landmarks:
                cv2.circle(annotated, (x, y), 1, (0, 0, 255), -1)

            # One output file per face; dots accumulate across faces
            output_path = os.path.join(
                output_dir, f'{stem}_landmarks_{face_index}.jpg')
            cv2.imwrite(output_path, annotated)

    return all_landmarks

# 5. Landmarks-to-Feature-Vector Conversion Function

In [13]:
def landmarks_to_features(landmarks, output_dir):
    """Flatten each entry of ``landmarks`` and save it as its own ``.npy`` file.

    Files are written to ``<output_dir>/Face_Output_LFCV`` as
    ``landmarks_<i>.npy``, where ``i`` is the position of the entry in
    ``landmarks``. The target folder is deleted and recreated on every
    call. Nothing is returned.
    """
    target_dir = os.path.join(output_dir, 'Face_Output_LFCV')

    # Wipe results of a previous run before writing new ones
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir, exist_ok=True)

    position = 0
    for landmark_set in landmarks:
        flat_vector = np.array(landmark_set).reshape(-1)
        np.save(os.path.join(target_dir, f'landmarks_{position}.npy'),
                flat_vector)
        position += 1

# 0. Main Function

In [14]:
if __name__ == "__main__":
    # Runs the whole pipeline in order; every phase reads the folder written
    # by the previous one under 'Face_Output'.

    # PHASE 1 - Parse the face dataset
    # The STARTED message is printed before the work begins (it used to be
    # printed only after parsing had already finished).
    print("Parsing Face Dataset STARTED...")
    face_images = parse_face_dataset()
    print("Parsing Face Dataset COMPLETE!")

    # PHASE 2 - Perform face detection
    print("Face Image Preprocessing STARTED...")
    # Suppress display for face detection
    face_detection(face_images, display=False)
    print("Face Image Preprocessing COMPLETE!")

    # PHASE 3 - Perform facial feature extraction
    print("Face Feature Extraction STARTED...")
    # Input directory containing images with detected faces
    input_directory = 'Face_Output/Face_Output_Detection'

    # Output directory for saving images with landmarks and detected faces
    output_directory = 'Face_Output'

    features, labels = facial_feature_extraction(
        input_directory, output_directory)
    print("Face Feature Extraction COMPLETE!")

    # PHASE 4 - Extract facial landmarks from an image
    print("Extracting facial landmarks STARTED...")
    # Input directory containing images with extracted facial features
    # NOTE(review): these images already carry the lines drawn in phase 3;
    # confirm that landmarks should be fitted on annotated images.
    input_directory = 'Face_Output/Face_Output_Feature_Extraction'

    # Output directory for saving images with facial landmarks
    output_directory = 'Face_Output'

    extract_facial_landmarks(input_directory, output_directory)
    print("Extracting facial landmarks COMPLETE!")

    # PHASE 5 - Convert facial landmarks into feature vectors
    # NOTE(review): this saves the phase-3 feature vectors, not the 68-point
    # landmarks located in phase 4 - confirm this is intended.
    print("Converting facial landmarks to feature vectors STARTED...")
    landmarks_to_features(features, output_dir='Face_Output')
    print("Converting facial landmarks to feature vectors COMPLETE!")

    # Post-Phase 5
    print("Number of feature vectors generated:", len(
        os.listdir('Face_Output/Face_Output_LFCV')))

    # PHASE 6 - Splitting Data (80% Training, 20% Test)
    # Fixed random_state keeps the split repeatable for a given input order
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42)

    # Print the sizes of the training and testing sets
    print("Training set size:", len(X_train))
    print("Testing set size:", len(X_test))

    # PHASE 7



Total Face Images Found: 558
Parsing Face Dataset STARTED...
Parsing Face Dataset COMPLETE!
Face Image Preprocessing STARTED...
Face Image Preprocessing COMPLETE!
Face Feature Extraction STARTED...
Face Feature Extraction COMPLETE!
Extracting facial landmarks STARTED...
Extracting facial landmarks COMPLETE!
Converting facial landmarks to feature vectors STARTED...
Converting facial landmarks to feature vectors COMPLETE!
Number of feature vectors generated: 328
Training set size: 262
Testing set size: 66
