# Face Alignment - Feature Engineering

- Add the project's root directory (two levels up) to the Python path so the modules can be imported, even if they aren't in the current working directory:

In [None]:
import sys
import os

# Resolve the directory two levels above the current working directory and
# register it on the import path, unless it is already listed there.
project_root = os.path.abspath(os.path.join('..', '..'))
if not sys.path.count(project_root):
    sys.path.append(project_root)

- Import the required libraries and modules, as well as our utility functions:

In [None]:
import numpy as np
import cv2

from src.utils import load_config, get_project_root, save_as_npz

- Load the config using the utility function. Get paths to relevant folders/files needed to save and retrieve files:

In [None]:
# Read the project configuration through the shared utility.
config = load_config()

# Configured locations of the processed task-2 train and test data.
processed_paths = config['data']['task2']['processed']
processed_train_data_path = processed_paths['train']
processed_test_data_path = processed_paths['test']

# Build the full archive paths: the configured paths use '/' separators, which
# are swapped for the platform separator before joining onto the project root.
processed_train_data = os.path.join(
    get_project_root(),
    processed_train_data_path.replace('/', os.sep),
    "processed_face_alignment_train_images.npz",
)
processed_test_data = os.path.join(
    get_project_root(),
    processed_test_data_path.replace('/', os.sep),
    "processed_face_alignment_test_images.npz",
)

- Load the images and the landmark data. We can load the "npz" file by using numpy's load function:

In [None]:
# Open both processed archives with the same loader options.
# NOTE(review): allow_pickle=True lets numpy unpickle object arrays, which can
# execute arbitrary code; presumably acceptable because these archives are
# produced by this project's own preprocessing step — confirm.
npz_load_options = {'allow_pickle': True}
train_data = np.load(processed_train_data, **npz_load_options)
test_data = np.load(processed_test_data, **npz_load_options)

In [None]:
# Pull the arrays out of the loaded archives. Only the training archive is
# read for 'points'; the test archive is read for 'images' alone.
train_images, train_points = train_data['images'], train_data['points']
test_images = test_data['images']

- Extract SIFT features from an image. We compute SIFT descriptors at key points placed on a regular grid across the image, which will be very useful when training our model:

In [None]:
def extract_sift_features(image, step=4):
    """Compute SIFT descriptors on a regular grid and return them as one vector.

    Key points are placed every ``step`` pixels in both directions, starting at
    ``step`` (so the first row and column are skipped), each with size ``step``.

    Args:
        image: Array indexed as ``image[row, col]``. It is multiplied by 255 and
            cast to ``uint8`` before being passed to OpenCV.
            NOTE(review): this assumes float intensities in [0, 1] — confirm
            against the preprocessing step.
        step: Grid spacing in pixels, also used as the key point size.

    Returns:
        A 1-D array holding all descriptors flattened. If OpenCV returns no
        descriptors, a 1-D zero vector sized for one 128-value descriptor per
        grid key point is returned instead.
    """
    sift = cv2.SIFT_create()
    keypoints = [
        cv2.KeyPoint(float(x), float(y), float(step))
        for y in range(step, image.shape[0], step)
        for x in range(step, image.shape[1], step)
    ]
    _, descriptors = sift.compute(np.uint8(image * 255), keypoints)
    if descriptors is None:
        # Keep the fallback 1-D and as long as the normal output so that it can
        # be concatenated with the other 1-D feature vectors and stacked with
        # the feature rows of other images.
        return np.zeros(len(keypoints) * 128, dtype=np.float32)
    return descriptors.flatten()

- We will use a Canny edge detector to extract any edge-based features. This will be useful when we train our model because landmarks are near edges:

In [None]:
def extract_edge_features(image):
    """Return a subsampled Canny edge map as a flat feature vector.

    The image is multiplied by 255 and cast to ``uint8``, passed through
    ``cv2.Canny`` with thresholds 100 and 200, sampled at every 4th row and
    column, flattened, and divided by 255.0.

    NOTE(review): the scaling assumes float intensities in [0, 1] — confirm.
    """
    image_u8 = np.uint8(image * 255)
    edge_map = cv2.Canny(image_u8, 100, 200)
    subsampled = edge_map[::4, ::4]
    return subsampled.flatten() / 255.0

- Extract basic intensity statistics from the image: the mean and the standard deviation of its pixel values:

In [None]:
def extract_intensity_stats(image):
    """Return ``[mean, standard deviation]`` of all values in ``image``."""
    mean_intensity = np.mean(image)
    intensity_spread = np.std(image)
    return np.array([mean_intensity, intensity_spread])

- We will combine all these features into one vector. This just means our "npz" file will have one extra attribute:

In [None]:
def extract_features(image):
    """Build the combined feature vector for a single image.

    The SIFT, edge and intensity features are concatenated in that order
    into one array.
    """
    feature_parts = [
        extract_sift_features(image),
        extract_edge_features(image),
        extract_intensity_stats(image),
    ]
    return np.concatenate(feature_parts)

- Extract the features from the images in the training dataset and the testing dataset:

In [None]:
# Compute one combined feature vector per image, in dataset order.
X_train = list(map(extract_features, train_images))
X_test = list(map(extract_features, test_images))

- Convert into a numpy array:

In [None]:
# Turn the per-image feature lists into numpy arrays.
X_train, X_test = np.array(X_train), np.array(X_test)

- Save the extracted features to a new file which can be retrieved and used for training:

In [None]:
# Output archives are written to the same processed-data folders as the inputs;
# the configured paths use '/' separators, swapped here for the platform one.
feature_processed_train_data = os.path.join(
    get_project_root(),
    processed_train_data_path.replace('/', os.sep),
    "processed_face_alignment_train_images_features.npz",
)
feature_processed_test_data = os.path.join(
    get_project_root(),
    processed_test_data_path.replace('/', os.sep),
    "processed_face_alignment_test_images_features.npz",
)

# The training archive stores images, landmark points and features; the test
# archive stores images and features only.
save_as_npz(
    feature_processed_train_data,
    images=train_images,
    points=train_points,
    features=X_train,
)
save_as_npz(
    feature_processed_test_data,
    images=test_images,
    features=X_test,
)