# Face Alignment - Data Analysis

- Add the project's root directory (two levels up) to the Python path so the modules can be imported, even if they aren't in the current working directory:

In [None]:
import sys
import os

# Resolve the directory two levels above the current working directory and
# register it on the import search path, unless it is already listed there.
project_root = os.path.abspath(os.path.join(os.pardir, os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

- Import the required libraries and modules, as well as our utility functions:

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from src.utils import load_config, get_project_root

## 1. Data Loading and Exploration

- Load the config using the utility function. Get paths to relevant folders/files needed to save and retrieve files:

In [None]:
# Read the project configuration and resolve the raw Task 2 dataset locations.
config = load_config()
project_dir = get_project_root()
raw_entries = config['data']['task2']['raw']

# Join each configured location onto the project root exactly once, then swap
# any '/' for os.sep so the result uses the platform's separator. (Joining the
# root a second time only works when the root is absolute; a relative root
# would end up duplicated in the path.)
train_data_path = os.path.join(project_dir, raw_entries['train']).replace('/', os.sep)
test_data_path = os.path.join(project_dir, raw_entries['test']).replace('/', os.sep)

# Aliases kept so that cells referring to either name keep working.
raw_test_data = test_data_path
raw_train_data = train_data_path

- Load the training data, reference the images and image landmarks:

In [None]:
# Load the raw training file and pull out the image stack together with the
# landmark annotations stored under the 'images' and 'points' keys.
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code - only load files that come from a trusted source.
data = np.load(raw_train_data, allow_pickle=True)
images, pts = data['images'], data['points']

- Print some key information about our images and points:

In [None]:
# Report the dimensions of the training images and of their landmark arrays.
print("Training images shape:", np.shape(images))
print("Landmark points shape:", np.shape(pts))

- Load the test data, reference the images:

In [None]:
# Load the raw test file; only its 'images' entry is read here.
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code - only load files that come from a trusted source.
test_data = np.load(file=raw_test_data, allow_pickle=True)
test_images = test_data['images']

- Print some key information about our images:

In [None]:
# Report the dimensions of the test images.
print("Test images shape:", np.shape(test_images))

## 2. Data Visualisation

- Define a helper that displays a selected image with a red '+' marker at each landmark location:

In [None]:
def show_points_on_image(image, points):
    """Display *image* in grayscale with a red '+' drawn at every landmark.

    Args:
        image: Array accepted by ``plt.imshow``.
        points: 2-D array whose column 0 holds the x-coordinates and whose
            column 1 holds the y-coordinates of the landmarks.
    """
    xs = points[:, 0]
    ys = points[:, 1]

    plt.imshow(image, cmap='gray')
    # The '+' format string draws markers only, with no connecting line.
    plt.plot(xs, ys, '+', color='red')
    plt.axis('off')
    plt.tight_layout()
    plt.show()

- Choose 3 images at random, along with their corresponding landmark points:

In [None]:
# Display three training samples picked at random. Each index is drawn
# independently, so the same sample may come up more than once.
for _ in range(3):
    random_index = np.random.randint(len(images))
    image, point_set = images[random_index], pts[random_index]
    show_points_on_image(image, point_set)

## 3. Mean Face

- Get the mean of all images and the mean of all landmark points, and plot them together. This gives us a good idea of whereabouts the landmarks typically fall:

In [None]:
# Average over the sample axis (axis 0) to get the mean image and the mean
# position of each landmark, then overlay the two.
mean_image = images.mean(axis=0)
mean_points = pts.mean(axis=0)
show_points_on_image(mean_image, mean_points)

## 4. Statistical Analysis

In [None]:
# Standard deviation of every landmark coordinate across the samples (axis 0).
landmark_std = pts.std(axis=0)

# Wide figure intended to hold the two side-by-side bar charts.
# NOTE(review): the subplots are drawn in the following cells; if the notebook
# backend renders figures per cell, they may not land on this figure - confirm.
plt.figure(figsize=(10, 4))

- Plot the standard deviation of the x-coordinates:

In [None]:
# Left panel: spread of each landmark along the x-axis. The bar positions are
# derived from the number of landmarks in landmark_std rather than a fixed
# count, so the plot still works if the dataset has more or fewer landmarks.
plt.subplot(1, 2, 1)
plt.bar(range(len(landmark_std)), landmark_std[:, 0], color='steelblue')
plt.title("X-Coordinate Standard Deviation")
plt.xlabel("Landmark Index")
plt.ylabel("Standard Deviation")

- Plot the standard deviation of the y-coordinates:

In [None]:
# Right panel: spread of each landmark along the y-axis. The bar positions are
# derived from the number of landmarks in landmark_std rather than a fixed
# count, so the plot still works if the dataset has more or fewer landmarks.
plt.subplot(1, 2, 2)
plt.bar(range(len(landmark_std)), landmark_std[:, 1], color='indianred')
plt.title("Y-Coordinate Standard Deviation")
plt.xlabel("Landmark Index")
plt.ylabel("Standard Deviation")

In [None]:
# Tighten the spacing on pyplot's current figure, then render it.
# NOTE(review): this acts on whatever figure is current when the cell runs; if
# each cell renders its own figure (as inline notebook backends typically do),
# the figure and subplots created in the earlier cells may not be the one
# shown here - confirm, and consider merging those cells into one.
plt.tight_layout()
plt.show()

- Heatmap of landmark positions:

In [None]:
# Flatten x and y coordinates into single arrays
x_coords = pts[:, :, 0].flatten()
y_coords = pts[:, :, 1].flatten()

- Create a 2D histogram to show where the landmarks tend to be positioned:

In [None]:
# Density of landmark positions as a 50x50-bin 2-D histogram.
plt.figure(figsize=(6, 6))
plt.hist2d(x_coords, y_coords, bins=50, cmap='hot')

# Axis labelling.
plt.xlabel('X-Coordinate')
plt.ylabel('Y-Coordinate')
plt.title('Landmark Positions Density')

plt.colorbar(label='Number of Points')
# Flip the y-axis so the plot matches image coordinates.
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()

## 5. Correlation Between Points

- Flatten landmarks into a single vector, and then compute the correlation matrix:

In [None]:
# One row per sample: all landmark coordinates of a sample laid out in a
# single vector.
flattened_points = pts.reshape(len(pts), -1)

# np.corrcoef treats rows as variables, so transpose to correlate the
# individual coordinates with one another across samples.
correlation_matrix = np.corrcoef(flattened_points.transpose())

- Plot the correlation matrix as a heatmap:

In [None]:
# Draw the coordinate correlation matrix computed in the previous cell.
# (The matrix is named correlation_matrix; there is no variable called corr.)
plt.figure(figsize=(12, 10))
# A diverging colormap centred on 0 separates positive from negative values.
sns.heatmap(correlation_matrix, cmap='coolwarm', center=0)
plt.title("Landmark Coordinates Correlation")
plt.tight_layout()
plt.show()