# **Pattern Recognition - Machine Learning** | Assignment 2





# **1) Face Recognition**





In [None]:
from google.colab import drive

# Mount Google Drive under /content/drive (Colab only) so that files stored on
# the Drive can be opened through ordinary file-system paths.
drive.mount('/content/drive')

# **I) Image Loading and Sets creation**

In [None]:
import glob
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow

# Folder that contains the face images
path = '/content/drive/MyDrive/faces'

# Inclusive (first, last) image index of every set; the loader walks
# range(first, last + 1) for each person.
set_ranges = dict(
    Set_1=(1, 7),
    Set_2=(8, 19),
    Set_3=(20, 31),
    Set_4=(32, 45),
    Set_5=(46, 64),
)

# Function for loading images and storing them in an array
def Load_Images(path, set_number):
    """Load all images of one set into a column-stacked matrix.

    For each of the 10 persons and each image index in the inclusive range
    ``set_ranges[set_number]``, the file ``personPP_II.png`` is read in
    grayscale from ``path``, displayed, flattened and stored as one column.
    Columns are ordered person by person.

    Parameters
    ----------
    path : str
        Folder that contains the image files.
    set_number : str
        Key of ``set_ranges`` (e.g. ``'Set_1'``).

    Returns
    -------
    list
        ``[image_array, labels]`` where ``image_array`` has shape
        ``(2500, 10 * images_per_person)`` and ``labels`` holds the person
        number (1..10) of every column.

    Raises
    ------
    OSError
        If an image cannot be read (``cv2.imread`` returned ``None``).
    ValueError
        If an image does not contain exactly 2500 pixels.
    """
    # Printing format
    print("\n-----------------------------------", set_number, "-----------------------------------\n")
    (start, end) = set_ranges[set_number]
    groups = end - start + 1      # images per person in this set
    n_persons = 10
    values = 2500                 # pixels per image (reshaped to 50x50 when plotted)

    # One column per image
    image_array = np.zeros((values, n_persons * groups))
    labels = []
    current_column = 0

    for person in range(1, n_persons + 1):
        print("\n____________ Images of person", person, "____________\n")

        for i in range(start, end + 1):
            # Build the file name from the naming pattern
            image_name = f"person{person:02d}_{i:02d}.png"
            full_path = path + "/" + image_name

            # Read the image as grayscale; imread signals failure by returning None
            image = cv2.imread(full_path, 0)
            if image is None:
                # Fail loudly: a silently skipped image would leave an all-zero
                # column in the data matrix and corrupt training/evaluation.
                raise OSError(f"Failed to read the image: {full_path}")
            if image.size != values:
                raise ValueError(
                    f"{image_name} has {image.size} pixels, expected {values}"
                )

            print(image_name, " | shape", image.shape)
            plt.imshow(image, cmap='bone')
            plt.show()

            # Store the flattened image as a column and remember whose face it is
            image_array[:, current_column] = image.reshape(-1)
            labels.append(person)
            current_column += 1

    # Return the array of the corresponding set of images together with its labels
    return [image_array, labels]

# Load every set exactly once and unpack the data matrix and its labels.
# (Calling Load_Images separately for [0] and [1] reads and plots every image
# twice; pre-allocating the arrays is unnecessary because Load_Images returns
# freshly built arrays.)
Set_1, labels_1 = Load_Images(path, 'Set_1')
Set_2, labels_2 = Load_Images(path, 'Set_2')
Set_3, labels_3 = Load_Images(path, 'Set_3')
Set_4, labels_4 = Load_Images(path, 'Set_4')
Set_5, labels_5 = Load_Images(path, 'Set_5')

# **II) Eigenfaces method training on Set_1 and face recognition trial for the other Sets**

In [None]:
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Training set
train_set = Set_1

# Create lists for better data retrieval in the following loops
d_values = [9, 30]
accuracy_scores = []
sets_labels = [labels_1, labels_2, labels_3, labels_4, labels_5]
test_sets = [Set_1, Set_2, Set_3, Set_4, Set_5]

print("\n----------------------- Accuracy Scores for PCA Eigenfaces with KNN classifier -----------------------\n")

# For d=9 and d=30
for d in d_values:

    # Fit PCA on the training set (samples are rows, hence the transpose) and
    # project the training images onto the d principal components
    pca = PCA(n_components=d)
    transformed_data = pca.fit_transform(train_set.T)

    # Eigenfaces as columns: shape (pixels, d)
    eigenfaces = pca.components_.T

    # The projected training data is what the classifier is trained on
    training_data = transformed_data

    # Define and fit the K-nearest neighbors classifier
    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(training_data, labels_1)

    # Evaluate the model on Set_1 to Set_5
    for i, (test_set, true_labels) in enumerate(zip(test_sets, sets_labels)):

        # Project the test set with the PCA fitted on the training set
        transformed_test_set = pca.transform(test_set.T)

        # Use the trained model to predict the image category
        predicted_labels = knn.predict(transformed_test_set)

        # Calculate the accuracy of the model on each Set
        accuracy = accuracy_score(true_labels, predicted_labels)
        accuracy_scores.append(accuracy)
        print(f"Accuracy for Set_{i+1} with d={d}: {accuracy*100:.2f}%")


We observe that the model has a decreasing percentage of successful classification/matching of images to the corresponding person as we move from one Set_i to Set_(i+1). This is due to the fact that each image Set differs from the others, for example, in terms of lighting, image clarity, facial line characteristics, and other features. Thus, each dataset matrix contains different density and quality of information that the model can leverage, resulting in the generalization of the method being less effective across all image Sets. We notice that for Set_1, which is our training set, the accuracy is 100%, which is expected since the model was initially trained on it.

# **III) Visualization of Eigenvectors of the trained model**



In [None]:
# Draw the first nine eigenfaces on a 3x3 grid, one 50x50 image per axis
fig, axes = plt.subplots(3, 3, figsize=(10, 10))

# eigenfaces stores one eigenvector per column, so its transpose yields them row by row;
# zip stops after the nine available axes
for i, (ax, eigenvector) in enumerate(zip(axes.flat, eigenfaces.T)):
    face = eigenvector.reshape(50, 50)
    ax.imshow(face, cmap='gray')
    ax.set_title(f"Eigenvector {i+1}")

plt.tight_layout()
plt.show()

The 9 main eigenfaces that resulted from the PCA analysis represent the most significant patterns/features of the training images. Each eigenface is an eigenvector of the covariance matrix calculated from the training image dataset.

Eigenfaces capture the dominant variations in values present in face images. They essentially reflect a subset of the face data, such as lighting conditions, facial expressions, and different facial features (e.g., eyes, nose, mouth, lips, angles, etc.). This subset of data is, in a sense, characteristic and unique to each individual's face, aiding the model in identifying and recognizing other images belonging to the test sets.

The first eigenface (eigenvector) corresponds to the largest eigenvalue and captures the most prominent variation of characteristics in the face images. Because PCA subtracts the mean face before the decomposition, this eigenface is not the average face itself; it shows the direction along which the training faces deviate most from that average. Each subsequent eigenface shows a different data structure for the same qualitative characteristics (e.g., lighting, angles, dark colors, eyes, mouth, etc.), indicating varying degrees/levels of deviation compared to the dominant eigenface 1. In fact, upon careful observation of the eigenfaces, we can say that the first face is the most distinct from the rest. The faces 4-5-6 and 7-8-9 resemble each other more as trios, just like the pair 2-3.

In summary, the 9 main eigenfaces indicate the most important differences in face images, capturing the fundamental patterns that distinguish one face from another.

# **IV) Face image recreation through Eigenfaces**

In [None]:
# Function for image reconstruction based on Eigenfaces
def recreate_image(eigenfaces, coefficients, mean_face=None):
    """Reconstruct an image from its PCA coefficients.

    Parameters
    ----------
    eigenfaces : ndarray, shape (n_pixels, d)
        Eigenfaces stored as columns.
    coefficients : ndarray, shape (d,) or (n_images, d)
        Projection of the image(s) onto the eigenfaces.
    mean_face : ndarray, shape (n_pixels,), optional
        Mean image that was subtracted before the projection. When omitted,
        the mean of the global ``train_set`` columns is used, which is the
        original behaviour.

    Returns
    -------
    ndarray
        ``mean_face + coefficients @ eigenfaces.T``.
    """
    if mean_face is None:
        mean_face = np.mean(train_set.T, axis=0)
    return mean_face + np.dot(coefficients, eigenfaces.T)

# Pick one random image per Set once, so that d=9 and d=30 are compared on the
# same faces
random_indices = [np.random.choice(test_set.shape[1]) for test_set in test_sets]

# For d=9 and d=30
for d in d_values:

    # Fit a PCA with exactly d components on the training set. Reusing the
    # eigenfaces left over from an earlier cell would always give the basis of
    # the last d that was fitted there, so d would have no effect here.
    pca_d = PCA(n_components=d)
    pca_d.fit(train_set.T)
    eigenfaces_d = pca_d.components_.T

    # Evaluation of the model on Set_1 to Set_5
    for i, test_set in enumerate(test_sets):

        # Image selected for this Set
        random_index = random_indices[i]
        selected_image = test_set[:, random_index]

        # Project the selected image itself onto the d eigenfaces
        coefficients = pca_d.transform(selected_image.reshape(1, -1))[0]

        # Reconstruct the image using Eigenfaces
        reconstructed_image = recreate_image(eigenfaces_d, coefficients)

        # Reshaping images for plotting
        selected_image = selected_image.reshape(50, 50)
        reconstructed_image = reconstructed_image.reshape(50, 50)

        # Plotting the original and approximately reconstructed images

        # Original
        plt.figure()
        plt.subplot(1, 2, 1)
        plt.imshow(selected_image, cmap='gray')
        plt.title(f"Original Image (Set_{i+1})")
        plt.axis("off")

        # Reconstructed
        plt.subplot(1, 2, 2)
        plt.imshow(reconstructed_image, cmap='gray')
        plt.title(f"Reconstructed Image (Set_{i+1}, d={d})")
        plt.axis("off")

        plt.show()

We observe that for d=9, the effectiveness of image reconstruction is lower compared to d=30. This is logical because in the first case, the dimensionality of the data is lower, so the model has fewer elements available to capture the true structure of the image it is trying to approximate. For d=30, its effectiveness increases.

Additionally, we can see that when the facial image is quite dark and not many lines and facial features are discernible, the model performs significantly better for d=30 compared to d=9. This can be explained by the fact that for d=30, it has more data available to predict/reconstruct the genuine image, even with significant missing elements (e.g., dark areas, lack of lines, etc.).

# **V) Depiction of Eigenvectors with SVD pre-processing**

In [None]:
# Apply SVD on training Set 'Set_1'.
# full_matrices=False computes the thin decomposition: U has one column per
# singular value instead of a full square (pixels x pixels) matrix, and only
# the first 9 columns are used below.
# Note: np.linalg.svd returns the transposed right singular vectors, so V
# holds V^T.
U, s, V = np.linalg.svd(Set_1, full_matrices=False)

# Select the first 9 (left) singular vectors
main_singular_vectors = U[:, :9]

# Plotting of the first 9 singular vectors
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
axes = axes.flatten()

# Images plotting
for i, ax in enumerate(axes):
    singular_vector = main_singular_vectors[:, i]
    # Images reshaping
    image = singular_vector.reshape(50, 50)
    ax.imshow(image, cmap='gray')
    ax.set_title(f"Singular Vector {i+1}")

# Subplots spacing
plt.tight_layout()
plt.show()

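Eigenvectors (from PCA) and singular vectors (from SVD) differ here mainly because PCA centers the data (subtracts the mean face) before decomposing it, whereas the SVD above is applied to the raw, uncentered image matrix; on mean-centered data the left singular vectors would coincide with the PCA eigenvectors up to sign.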

- Eigenvectors, generated by PCA, indicate the axes of the n-dimensional space where the most variability/diversity of the data is observed. Therefore, these axes/dimensions are the ones utilized by the model to recognize patterns and faces in subsequent analysis.

- On the other hand, singular vectors, produced by SVD, represent the relationship between the rows and columns of the matrices storing the image data.

In general, the choice between eigenvectors and singular vectors depends on the nature of the data and the problem we are trying to solve.

# **2) Image classification using SVMs**

* Data acquisition and normalization

In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# MNIST dataset
mnist = fetch_openml('mnist_784', version=1, cache=True)

# Get image data and labels as plain arrays. np.asarray accepts both pandas
# objects and ndarrays, so this works whichever of the two fetch_openml returns.
# Each row is already a flattened 28x28 image (784 values), so no reshape is needed.
X = np.asarray(mnist['data'], dtype=np.float64)
y = np.asarray(mnist['target'])

# Normalize data in [0, 1]
# NOTE: the scaler is fitted on the whole dataset, i.e. before the train/test split.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Use the first 40000 samples: 30000 for the training set and 10000 for the
# test set. (Splitting only the first 30000 samples with test_size=10000 would
# leave just 20000 training samples.)
X_train, X_test, y_train, y_test = train_test_split(
    X[:40000], y[:40000], train_size=30000, test_size=10000, random_state=42
)

# Save the training and testing sets
np.savez('mnist_train.npz', X_train=X_train, y_train=y_train)
np.savez('mnist_test.npz', X_test=X_test, y_test=y_test)

* Training and comparative evaluation of linear SVM kernel and RBF kernel with combinations of hyperparameters.

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Search parameters
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.1, 0.01, 0.001]}
]

# SVM classifier init
svm = SVC()

# Grid search with cross-validation on the training set; n_jobs=-1 evaluates
# the candidates in parallel on all available cores
grid_search = GridSearchCV(svm, param_grid, scoring='accuracy', n_jobs=-1)

# data fitting
grid_search.fit(X_train, y_train)

# Best model (refitted on the whole training set) and its hyperparameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Cross-validation scores of every candidate. These are computed on held-out
# folds of the training set; they are NOT test-set results, so they are
# printed only once.
cv_results = grid_search.cv_results_
print("Cross-validation results on Training Set:")
print("=========================================")
for params, mean_score, std_score in zip(cv_results['params'], cv_results['mean_test_score'], cv_results['std_test_score']):
    print("Kernel: {}, C: {}, Gamma: {}".format(params['kernel'], params['C'], params.get('gamma', 'N/A')))
    print("Accuracy: {:.2f}% (+/- {:.2f}%)".format(mean_score * 100, std_score * 100))
    print()

# Accuracy of the best model on the Training Set
y_train_pred = best_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Results printing
print("> Best Model Hyperparameters:")
print("> ", best_params)
print("> Accuracy on Training Set: {:.2f}%".format(train_accuracy * 100))

# Accuracy of the best model on the Test Set
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Results printing
print("\nResults for Test Set:")
print("=====================")
print("> Best Model Hyperparameters:")
print("> ", best_params)
print("> Accuracy on Test Set: {:.2f}%".format(accuracy * 100))

30,000 images were selected as the training dataset for the model, and correspondingly, 10,000 images for testing, due to the significantly long execution time of the code (>12 hours) and resource usage limit on GColab for 60,000 images as the training dataset. Since even with 30,000 images, the execution takes approximately 2 hours, the printed results are as follows:

Results for the Training Set:
========================
Kernel: linear, C: 0.1, Gamma: N/A
Accuracy: 93.59% (+/- 0.37%)

Kernel: linear, C: 1, Gamma: N/A
Accuracy: 92.26% (+/- 0.53%)

Kernel: linear, C: 10, Gamma: N/A
Accuracy: 91.44% (+/- 0.53%)

Kernel: rbf, C: 0.1, Gamma: 0.1
Accuracy: 40.90% (+/- 0.69%)

Kernel: rbf, C: 0.1, Gamma: 0.01
Accuracy: 93.34% (+/- 0.20%)

Kernel: rbf, C: 0.1, Gamma: 0.001
Accuracy: 87.12% (+/- 0.30%)

Kernel: rbf, C: 1, Gamma: 0.1
Accuracy: 90.69% (+/- 0.26%)

Kernel: rbf, C: 1, Gamma: 0.01
Accuracy: 96.33% (+/- 0.15%)

Kernel: rbf, C: 1, Gamma: 0.001
Accuracy: 92.25% (+/- 0.41%)

Kernel: rbf, C: 10, Gamma: 0.1
Accuracy: 91.23% (+/- 0.22%)

Kernel: rbf, C: 10, Gamma: 0.01
Accuracy: 97.36% (+/- 0.19%)

Kernel: rbf, C: 10, Gamma: 0.001
Accuracy: 94.39% (+/- 0.31%)

> Best Model Hyperparameters:
> {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
> Accuracy on the Training Set: 99.98%

Results for the Test Set:
=====================
Kernel: linear, C: 0.1, Gamma: N/A
Accuracy: 93.59% (+/- 0.37%)

Kernel: linear, C: 1, Gamma: N/A
Accuracy: 92.26% (+/- 0.53%)

Kernel: linear, C: 10, Gamma: N/A
Accuracy: 91.44% (+/- 0.53%)

Kernel: rbf, C: 0.1, Gamma: 0.1
Accuracy: 40.90% (+/- 0.69%)

Kernel: rbf, C: 0.1, Gamma: 0.01
Accuracy: 93.34% (+/- 0.20%)

Kernel: rbf, C: 0.1, Gamma: 0.001
Accuracy: 87.12% (+/- 0.30%)

Kernel: rbf, C: 1, Gamma: 0.1
Accuracy: 90.69% (+/- 0.26%)

Kernel: rbf, C: 1, Gamma: 0.01
Accuracy: 96.33% (+/- 0.15%)

Kernel: rbf, C: 1, Gamma: 0.001
Accuracy: 92.25% (+/- 0.41%)

Kernel: rbf, C: 10, Gamma: 0.1
Accuracy: 91.23% (+/- 0.22%)

Kernel: rbf, C: 10, Gamma: 0.01
Accuracy: 97.36% (+/- 0.19%)

Kernel: rbf, C: 10, Gamma: 0.001
Accuracy: 94.39% (+/- 0.31%)

> Best Model Hyperparameters:
> {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
> Accuracy on the Test Set: 97.55%

We observe that the success rates of the RBF model, even with 30,000 training images, are very high (~97-99%). Therefore, if we choose more training samples (60,000), this success rate would be expected to increase further, though probably without significantly changing the already obtained result.

# **3) Image classification after PCA and using SVMs**

In [None]:
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
import time

# Fractions of the total variance that PCA must retain
conserved_variances = [0.90, 0.85, 0.80]

for variance in conserved_variances:
    print("Conserved Variance: {:.2f}".format(variance))

    # Fit PCA on the training data (a float n_components keeps as many
    # components as needed to reach that variance) and project both splits
    pca = PCA(n_components=variance)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # RBF SVM with fixed hyperparameters
    svm = SVC(C=10, gamma=0.01, kernel='rbf')

    # Time only the SVM training step
    start_time = time.time()
    svm.fit(X_train_pca, y_train)
    elapsed_time = time.time() - start_time

    # Evaluate on the projected test data
    y_pred = svm.predict(X_test_pca)
    accuracy = accuracy_score(y_test, y_pred)

    # Report
    print("Data Components Preserved: {}".format(pca.n_components_))
    print("Accuracy: {:.2f}%".format(accuracy * 100))
    print("Elapsed Time: {:.2f} seconds".format(elapsed_time))
    print("-----------------------------------\n")

* When selecting a large number of dimensions for analysis, there is a risk of providing "too many" reference points and analyses to the model, potentially leading to an inability to draw accurate conclusions due to "overfitting." On the other hand, if we provide too few reference points/dimensions, the model will have insufficient data, leading to deviations due to underfitting ("data starvation").

* Additionally, when aiming for higher accuracy in results, it requires more execution time, and when the model operates in higher dimensions, data analysis also takes more time.

* Therefore, it is necessary to initially choose an appropriate number of dimensions to avoid "overfitting" and "data starvation." Furthermore, depending on the requirements of the problem we want to solve, we choose whether to emphasize accuracy or execution speed.