# EigenFaces

## Step 1: Preprocessing the Images

### Load the images

In [None]:
import bz2
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from numpy import intp
import random

In [None]:
# Load up to `num_of_faces_loaded` people from the dataset directory.
# Every sub-directory of `directory` is one person; each "*.ppm.bz2" file in
# it is decompressed and decoded into a grayscale image.
directory = 'feret/'
faces = {}  # maps "<person_id>/<image file name>" -> grayscale image array
num_of_faces_loaded = 20 #change this value to adjust the number of faces to be loaded

people_loaded = 0
# os.listdir returns entries in arbitrary order; sorting keeps runs reproducible.
for person_id in sorted(os.listdir(directory)):
    # Stop once exactly `num_of_faces_loaded` people have been loaded.
    if people_loaded >= num_of_faces_loaded:
        break

    person_path = os.path.join(directory, person_id)

    # Stray files next to the person directories must not use up the quota.
    if not os.path.isdir(person_path):
        continue
    people_loaded += 1

    for image_id in sorted(os.listdir(person_path)):
        if not image_id.endswith(".ppm.bz2"):
            continue

        image_path = os.path.join(person_path, image_id)
        with bz2.BZ2File(image_path, 'rb') as f:
            content = f.read()
        image = cv2.imdecode(np.frombuffer(content, np.uint8), cv2.IMREAD_GRAYSCALE)

        # imdecode yields None for data it cannot decode; skip such files
        # rather than storing a value that fails later when flattened.
        if image is None:
            continue

        # Build the key from its parts instead of splitting the joined path,
        # so it does not depend on the platform's path separator.
        faces[f"{person_id}/{image_id}"] = image

### Let's see some of them

In [None]:
# Show the 16 most recently loaded faces on a 4x4 grid, filled column by column.
fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(8, 10))
lbls = list(faces)[-16:]
faceimages = [faces[label] for label in lbls]
for i in range(16):
    grid_col, grid_row = divmod(i, 4)
    panel = axes[grid_row][grid_col]
    # The title is the person id, i.e. the part of the key before the slash.
    panel.set_title(lbls[i].split("/")[0])
    panel.imshow(faceimages[i], cmap="gray")
print("Showing sample faces")
plt.show()

### Some details about the data

In [None]:
# Summarise the loaded data: shape of one image, number of people, number of images.
all_images = list(faces.values())
faceshape = all_images[0].shape
print("Face image shape:", faceshape)
# The class of an image is the person id that prefixes its key.
classes = {filename.split("/")[0] for filename in faces}
print("Number of classes:", len(classes))
print("Number of images:", len(faces))

### Split the data into training and test data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# One flattened image per row, with the matching person id at the same
# position in the label array.
facematrix = np.array([image.flatten() for image in faces.values()])
facelabel = np.array([name.split("/")[0] for name in faces])

In [None]:
# Hold out 25% of the images for testing; stratifying on the label keeps
# each person's share the same in both splits.
Xtr, Xts, ytr, yts = train_test_split(
    facematrix,
    facelabel,
    test_size=0.25,
    stratify=facelabel,
)

## Step 2: Training

### Apply PCA and take first K principal components as eigenfaces

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Fit a PCA with default settings on the training images.
pca = PCA()
pca.fit(Xtr)

In [None]:
# Use as many principal components as there are training images; each row
# of `eigenfaces` is one component.
n_components = Xtr.shape[0]
eigenfaces = pca.components_[:n_components, :]

### Let's see some of the eigenfaces

In [None]:
# Show the leading eigenfaces (at most 16) on a 4x4 grid, filled column by column.
fig, axes = plt.subplots(4,4,sharex=True,sharey=True,figsize=(8,10))
# Guard against having fewer than 16 components (small training sets).
for i in range(min(16, len(eigenfaces))):
    axes[i%4][i//4].imshow(eigenfaces[i].reshape(faceshape), cmap="gray")
    # An eigenface is a principal component, not a training sample, so it is
    # labelled by component index rather than by an unrelated training label.
    axes[i%4][i//4].set_title(f"Eigenface {i + 1}")
print("Showing the eigenfaces")
plt.show()

### Generate weights as a KxN matrix where K is the number of eigenfaces and N the number of samples

In [None]:
# Project the mean-centred training images onto the eigenfaces; column j
# holds the weights of training image j.
centered_train = Xtr - pca.mean_
weights = eigenfaces @ centered_train.T
print("Shape of the weight matrix:", weights.shape)

### Compute the threshold

In [None]:
def compute_threshold(ytr, Xts, yts, weights, eigenfaces, mean_face):
    """Return the largest nearest-neighbour distance among correct matches.

    Every image in ``Xts`` is projected onto ``eigenfaces`` and compared with
    each column of ``weights``. When the closest training sample has the same
    label as the test image, that distance is recorded. The maximum recorded
    distance is returned.

    Args:
        ytr: Labels of the training samples, indexed like the columns of
            ``weights``.
        Xts: Sequence of flattened test images.
        yts: Labels of the test images, aligned with ``Xts``.
        weights: Matrix with one column of eigenface weights per training
            sample.
        eigenfaces: Matrix with one eigenface per row.
        mean_face: Mean image subtracted before projection.

    Returns:
        The maximum distance over all correctly matched test images.

    Raises:
        ValueError: If no test image is matched to its own label, so no
            threshold can be derived.
    """
    distances = []

    for test_sample, true_label in zip(Xts, yts):
        test_image = test_sample.reshape(1, -1)
        test_weight = eigenfaces @ (test_image - mean_face).T
        # Distance from this test image to every training sample in weight space.
        euclidean_distance = np.linalg.norm(weights - test_weight, axis=0)
        best_match = np.argmin(euclidean_distance)

        if true_label == ytr[best_match]:
            distances.append(euclidean_distance[best_match])

    if not distances:
        raise ValueError(
            "Cannot compute a threshold: no test image was matched to its own label."
        )

    # Return only after ALL test images have been examined.
    return max(distances)

## Step 3: Test

In [None]:
def eigenface_predict(test_image, eigenfaces, weights, mean_face, threshold):
    """Classify one image against the training weights.

    The image is projected onto ``eigenfaces`` and the training sample with
    the smallest Euclidean distance in weight space is selected. The image is
    also reconstructed from its weights to measure the reconstruction error.
    The same ``threshold`` is applied to both the reconstruction error and
    the best-match distance.

    Args:
        test_image: Flattened image; callers in this file pass it as a
            single-row 2-D array (``reshape(1, -1)``).
        eigenfaces: Matrix with one eigenface per row.
        weights: Matrix with one column of eigenface weights per training
            sample.
        mean_face: Mean image subtracted before projection and added back
            for reconstruction.
        threshold: Upper bound applied to both checks.

    Returns:
        A list ``[text, best_match, distance]``. ``text`` is "Not a face"
        when the reconstruction error exceeds the threshold, "Unknown face"
        when only the best-match distance does, and "Match found!"
        otherwise. ``best_match`` is the column index of the closest
        training sample and ``distance`` is its distance.
    """
    test_weight = eigenfaces @ (test_image - mean_face).T
    euclidean_distance = np.linalg.norm(weights - test_weight, axis=0)
    best_match = np.argmin(euclidean_distance)

    # Reconstruct with the mean that was passed in, not a global PCA object,
    # so projection and reconstruction always use the same mean face.
    reconstructed_face = mean_face + (test_weight.T @ eigenfaces)
    reconstruction_error = np.linalg.norm(test_image - reconstructed_face)

    text = "Match found!"

    if reconstruction_error > threshold:
        text = "Not a face"
    elif euclidean_distance[best_match] > threshold:
        text = "Unknown face"

    return [text, best_match, euclidean_distance[best_match]]

In [None]:
def check_accuracy(y_true, y_pred):
    """Compare predicted labels with true labels.

    Args:
        y_true: Sequence of true labels.
        y_pred: Sequence of predicted labels, aligned with ``y_true``.

    Returns:
        A tuple ``(accuracy, correct)`` where ``accuracy`` is the percentage
        of matching labels and ``correct`` is their count. Empty input
        returns ``(0.0, 0)``.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(y_true) != len(y_pred):
        raise ValueError("Input lists must have the same length.")

    total = len(y_true)
    # Nothing to score: report zero instead of dividing by zero.
    if total == 0:
        return 0.0, 0

    correct = sum(
        1
        for true_label, predicted_label in zip(y_true, y_pred)
        if true_label == predicted_label
    )

    accuracy = (correct / total) * 100
    return accuracy, correct

### Test the algorithm using images from the test set

In [None]:
# Derive the decision threshold from the held-out images.
threshold = compute_threshold(
    ytr=ytr,
    Xts=Xts,
    yts=yts,
    weights=weights,
    eigenfaces=eigenfaces,
    mean_face=pca.mean_,
)
print(f"Optimum threshold: {threshold}")

In [None]:
# Classify a handful of randomly drawn held-out images and show each one
# next to the training image it was matched with.
num_of_test = 10
for i in range(num_of_test):
    num = random.randint(0, len(Xts) - 1)
    test_image = Xts[num].reshape(1, -1)

    text, best_match, euclidean_distance = eigenface_predict(
        test_image, eigenfaces, weights, pca.mean_, threshold
    )

    fig, axes = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(8, 6))
    query_ax, match_ax = axes

    # Left panel: the image being classified.
    query_ax.imshow(test_image.reshape(faceshape), cmap="gray")
    query_ax.set_title("Test Image")

    print(f"Test Image {i}: {text}\nBest Match: {ytr[best_match]} with Euclidean distance {euclidean_distance}")

    # Right panel: the closest training image and the verdict.
    match_ax.imshow(Xtr[best_match].reshape(faceshape), cmap="gray")
    match_ax.set_title(f"{text}\nBest match - {ytr[best_match]}")

    plt.show()


### Check the Accuracy

In [None]:
# Predict a label for every held-out image: the label of its nearest
# training sample.
predictions = []

for i, sample in enumerate(Xts):
    test_image = sample.reshape(1, -1)
    text, best_match, euclidean_distance = eigenface_predict(
        test_image, eigenfaces, weights, pca.mean_, threshold
    )
    predictions.append(ytr[best_match])

In [None]:
# Score the predictions against the true labels of the held-out images.
accuracy, correct = check_accuracy(y_true=yts, y_pred=predictions)

In [None]:
# Report the accuracy (a percentage) and the raw count of correct matches.
print(
    f"Accuracy: {accuracy}",
    f"{correct} guesses out of {len(Xts)}",
    sep="\n",
)