## Eigenfaces - Face Recognition

#### Importing necessary libraries

In [1]:
import os
import numpy as np
from PIL import Image

#### Loading and transforming the images into vectors

In [18]:
def load_images(path):
    """Load every image in ``path`` and group the pixel arrays by person id.

    File names are expected to look like ``<id>_<rest>`` where ``<id>`` is an
    integer. Directory entries whose name starts with ``.`` are skipped.

    Args:
        path: Directory containing the image files.

    Returns:
        dict mapping each integer id to a list of numpy arrays (one per
        image), ordered by file name.

    Raises:
        ValueError: If a file name does not start with an integer prefix.
    """
    images = {}
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # per-person lists reproducible from run to run.
    for file in sorted(os.listdir(path)):
        if file.startswith('.'):
            continue
        # Parse the id before touching the file so a badly named entry does
        # not leave an image handle open.
        idx = int(file.split('_')[0])
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the array.
        with Image.open(os.path.join(path, file)) as img:
            pixels = np.asarray(img)
        images.setdefault(idx, []).append(pixels)

    return images

def transform_to_vec(dataset):
    """Reshape every image of every person into a column vector.

    ``dataset`` is updated in place: each list of arrays is replaced by a new
    list holding the same arrays reshaped to ``(-1, 1)``.

    Args:
        dataset: dict mapping a person key to a list of numpy arrays.

    Returns:
        The same ``dataset`` object that was passed in.
    """
    for person_id, images in dataset.items():
        # Only values are reassigned, so iterating over items() is safe.
        dataset[person_id] = [image.reshape(-1, 1) for image in images]
    return dataset

def compute_average(dataset):
    """Compute the element-wise mean face of each person.

    Args:
        dataset: dict mapping a person key to a list of equally shaped
            numpy arrays.

    Returns:
        A new dict with the same keys; each value is the sum of that
        person's arrays divided by how many there are.
    """
    return {
        person_id: np.sum(faces, axis=0) / len(faces)
        for person_id, faces in dataset.items()
    }

def normalize_dataset(dataset, mean):
    """Subtract each person's mean face from all of that person's faces.

    ``dataset`` is updated in place: every list of faces is replaced by a new
    list of mean-centered faces.

    Args:
        dataset: dict mapping a person key to a list of numpy arrays.
        mean: dict mapping the same keys to the array to subtract.

    Returns:
        The same ``dataset`` object that was passed in.
    """
    for person_id, faces in dataset.items():
        centered = [face - mean[person_id] for face in faces]
        dataset[person_id] = centered
    return dataset

def compute_covariance_matrix(dataset):
    """Compute, per person, the average of ``face * face.T`` over all faces.

    For column-vector faces of shape ``(n, 1)`` the product broadcasts to the
    ``(n, n)`` outer product, so the result is the covariance matrix of
    faces that have already been mean-centered.

    Args:
        dataset: dict mapping a person key to a non-empty list of numpy
            arrays (column vectors, as produced by ``transform_to_vec``).

    Returns:
        A new dict mapping each key to a float64 array. The input arrays
        are not modified.

    Raises:
        ValueError: If a person has no faces, since the average is undefined.
    """
    cov_matrixes = {}
    for person_face, faces in dataset.items():
        if len(faces) == 0:
            raise ValueError(
                'cannot compute a covariance matrix for %r: no faces'
                % (person_face,)
            )
        total = None
        for face in faces:
            # Work in float64: integer pixel data (e.g. uint8) would overflow
            # in the product and cannot be scaled in place by a float.
            face = np.asarray(face, dtype=np.float64)
            outer = face * face.T
            # Accumulate one product at a time instead of materializing
            # every (n, n) matrix up front.
            if total is None:
                total = outer
            else:
                total += outer
        total *= 1 / len(faces)
        cov_matrixes[person_face] = total
    return cov_matrixes
            

In [40]:
DATA_PATH = '../data'
TRAIN_DATA_PATH = os.path.join(DATA_PATH, 'train')
TEST_DATA_PATH = os.path.join(DATA_PATH, 'test')
# Eigenvalues at or below this value are not reported.
EIGENVALUE_THRESHOLD = 0.5

# Load both splits, grouped by person id.
train_data = load_images(TRAIN_DATA_PATH)
test_data = load_images(TEST_DATA_PATH)

# Flatten every image into a column vector.
train_data = transform_to_vec(train_data)
test_data = transform_to_vec(test_data)

# Center the training faces on each person's mean face, then build one
# covariance matrix per person. The test split is only vectorized here.
mean_train_faces = compute_average(train_data)
train_data = normalize_dataset(train_data, mean_train_faces)
cov_matrices = compute_covariance_matrix(train_data)

for cov_matrix in cov_matrices.values():
    print(cov_matrix.shape)
    # Each covariance matrix is a sum of outer products and therefore
    # symmetric, so eigh applies: it returns real eigenvalues in a defined
    # (ascending) order, unlike the general-purpose eig.
    values, vects = np.linalg.eigh(cov_matrix)
    # Flip to descending order so the dominant components come first.
    values, vects = values[::-1], vects[:, ::-1]
    print(np.flatnonzero(values > EIGENVALUE_THRESHOLD).tolist())
    


(1200, 1200)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
(1200, 1200)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
(1200, 1200)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
(1200, 1200)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
(1200, 1200)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
