# Face Recognition using Principal Component Analysis

We intend to perform face recognition. Face recognition means that for a
given image you can tell the subject id. For our implmentation we will make use of the following 2 datasets:

1) https://git-disl.github.io/GTDLBench/datasets/att_face_dataset/

2) http://cvit.iiit.ac.in/projects/IMFDB/

## Libraries

In [8]:
#Importing the required libraries to use later
import matplotlib.image as img
from numpy import linalg as LA
from scipy.spatial import distance
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np

import math
import pickle
import operator
import os

## Helper functions

In [9]:
#Helper function to help us in classification tasks
def Test(data_mat0, data_mat1, label_mat0, label_mat1):
    '''
        This function expects omega i.e projected test face and Phi i.e Signature of each face
        and it then returns the accuracy of classfication for the test data.
    '''
    length = data_mat0.shape[0]
    map_ = np.zeros((1, length))
    
    #To store predicted labels for the testting data
    label_mat_new = np.zeros((1, length))
    
    #To store accuracy for the testing data
    accuracy_mat = np.ones((1, length))

    for i in range(0, length):
        for j in range(0, length):
            #Calculating eucledian distance between projected test data and signature of each face 
            map_[0, j] = distance.euclidean(data_mat1[i], data_mat0[j])
            
        #Assiging the label corresponding to the minimum distance 
        arg = map_.argmin()
        label_mat_new[0, i] = label_mat0[arg]
        if label_mat_new[0, i] != label_mat1[i]:
            #If predicted label and actual label don't match then it is not accurate and insert 0
            accuracy_mat[0, i] = 0
    return 100 * np.sum(accuracy_mat) / np.size(accuracy_mat)


In [10]:
def Train(data_matrix, k, str):
    '''
        The function expects training data and a value k (number of selected feature vectors).
        This function performs all the required training steps.
    '''
    isAlpha = False
    number = 0
    
    # Generating mean-zero delta (dimensions: mn*p)
    data_matrix_centered = data_matrix - np.mean(data_matrix, axis=0)
    
    #Generating covariance of mean-zero delta (dimension: p*p)
    data_matrix_cov = np.cov(data_matrix_centered, rowvar=False)
    
    #Extracting EigenValue and EigenVector
    (eigenValues, eigenVectors) = LA.eigh(data_matrix_cov)
    idx = eigenValues.argsort()[::-1]
    eigenValues = eigenValues[idx]
    eigenVectors = eigenVectors[:, idx]
    total = np.sum(eigenValues)

    #Extracting best direction and k value i.e getting a feature vector having dimnesion: p*k
    while isAlpha == False:
        sum_eigVal = 0.0
        for i in range(eigenValues.size):
            sum_eigVal = sum_eigVal + eigenValues[i] / total
            number += 1
            if math.isclose(sum_eigVal, k) or sum_eigVal > k:
                isAlpha = True
                break

    #projection matrix containing Eigen faces of dimension: k*mn           
    projection_matrix = np.matrix([eigenVectors[n] for n in
                                  range(number)]).T

    #Saving the objects for future use
    if not Path(str + '.pickle').exists():
        with open(str + '.pickle', 'wb') as handle:
            pickle.dump(projection_matrix, handle,
                        protocol=pickle.HIGHEST_PROTOCOL)
    else:
        print ('Error.' + ' Another file with same name already found.')
    return projection_matrix


In [11]:
def PCA(data_matrix, proj_matrix, k, str):
    
    '''
    This generates Projection matrix the returned matrices
    have dimensions as k*1
    '''
    data_matrix_centered = data_matrix - np.mean(data_matrix, axis=0)
    if proj_matrix is None:
        #Call the train fucntion if no proj matrix exists
        projection_matrix = Train(data_matrix, k, str)
    else:
        projection_matrix = proj_matrix
        
    #Convert to eigen faces
    rd_data_matrix = np.matmul(data_matrix_centered, projection_matrix)
    return rd_data_matrix

## Generating the Data Matrix and their Labels

In [12]:
#Reading the images and storing them while maining the label structure
imgMat = np.zeros((0, 10304))
temp = np.arange(1, 41, 1) #We have 40 people

label_matrix = np.array([[temp[i]] * 10 for i in range(temp.size)])
label_matrix = label_matrix.flatten()

#Reading greyscale images form the ATT dataset
folder = 'ATT/'
for j in range(1, 41):
    direction = folder + 's' + str(j) + '/'
    for i in range(1, 11):
        directory = direction + str(i) + '.pgm'
        image = img.imread(directory).T
        imageVect = np.asmatrix(image.flatten())
        imgMat = np.concatenate((imgMat, imageVect))

## Spliting the Dataset into Training and Test sets 

In [13]:
# training_data_matrix, test_data_matrix = np.split(imgMat, [int(.6 * len(imgMat))])
# label_training, label_test = np.split(label_matrix, [int(.6 * len(label_matrix))])
test_data_matrix = imgMat[0:400:2]
training_data_matrix = imgMat[1:400:2]

label_test = label_matrix[0:400:2]
label_training = label_matrix[1:400:2]

## Results

In [14]:

l = []
for i in range(80,99,2):
    l.append(i/100.0)
    
print(l)
k = np.matrix([l])

for o in range(k.size):
    with open('proj_data_mat_' + str(k[0, o]) + '.pickle', 'rb') as \
        handle:
        proj_data_mat = pickle.load(handle)
    training_data_matrix_rd = PCA(training_data_matrix, proj_data_mat,
                                   k[0, o], '')
    test_data_matrix_rd = PCA(test_data_matrix, proj_data_mat,
                               k[0, o], '')
    acc_prc = Test(training_data_matrix_rd, test_data_matrix_rd,
                       label_training, label_test)
    print ('K: '+ str(k[0, o]) + ' Accuracy = ' + str(acc_prc) + '%\n')

[0.8, 0.82, 0.84, 0.86, 0.88, 0.9, 0.92, 0.94, 0.96, 0.98]


FileNotFoundError: [Errno 2] No such file or directory: 'proj_data_mat_0.8.pickle'