In [53]:
import numpy
import matplotlib.pyplot as pyplot

from numpy.random import RandomState
from sklearn.datasets import fetch_olivetti_faces

# Fetch the Olivetti faces; shuffling with a fixed seed keeps the sample
# order (and hence any positional split of the data) reproducible.
dataset = fetch_olivetti_faces(
    shuffle = True,
    random_state = RandomState(0),
)

# Load and condition dataset

In [54]:
faces = dataset.data

print('faces: %s' % str(faces.shape)) # |samples| x |features|

# global centering: subtract the per-feature mean taken over all samples
# NOTE(review): this mean is computed before the train/test split below, so
# the test rows contribute to it - confirm that this is intended.
faces_mean = faces.mean(axis = 0)
faces_centered = faces - faces_mean

# local centering: subtract each sample's own mean feature value
# (keepdims keeps the result as a column so it broadcasts across features)
faces_centered -= faces_centered.mean(axis = 1, keepdims = True)

# centering must leave the |samples| x |features| layout untouched
assert faces_centered.shape == faces.shape

# the first 25% of the (already shuffled) samples are held out for testing
split = int(0.25*len(faces))

test_faces = faces_centered[:split]
test_targets = dataset.target[:split]

train_faces = faces_centered[split:]
train_targets = dataset.target[split:]

faces: (400, 4096)


# Perform Eigen Analysis

## Compute surrogate covariance matrix

In [55]:
# With far fewer samples than features, work with the small
# |samples| x |samples| product of A with its transpose rather than the
# |features| x |features| one.
A = train_faces
print('A: %s' % (A.shape,)) # |samples| x |features|
L = numpy.dot(A, A.T)
print('L: %s' % (L.shape,)) # |samples| x |samples|

A: (300, 4096)
L: (300, 300)


## Compute strong eigenvectors

In [56]:
# L is a real symmetric matrix (a matrix times its own transpose), so use the
# symmetric solver: eigh always returns real eigenvalues and eigenvectors,
# whereas the general-purpose eig may hand back complex values.
# Note that eigh returns the eigenvalues in ascending order.
(L_eigenvalues, L_eigenvectors) = numpy.linalg.eigh(L)
# Keep only the "strong" eigenvectors: the columns whose eigenvalue exceeds 1.
L_eigenvectors_strong = L_eigenvectors[:, L_eigenvalues > 1]
print('L eigenvectors: %s' % str(L_eigenvectors_strong.shape)) # |samples| x |strong samples|

L eigenvectors: (300, 294)


## Compute eigenfaces

In [57]:
# Map every strong eigenvector of L back into feature (pixel) space by
# multiplying with A transposed; after the final transpose each row is one
# eigenface. The rows are used as computed here, without any rescaling.
eigenfaces = C_eigenvectors = numpy.dot(A.T, L_eigenvectors_strong).T # eigenfaces
print('eigenfaces: %s' % (eigenfaces.shape,)) # |strong samples| x |features|

eigenfaces: (294, 4096)


## Project faces into eigenface space

In [58]:
# Describe each training face by its coordinates in eigenface space:
# one row per training face, one column per eigenface.
train_faces_projected = numpy.array(
    [numpy.dot(eigenfaces, train_face) for train_face in train_faces]
)
train_faces_projected.shape # |samples| x |strong samples|

(300, 294)

# Perform test on faces outside our model

In [60]:
# Match every held-out test face to its nearest training face in eigenface
# space and count how often the matched face belongs to the same person.

correct = 0.0
trails = len(test_faces)

for test_index in range(trails):

    test_face = test_faces[test_index]
    test_target = test_targets[test_index]

    # The test faces were centered together with the training faces, so they
    # can be projected directly.
    test_face_projected = eigenfaces.dot(test_face)

    # Squared Euclidean distance to every projected training face at once
    # (the test vector broadcasts across the rows).
    distances = ((train_faces_projected - test_face_projected)**2).sum(axis = 1)

    # Nearest neighbour: the training face with the smallest distance
    guess_index = distances.argmin()
    guess_face = train_faces[guess_index]
    guess_target = train_targets[guess_index]

    is_correct = (test_target == guess_target)

    if (is_correct):
        correct += 1.0

    # Show some examples of matching...
    if (test_index < 10):
        # Test face...
        pyplot.subplot(1,2,1)
        pyplot.title('Test face')
        pyplot.imshow(
            test_face.reshape([64, 64]),
            cmap = pyplot.cm.gray,
            interpolation = 'nearest'
        )
        # Guess face...
        pyplot.subplot(1,2,2)
        pyplot.title('Guess face [%s match]' % ('Correct' if is_correct else 'Incorrect'))
        pyplot.imshow(
            guess_face.reshape([64, 64]),
            # the plotting module is imported as `pyplot`; no `plt` name exists
            cmap = pyplot.cm.gray,
            interpolation = 'nearest'
        )
        # Show...
        pyplot.show()

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('%d%% accuracy' % int(100.0*correct/trails))
print('... amongst %d unique people' % len(frozenset(dataset.target)))

81% accuracy
... amongst 40 unique people
