In [1]:
# Steps 1-3 are the PCA part of the project; run this notebook from the same GitHub folder as project.ipynb.
# Input: face_data.csv with 40 subjects x 10 images each, 64x64 = 4096 pixels per image.
# Images 1-9 of each subject are used to create that subject's eigenface; image 0 is held out for testing.
# Creates the file eigenfaces.csv: the first eigenface of each of the 40 subjects (40 x 4096 pixels), with the mean added back.
# Step 4 calculates the cosine similarity between each test image and every eigenface, and recognizes the subject with the maximum similarity.
# Rejecting an unknown face (a similarity cutoff) is not implemented.
import csv
import math
from time import time

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.pyplot import imshow
from numpy import linalg as la
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
# Load the face data set.
# Row 0 of the CSV is skipped (presumably a header row) and the last column
# is dropped (presumably a label column -- verify against the file); the
# remaining values are the pixel intensities.
# The file is opened with newline='' as the csv module requires, so that
# line endings are handled by the csv reader itself.
with open('data/face_data/face_data.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    data = np.array(list(reader))

idata = data[1:, :-1].astype(np.float32)
# 40 subjects x 10 images x (64*64) pixels; the explicit shape doubles as a
# sanity check, since reshape raises if the file has a different size.
idata = idata.reshape(40, 10, 4096)

In [3]:
# Create the array of first eigenfaces (40 x 4096): for every subject, fit a
# one-component PCA on images 1-9 (image 0 is excluded and kept for testing)
# and add the subject's mean image back to the first principal component.
# Takes a while to run; may consider compressing images to 16x16.
t0 = time()
eface = []
for subject_images in idata[:, 1:, :]:
    mu = np.mean(subject_images, 0)
    # The randomized SVD solver draws random numbers; a fixed random_state
    # makes the eigenfaces (and the saved CSV) reproducible between runs.
    pca = PCA(n_components=1, svd_solver='randomized', whiten=True,
              random_state=0).fit(subject_images)
    eface.append(pca.components_[0] + mu)
# Keep the result as a single 2-D array so later cells can index and
# multiply it directly.
eface = np.array(eface)
np.savetxt("data/face_data/eigenfaces.csv", eface, delimiter=",")
t_pca = (time() - t0)
print("computed in %0.3fs" % t_pca)

computed in 0.586s


In [4]:
# Test on image zero of every subject using the cosine similarity score:
# the subject whose eigenface has the highest cosine similarity with the
# test image is reported as the recognized subject.
t0 = time()
# Loop-invariant work is done once up front: the array conversion (a no-op
# if eface is already an array) and the norm of every eigenface.
eface = np.asarray(eface)
eface_norms = [la.norm(face) for face in eface]
n_subjects = idata.shape[0]
n_faces = len(eface)

s = 0
for i in range(n_subjects):
    image = idata[i, 0, :]
    image_norm = la.norm(image)
    dist = np.zeros(n_faces)
    for e in range(n_faces):
        dist[e] = (eface[e] @ image) / (eface_norms[e] * image_norm)
    # argmax returns the first index holding the maximum similarity.
    best = np.argmax(dist)
    print('subject', i, 'recognized as', best, 'cosim =',
          round(dist[best], 2))
    if i == best:
        s += 1
print('Accuracy:', s / n_subjects)
# Reported time includes the PCA step measured when the eigenfaces were built.
print("computed in %0.3fs" % (time() - t0 + t_pca))

subject 0 recognized as 0 cosim = 0.99
subject 1 recognized as 1 cosim = 0.99
subject 2 recognized as 2 cosim = 0.99
subject 3 recognized as 3 cosim = 0.99
subject 4 recognized as 4 cosim = 1.0
subject 5 recognized as 5 cosim = 0.99
subject 6 recognized as 6 cosim = 0.99
subject 7 recognized as 7 cosim = 0.98
subject 8 recognized as 8 cosim = 0.99
subject 9 recognized as 9 cosim = 0.99
subject 10 recognized as 10 cosim = 0.99
subject 11 recognized as 11 cosim = 0.99
subject 12 recognized as 12 cosim = 0.99
subject 13 recognized as 13 cosim = 0.99
subject 14 recognized as 14 cosim = 0.99
subject 15 recognized as 15 cosim = 0.98
subject 16 recognized as 16 cosim = 0.99
subject 17 recognized as 17 cosim = 0.99
subject 18 recognized as 18 cosim = 0.98
subject 19 recognized as 19 cosim = 0.98
subject 20 recognized as 4 cosim = 0.99
subject 21 recognized as 21 cosim = 0.98
subject 22 recognized as 22 cosim = 0.99
subject 23 recognized as 23 cosim = 0.99
subject 24 recognized as 24 cosim = 0.

In [5]:
# Train a linear SVM on the eigenfaces (one training sample per subject,
# labelled with the subject index) and test it on image zero of each subject.
y_train = np.arange(len(eface))

t0 = time()
clf = SVC(kernel='linear', C=1.0)
clf = clf.fit(eface, y_train)

# Classify all held-out images in a single call: one row per subject.
predicted = clf.predict(idata[:, 0, :])

s = 0
for i, label in enumerate(predicted):
    print('subject', i, 'recognized as', label)
    if i == label:
        s += 1
print('Accuracy:', s / len(predicted))
# Reported time includes the PCA step measured when the eigenfaces were built.
print("computed in %0.3fs" % (time() - t0 + t_pca))

subject 0 recognized as 0
subject 1 recognized as 1
subject 2 recognized as 2
subject 3 recognized as 3
subject 4 recognized as 4
subject 5 recognized as 5
subject 6 recognized as 6
subject 7 recognized as 7
subject 8 recognized as 8
subject 9 recognized as 9
subject 10 recognized as 10
subject 11 recognized as 11
subject 12 recognized as 12
subject 13 recognized as 13
subject 14 recognized as 14
subject 15 recognized as 15
subject 16 recognized as 16
subject 17 recognized as 17
subject 18 recognized as 18
subject 19 recognized as 19
subject 20 recognized as 20
subject 21 recognized as 21
subject 22 recognized as 22
subject 23 recognized as 23
subject 24 recognized as 24
subject 25 recognized as 25
subject 26 recognized as 26
subject 27 recognized as 27
subject 28 recognized as 28
subject 29 recognized as 29
subject 30 recognized as 30
subject 31 recognized as 31
subject 32 recognized as 32
subject 33 recognized as 33
subject 34 recognized as 34
subject 35 recognized as 35
subject 36 r

In [6]:
# Train a logistic regression classifier on the eigenfaces (labels come from
# y_train, defined in the SVM cell) and test it on image zero of each subject.
t0 = time()
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases
# (1.5+) -- confirm against the installed version and drop the argument when
# upgrading.
clf = LogisticRegression(solver='lbfgs', multi_class='auto')
clf = clf.fit(eface, y_train)

# Classify all held-out images in a single call: one row per subject.
predicted = clf.predict(idata[:, 0, :])

s = 0
for i, label in enumerate(predicted):
    print('subject', i, 'recognized as', label)
    if i == label:
        s += 1
print('Accuracy:', s / len(predicted))
# Reported time includes the PCA step measured when the eigenfaces were built.
print("computed in %0.3fs" % (time() - t0 + t_pca))



subject 0 recognized as 0
subject 1 recognized as 1
subject 2 recognized as 2
subject 3 recognized as 3
subject 4 recognized as 4
subject 5 recognized as 5
subject 6 recognized as 6
subject 7 recognized as 7
subject 8 recognized as 8
subject 9 recognized as 9
subject 10 recognized as 10
subject 11 recognized as 11
subject 12 recognized as 12
subject 13 recognized as 13
subject 14 recognized as 14
subject 15 recognized as 15
subject 16 recognized as 16
subject 17 recognized as 17
subject 18 recognized as 18
subject 19 recognized as 19
subject 20 recognized as 20
subject 21 recognized as 21
subject 22 recognized as 22
subject 23 recognized as 23
subject 24 recognized as 24
subject 25 recognized as 25
subject 26 recognized as 26
subject 27 recognized as 27
subject 28 recognized as 28
subject 29 recognized as 29
subject 30 recognized as 30
subject 31 recognized as 31
subject 32 recognized as 32
subject 33 recognized as 33
subject 34 recognized as 34
subject 35 recognized as 35
subject 36 r