In [1]:
import scipy.io
import numpy as np
from numpy import linalg as LA
import matplotlib
import time
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
from sklearn import preprocessing
import sympy

In [2]:
def show_img(img):
    temp = img.copy()
    temp.resize((46,56))
    im = Image.fromarray(temp.T)
    im.show()

In [3]:
def get_err(x, y):
    # they must be the same dimension
    err = x - y
    # show_img(err)
    err = np.square(err)
    return np.sqrt(np.sum(err)/len(x))

In [4]:
# load data
mat = scipy.io.loadmat('face.mat')
raw_data = mat['X']

D,N = raw_data.shape

raw_data = np.transpose(raw_data)

training_data = np.empty([int(520*0.8), 2576])
testing_data = np.empty([int(520*0.2), 2576])

# create training and test data
for x in range(52):
        # 8/2 ratio for training and testing datasets
	training_data[x*8:(x+1)*8] = raw_data[x*10:x*10+8]
	testing_data[x*2:(x+1)*2] = raw_data[x*10+8:(x+1)*10]

raw_data = np.transpose(raw_data)
training_data = np.transpose(training_data)
testing_data = np.transpose(testing_data)



In [5]:
# get mean image matrix
mean_face = training_data.mean(axis=1).reshape(-1,1)

show_img(mean_face)

In [25]:
def lowdim_pca(train, mean): 
    A = train - mean_face
    S = (1/N)*np.dot(np.transpose(A),A)
    print("rank of lowdim cov: ", LA.matrix_rank(S))
    # S = np.cov(np.transpose(A))
    w, v = LA.eig(S)
    v /= LA.norm(v,ord=2,axis=0)
    # u = principal components
    u = np.dot(A,v)
    u /= LA.norm(u,ord=2,axis=0)

    id = np.argsort(np.abs(w))[::-1]
    w = w[id]
    u = u[:,id]
    
    # return eigen vectors sorted from largest to smallest
    return u

In [31]:
def normal_pca(train, mean):
    A = train - mean_face
    S = (1/N)*np.dot(A,A.T)
    print("rank of S: ", LA.matrix_rank(S))
    print("S is symmetric: ", S == S.T)
    print()
    print("S is real: ", S.imag == 0)
    w,v = LA.eig(S)
    v /= LA.norm(v,ord=2,axis=0)
    # nz_u = principal components with non-zero eigenvals
    print("number of zero eigen vals: ", np.sum(w != 0))
    nz_u = v[w != 0]
    nz_u /= LA.norm(nz_u, ord=2, axis=0)
    nz_w = w[w != 0]
    print("eigenvalues: ", nz_w)
    print("complex eigen vals are: ", nz_w[nz_w.imag != 0])
    id = np.argsort(np.abs(nz_w))[::-1]
    nz_w = nz_w[id]
    nz_u = nz_u[:,id]
    
    # return non-zero eigen vectors sorted from largest to smallest
    return nz_u    

rank of each matrix is the same : 415
Most of eigen values in normal pca should be 0. But instead are complex due to numeric error. need a way to discard small eigen vals


In [32]:
# generate principal components
pca_eigvec = normal_pca(training_data, mean_face)
lowdim_eigvec = lowdim_pca(training_data, mean_face)

u_norm = lowdim_eigvec

rank of S:  415
S is symmetric:  [[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
S is real:  [[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
number of zero eigen vals:  2576
eigenvalues:  [ 7.49110900e+05+0.j  4.05774608e+05+0.j  3.82692319e+05+0.j ...
 -1.38623395e-13+0.j -3.08804252e-13+0.j -5.11278692e-14+0.j]
complex eigen vals are:  [-2.59523913e-11+5.21886224e-13j -2.59523913e-11-5.21886224e-13j
  1.93050103e-11+2.56784215e-13j ... -1.74089720e-13-2.17368917e-13j
 -9.89640642e-14+4.19988220e-14j -9.89640642e-14-4.19988220e-14j]
rank of lowdim cov:  415


In [28]:
print(pca_eigvec[pca_eigvec.imag != 0])

[-0.00379327-2.22918912e-05j -0.00379327+2.22918912e-05j
  0.00384693+3.44841304e-04j ...  0.02021531-2.12041180e-02j
  0.01856723-9.01670400e-04j  0.01856723+9.01670400e-04j]


In [7]:
# for i in range(10):
#     asdf = u_norm[:,i].reshape(46,56)
#     im = Image.fromarray(asdf)
#     im.show()

In [8]:
input = training_data
delta = input - mean_face

Compute weights for testing images

In [9]:
n_face = 416
eigface = u_norm[:,:n_face]
weights = np.matmul(delta.T, eigface)
print(eigface.shape)
print(delta.shape)
print(weights.shape)

(2576, 416)
(2576, 416)
(416, 416)


In [10]:
reconstructed_faces = np.matmul(weights, eigface.T)
print(reconstructed_faces[0])
print(reconstructed_faces.shape)

face = []
for i in range(len(reconstructed_faces)):
    face.append(reconstructed_faces[i][:] + mean_face.T)


for x in face[:20]:
    show_img(x)

[-15.81900395 -19.99002851 -17.67563775 ... -46.65471535 -47.0460667
 -42.1257451 ]
(416, 2576)


In [11]:
print("mean pixel error: ", get_err(training_data[:,0], face[0]))

mean pixel error:  2.058567171627294
