In [400]:
from os import listdir
import os, random, copy
from PIL import Image
import numpy as np
from collections import defaultdict
import dataloader

In [401]:
# Function to convert the Python dictionary loaded in by the dataloader.py load_data function 
# into a NumPy matrix for downstream manipulation
# Params:
# image_dict - Python dictionary with facial expressions as keys and a list NumPy arrays representing
#              image as values
# Return: A NumPy matrix of dimensions dxM denoting M images of d dimensions
def get_matrix(image_dict):
    image_matrix = []
    for emotion in image_dict:
        for image in image_dict[emotion]:
            image_matrix.append(np.concatenate(image))
    return np.matrix(image_matrix).T

In [402]:
# Function to apply PCA to a dxM image matrix, with M images each of dimension d
# Params:
# image_matrix - a dxM matrix representing M images each of dimension d (output of get_matrix)
# Return: the eigenvalues and principal components of the covariance matrix of the input
# eig_vals - the M eigenvalues of the MxM covariance matrix
# pcs - the M dx1 eigenvectors of the covariance matrix in the form of a dxM matrix
# Return: the M eigenvalues and M pcs from the covariance matrix calculated via PCA
def principal_components(image_matrix):
    mean_face = np.matrix.mean(image_matrix, axis = 1)
    centered_matrix = image_matrix - mean_face
    covariance_matrix = np.dot(centered_matrix.T, centered_matrix)/centered_matrix.shape[1]
    eig_vals, eig_vecs = np.linalg.eig(covariance_matrix)
    order = np.argsort(eig_vals)[::-1]
    eig_vals = eig_vals[order]
    eig_vecs = eig_vecs[:,order]
    pcs = centered_matrix*eig_vecs
    return(eig_vals, pcs)

In [507]:
# Function to project a dxM image matrix onto pcs calculate by PCA
# Params:
# mean_face - the average face calculate from averaging pixel values of training set
# image_matrix - the images to project in the from of a dxM matrix
# eig_vals - the eigenvalues calculated from PCA on the training set
# pcs - the pcs calculated from PCA on the training set
# k - the number of dimensions to keep
# Return: the dxM image matrix projected down to the kxM space
def project_k_dimensions(mean_face, image_matrix, eig_vals, pcs, k):
    centered_matrix = image_matrix - mean_face
    projections = centered_matrix.T*pcs[:, 0:k]
    projections = projections/(np.linalg.norm(pcs[:, 0:k], axis = 0))/np.sqrt(eig_vals[0:k])
    return projections.T#[:,0:k].T

In [532]:
# Function to inverse a projection back to an image
# Params:
def projection_to_image(projected_data, mean_face, k, pcs, eig_values):
    print((np.linalg.norm(pcs, axis = 0)[0:k]).shape)
    print(projected_data.shape)
    inverse_projections = np.multiply(projected_data.T, np.linalg.norm(pcs, axis = 0)[0:k]) 
    inverse_projections = np.multiply(inverse_projections, np.sqrt(eig_values[0:k]))
    inverse_images = pcs[:, 0:k] * inverse_projections.T
    print(inverse_projections[:,0])
projection_to_image(projected_data, mean_face, 40, pcs, eig_vals)
print(pcs[:, 0:40].shape)

(40,)
(40, 309)
[[ 1.07483252e+09]
 [-8.91351522e+08]
 [ 1.66939720e+09]
 [ 9.70739890e+08]
 [-8.68271384e+08]
 [-8.40548013e+07]
 [-5.46337184e+08]
 [ 6.02670890e+08]
 [ 1.99143573e+08]
 [-1.68195306e+08]
 [ 9.59681180e+08]
 [ 6.81466420e+08]
 [ 1.17995862e+09]
 [-3.85321582e+08]
 [-1.37032219e+08]
 [-1.26276970e+09]
 [-6.92772156e+08]
 [ 1.11075427e+09]
 [ 7.75013254e+08]
 [ 1.27531094e+09]
 [ 9.88166327e+08]
 [-4.36895950e+08]
 [-8.48216733e+08]
 [-1.92913626e+08]
 [-9.03739090e+08]
 [-1.05684031e+09]
 [-5.91579248e+08]
 [-1.15731993e+09]
 [-8.06866122e+08]
 [ 1.36929466e+09]
 [ 1.19878009e+09]
 [-4.68012400e+08]
 [-3.75433875e+08]
 [-1.12080392e+09]
 [ 8.26130710e+08]
 [ 8.38005667e+07]
 [-7.92596672e+08]
 [-2.16727824e+08]
 [-1.09455412e+09]
 [-6.18361252e+08]
 [-4.52378023e+08]
 [-1.42259563e+08]
 [ 2.59377473e+08]
 [ 2.48107497e+09]
 [ 1.22022517e+09]
 [ 1.62325332e+09]
 [ 3.47442260e+08]
 [ 5.21067113e+08]
 [-7.44599745e+08]
 [ 8.41128540e+08]
 [-2.33185223e+08]
 [-4.42051908e+

In [534]:
# Essentially a main function to load in whataver data will be applied to PCA
data, cnt = dataloader.load_data('./aligned/')  # Will depend on what data we are loading and training on and holding out
data = dataloader.balanced_sampler(data, cnt, ['anger', 'happiness'])

Emotions: ['fear', 'surprise', 'sadness', 'happiness', 'anger', 'disgust'] 

fear: 25 # of images
surprise: 83 # of images
sadness: 28 # of images
happiness: 69 # of images
anger: 45 # of images
disgust: 59 # of images

Balanced Set:
anger: 45 # of images
happiness: 45 # of images


In [535]:
# Display an example image
dataloader.display_face(data['anger'][0])
n1, n2 = data['anger'][0].shape

Converting from array to PIL Image


In [536]:
# Convert to a dxM matrix and calculate eigenvalues and principal components
image_m = get_matrix(data)
print(image_m.shape)
eig_vals, pcs = principal_components(image_m)
print(pcs.shape)

(43008, 90)
(43008, 90)


In [537]:
# Calculate and display the mean face among the data
mean_face = np.matrix.mean(image_m, axis = 1)
dataloader.display_face(np.array(np.reshape(mean_face, (n1,n2))))
centered_matrix = image_m - mean_face

Converting from array to PIL Image


In [538]:
# First sanity check
first_projection = centered_matrix.T*pcs[:,0]/np.linalg.norm(pcs[:,0])
print(np.mean(first_projection))
print(np.std(first_projection))
print(np.sqrt(eig_vals[0]))

# Second sanity check
norm_first_projection = first_projection/np.sqrt(eig_vals[0])
print(np.mean(norm_first_projection))
print(np.std(norm_first_projection))
print(norm_first_projection[:,0])

-2.021099337273174e-14
7045.691937087485
7045.6919370874875
-4.934324553889585e-18
0.9999999999999994
[[-1.4618196 ]
 [-1.07633228]
 [ 0.37994631]
 [ 0.9586107 ]
 [ 0.57179275]
 [-1.46657047]
 [-1.03642918]
 [ 0.75186488]
 [ 0.44096947]
 [-1.2767446 ]
 [-0.26795138]
 [ 1.29419798]
 [ 1.85555041]
 [ 1.66561024]
 [-1.9502304 ]
 [ 0.28740058]
 [-1.94875683]
 [-0.95967342]
 [ 0.52970415]
 [-2.62069937]
 [-0.53725405]
 [ 0.09235244]
 [-0.57764122]
 [-1.41309171]
 [-1.05243787]
 [ 0.77065647]
 [ 0.5869963 ]
 [-1.18082862]
 [-0.08131453]
 [-0.53393178]
 [ 0.86469077]
 [ 1.19379113]
 [ 0.45386585]
 [-0.55148367]
 [-1.62416512]
 [-0.70395587]
 [ 1.73487674]
 [-1.31555419]
 [ 0.72528285]
 [ 0.32268165]
 [-0.75922821]
 [-1.6910278 ]
 [-1.23656547]
 [-0.4239066 ]
 [-1.2317332 ]
 [-0.01475007]
 [ 0.39683682]
 [ 0.50796727]
 [ 0.49170541]
 [ 1.99533145]
 [ 1.70464916]
 [-0.37927506]
 [-0.02798521]
 [ 0.20828578]
 [-0.11349527]
 [ 0.60189729]
 [ 0.94333187]
 [-0.40122679]
 [-0.31779793]
 [-0.55030331

In [539]:
projected_data = project_k_dimensions(mean_face, image_m, eig_vals, pcs, 40)
print(projected_data[0:,])

[[-1.4618196  -1.07633228  0.37994631 ...  0.96693832 -0.57349506
  -0.60724054]
 [-1.32904598  0.35517542 -1.20799171 ...  1.2077038   1.13593771
   1.00942214]
 [ 0.41268012 -1.42144158  0.77805716 ...  0.59726759 -0.25460851
   0.64030177]
 ...
 [ 0.05268772 -0.88868778  0.91752027 ...  2.31971497 -0.25582675
  -0.06789648]
 [ 0.0596225  -0.33239289  0.87711803 ... -1.86850634 -0.81189023
  -0.26953585]
 [ 0.48687082 -0.99527469 -0.27555411 ...  0.54899087 -0.25351816
  -1.23911957]]


In [541]:
# Display first 4 principle components of PCA
for i in range(0,6):
    dataloader.display_face(np.array(np.reshape(pcs[:,i], (n1, n2))))

Converting from array to PIL Image
Converting from array to PIL Image
Converting from array to PIL Image
Converting from array to PIL Image
Converting from array to PIL Image
Converting from array to PIL Image
