In [22]:
import math
import random
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt 
import os 

# import subset of images from the training dataset
path = './Dataset/Training/'
expressions = ['Angry', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']
samp_per_exp = 1000 # number of samples per expression - increase later 
i = 0

raw_train = []

# open the first samp_size images in path directory
for exp in expressions:
    for file in os.listdir(path + exp + '/')[:samp_per_exp]:
        if file.endswith('.jpg'):
            img = plt.imread(path + exp + '/' + file)
            # convert to numpy vector
            img = img.flatten()
            raw_train.append(img)
            # print(img.shape)


In [23]:
# vectorize training images
train = np.array(raw_train)
print(train)
print(train.shape)

# determine mean intensity of each pixel
mean = np.mean(train, axis=0)
print(mean)

# mean center pixel intensities
train = train - mean
print(train)
print(train.shape)



[[152 149 147 ... 193 181 183]
 [117 116 114 ...  45  47  50]
 [159 159 145 ... 140 148 126]
 ...
 [ 46  67 101 ... 254 255 255]
 [225 218 237 ...  92  95  95]
 [240 254 255 ...  35  32  30]]
(6000, 2304)
[122.52583333 119.3205     116.23983333 ... 113.06666667 113.60816667
 114.25466667]
[[ 29.47416667  29.6795      30.76016667 ...  79.93333333  67.39183333
   68.74533333]
 [ -5.52583333  -3.3205      -2.23983333 ... -68.06666667 -66.60816667
  -64.25466667]
 [ 36.47416667  39.6795      28.76016667 ...  26.93333333  34.39183333
   11.74533333]
 ...
 [-76.52583333 -52.3205     -15.23983333 ... 140.93333333 141.39183333
  140.74533333]
 [102.47416667  98.6795     120.76016667 ... -21.06666667 -18.60816667
  -19.25466667]
 [117.47416667 134.6795     138.76016667 ... -78.06666667 -81.60816667
  -84.25466667]]
(6000, 2304)


In [24]:
# compute covariance matrix
cov = np.cov(train, rowvar=False)
print(cov)
print(cov.shape)

[[6887.17352823 6447.02514127 5803.37361657 ... 1376.53203312
  1457.21190768 1514.24946269]
 [6447.02514127 6608.60721429 6175.97446249 ... 1333.63007168
  1421.75037431 1464.05205468]
 [5803.37361657 6175.97446249 6432.69076177 ... 1322.84781908
  1407.54039809 1433.6555317 ]
 ...
 [1376.53203312 1333.63007168 1322.84781908 ... 5972.2752681
  5716.27416792 5360.02652664]
 [1457.21190768 1421.75037431 1407.54039809 ... 5716.27416792
  6075.19583261 5852.64822826]
 [1514.24946269 1464.05205468 1433.6555317  ... 5360.02652664
  5852.64822826 6181.23335045]]
(2304, 2304)


In [29]:
# eigenvalue decomposition to get eigenvectors and eigenvalues

e_vals, e_vecs = np.linalg.eig(cov)

e_val_vec = []
for i in range(len(e_vals)):
    e_val_vec.append([e_vals[i].real, e_vecs[i]])

# sort eigenvalues by magnitude

e_val_vec.sort(key=lambda x: abs(x[0]), reverse=True)

print(len(e_val_vec))

print([val[0] for val in e_val_vec[:30]])

# greatest_imag_val = 0
# greatest_imag_vec = 0
# greatest_real_val = 0
# greatest_real_vec = 0

# for i in range(len(e_vals)):
#     if abs(e_vals[i].imag) > greatest_imag_val:
#         greatest_imag_val = abs(e_vals[i].imag)
    
#     for x in e_vecs[i].flatten():
#         if abs(x.imag) > greatest_imag_vec:
#             greatest_imag_vec = abs(x.imag)

#     if abs(e_vals[i].real) > greatest_real_val:
#         greatest_real_val = abs(e_vals[i].real)

#     for x in e_vecs[i].flatten():
#         if abs(x.real) > greatest_real_vec:
#             greatest_real_vec = abs(x.real)

# print(greatest_imag_val)
# print(greatest_imag_vec)
# print(greatest_real_val)
# print(greatest_real_vec)




# for i in range(len(eig_vals)):
#     print('Eigenvalue: ', eig_vals[i], ' Eigenvector: ', eig_vecs[i])


2304
[2700151.8435294144, 901118.8454053543, 868397.4833179364, 510518.73389615846, 277292.8682957908, 236422.9073442376, 197138.78609777289, 183357.1480804222, 168494.7503912688, 140579.82945811408, 110114.67411841018, 101029.20485406219, 97675.71511810125, 93343.08327751681, 88917.09953422827, 76169.0271506033, 74304.2126820279, 60762.87188765108, 51421.136187642056, 49813.93457035172, 48494.33039837433, 46952.774035385824, 45654.53470429665, 43542.477716161775, 41633.511055159724, 39037.565568478494, 38023.919766735235, 35200.79419393189, 33262.79783292695, 32583.2421736768]


In [1]:
# use the n eigenvectors with greatest eigenvalues to form new basis
n = 200
sig_e_vecs = np.array([x[1] for x in e_val_vec[:n]])



# project training images onto new basis
proj_train = np.matmul(train, sig_e_vecs.T)

