In [38]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from PIL import Image
from sklearn.decomposition import PCA

In [39]:
# Load a fixed-size sample of face images as flattened 64x64 grayscale vectors.
# Each row of the resulting img_matrix is one image (64*64 = 4096 pixel values).
image_folder = "img_align_celeba/img_align_celeba"
img_matrix = []

count = 0
#max_count = 202599
# os.listdir returns entries in arbitrary order; sorting makes the selected
# sample the same on every run.
for image in sorted(os.listdir(image_folder)):
    if count >= 1000:
        # Sample is full: stop instead of scanning the rest of the folder.
        break
    if image == "me.jpg":
        # The query photo must not be part of the training sample.
        continue
    image_path = os.path.join(image_folder, image)
    # convert() loads the pixel data and returns a new image, so the file
    # handle can be released as soon as the with-block exits.
    with Image.open(image_path) as raw_img:
        greyScale_img = raw_img.convert("L")
    reSized_img = greyScale_img.resize((64,64))
    img_vector = np.array(reSized_img).flatten()
    img_matrix.append(img_vector)
    count+=1

img_matrix = np.array(img_matrix)

In [40]:
# Numbers of principal components to compare in the cells below.
n_comp = [10, 50, 100, 250, 500]

# Accumulators for the projected data and its reconstruction, one entry per
# value in n_comp (filled by a later cell).
pca_matrix, inverse_pca_matrix = [], []

# Subtract the per-pixel mean over all images, then compute the pixel-by-pixel
# covariance (columns of centered_matrix are the variables).
centered_matrix = img_matrix - img_matrix.mean(axis=0)
cov_matrix = np.cov(centered_matrix, rowvar=False)

In [None]:
# Reconstruct one randomly chosen face with each component count in n_comp
# and show the reconstructions next to the original image.
fig, axes = plt.subplots(nrows=1, ncols=len(n_comp)+1, figsize=(16,16))
rand_ind = random.choice(range(len(img_matrix)))

# Start from empty lists so re-running this cell does not append duplicates.
pca_matrix = []
inverse_pca_matrix = []

# centered_matrix is img_matrix minus this per-pixel mean; the reconstruction
# therefore lives in centered space and needs the mean added back before it
# can be compared visually with the (uncentered) original image.
dataset_mean = np.mean(img_matrix, axis=0)

for ind, n in enumerate(n_comp):
    pca = PCA(n_components=n)
    pca_data = pca.fit_transform(centered_matrix)
    pca_matrix.append(pca_data)
    # Stored reconstructions stay in centered space (same space as
    # centered_matrix); only the displayed image gets the mean restored.
    inverse_pca_data = pca.inverse_transform(pca_data)
    inverse_pca_matrix.append(inverse_pca_data)

    image = (inverse_pca_data[rand_ind] + dataset_mean).reshape(64, 64)
    axes[ind].imshow(image, cmap="gray")
    axes[ind].set_title(f'n_comp={n}')

# Last panel: the untouched original for reference.
original_img = img_matrix[rand_ind].reshape(64, 64)
axes[len(n_comp)].imshow(original_img, cmap="gray")
axes[len(n_comp)].set_title("original image")

plt.tight_layout()
plt.show()

In [None]:
# Find the dataset image whose eigenface weights are closest (Euclidean
# distance) to those of the photo in "me.jpg", and show both side by side.
pca = PCA(n_components=500)
pca.fit(centered_matrix)
eigenfaces = pca.components_
# The PCA was fit on already-centered data, so pca.mean_ is ~0. The mean face
# that must be subtracted from raw images is the per-pixel mean of img_matrix.
mean_face = np.mean(img_matrix, axis=0)

# convert() returns a new, fully loaded image, so the file can be closed here.
with Image.open("me.jpg") as raw_img:
    my_face_img = raw_img.convert("L").resize((64,64))
my_face = np.array(my_face_img).flatten()

# Project every centered image onto all eigenfaces in one matrix product.
# Result shape: (number of images, number of eigenfaces).
weights = (img_matrix - mean_face) @ eigenfaces.T

# Same projection for the query face; shape: (number of eigenfaces,).
face_weight = eigenfaces @ (my_face - mean_face)

# axis=1 reduces over the components, giving ONE distance per image.
# (axis=0 would give one value per component, and argmin would then return a
# component index instead of an image index.)
euclidean_distance = np.linalg.norm(weights - face_weight, axis=1)
closest_face = np.argmin(euclidean_distance)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8,8))
axes[0].imshow(my_face_img, cmap="gray")
axes[0].set_title("Original Image")
axes[1].imshow(img_matrix[closest_face].reshape(64,64), cmap="gray")
axes[1].set_title("Closest Person's Image")

plt.tight_layout()
plt.show()

In [None]:
# Generate five synthetic faces by drawing random eigenface weights and
# mapping them back to pixel space.
newface_matrix = []

# Spread of each component's weight across the dataset (one value per
# eigenface). Using a per-component std keeps high-order components from
# getting as much energy as the leading ones, and it is unaffected by any
# constant offset in the stored weights.
weights_std = np.array(weights).std(axis=0)

# pca was fit on mean-centered data, so pca.mean_ is ~0; the real mean face
# has to be taken from the raw images.
dataset_mean = np.mean(img_matrix, axis=0)

# One row of five small panels (figsize is in inches, not pixels).
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(15,3))
for i in range(5):
    random_weights = np.random.randn(eigenfaces.shape[0]) * weights_std
    newface = random_weights @ eigenfaces + dataset_mean
    newface_matrix.append(newface)
    axes[i].imshow(newface.reshape(64,64), cmap="gray")

plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error

# Reconstruction error for each component count in n_comp.
mse_values = []
for n, inverse_pca_data in zip(n_comp, inverse_pca_matrix):
    # The reconstructions were produced from centered_matrix, so they are in
    # mean-centered space; compare against centered_matrix, not the raw
    # img_matrix, otherwise the squared mean face dominates the error.
    mse = mean_squared_error(centered_matrix, inverse_pca_data)
    mse_values.append(mse)
    print(f"MSE for n_comp={n}: {mse}")