In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img
from PIL import Image

In [None]:
def read_image(path, size=(150, 150)):
    """Load an image file as a flattened, normalized grayscale vector.

    The file is decoded with ``matplotlib.image.imread``, resized to ``size``,
    converted to 8-bit grayscale and scaled into the 0-1 range.

    Parameters
    ----------
    path : str
        Location of the image file. Integer-valued images are expected to have
        a bit-depth of 8 (each pixel's value in the 0-255 range).
    size : tuple of int, optional
        Target size handed to ``PIL.Image.resize``. Defaults to ``(150, 150)``,
        the size the rest of the notebook reshapes to.

    Returns
    -------
    numpy.ndarray
        1-D float array of the grayscale pixels divided by 255.
    """
    pixels = np.asarray(img.imread(path))
    # imread hands back PNG data as floats in the 0-1 range; casting those
    # straight to uint8 would truncate almost every pixel to 0, so they are
    # rescaled to 0-255 first. Integer data is passed through unchanged.
    if np.issubdtype(pixels.dtype, np.floating):
        pixels = np.rint(pixels * 255)
    picture = Image.fromarray(np.uint8(pixels))
    picture = picture.resize(size)  # Resizes the image to the appropriate size
    picture = picture.convert('L')  # Converts it to grayscale
    return np.array(picture).flatten() / 255  # Flattens and normalizes the data

In [None]:
def display_image(array):
    """Show a flattened image as a 150x150 grayscale picture with hidden axes.

    ``array`` is fitted to 150x150 with ``np.resize`` before being drawn on the
    current matplotlib axes. Nothing is returned.
    """
    pixels = np.resize(array, (150, 150))
    shown = plt.imshow(pixels, cmap="gray")
    axes = shown.axes
    # Hide ticks and labels on both axes so that only the picture remains
    for axis in (axes.get_xaxis(), axes.get_yaxis()):
        axis.set_visible(False)


def display_image_ind(matrix, index):
    """Display the image stored in row ``index`` of ``matrix``.

    Thin wrapper around ``display_image``, so both helpers always render the
    same way instead of keeping two copies of the plotting code.
    """
    display_image(matrix[index])


def plot_portraits(images, titles, h, w, n_row, n_col):
    """Draw the first ``n_row * n_col`` images as a grid of titled portraits.

    Each entry of ``images`` is reshaped to ``(h, w)`` and drawn in grayscale
    without ticks; ``titles[i]`` is used as the caption of panel ``i``.
    """
    plt.figure(figsize=(2.2 * n_col, 2.2 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.20)
    # matplotlib numbers subplot slots starting at 1
    for slot in range(1, n_row * n_col + 1):
        plt.subplot(n_row, n_col, slot)
        portrait = images[slot - 1].reshape((h, w))
        plt.imshow(portrait, cmap=plt.cm.gray)
        plt.title(titles[slot - 1])
        plt.xticks(())
        plt.yticks(())

In [None]:
# One flattened pixel vector per file found in 'data'; the list becomes a 2D array later
images = [read_image(os.path.join('data', file)) for file in os.listdir('data')]


In [None]:
# One row per image, one column per pixel. np.vstack accepts the list directly;
# np.row_stack is only a deprecated alias of it.
image_matrix = np.vstack(images)


In [None]:
def pca(X):
    """Run PCA on the data matrix ``X`` through an SVD of the centered data.

    Returns a 4-tuple: the ``Vh`` factor of the reduced SVD, the column-wise
    mean of ``X``, the centered data ``X - mean`` and the squared singular
    values.
    """
    column_means = np.mean(X, axis=0)
    deviations = X - column_means
    # The left singular vectors are not needed by any caller
    _, singular_values, components = np.linalg.svd(deviations, full_matrices=False)
    return components, column_means, deviations, np.square(singular_values)


In [None]:
Vh, average_matrix, subtracted, eigenvalues = pca(image_matrix)
# Each row of Vh becomes one 150x150 eigenface. -1 lets NumPy infer how many
# rows there are, so the reshape keeps working when the data set size changes.
eigenfaces = Vh.reshape((-1, 150, 150))

In [None]:
# Inspect the raw (flattened) values of row 6 of Vh, i.e. the eigenface at index 6
Vh[6]

In [None]:
# Share of the total variance carried by each component. The total is computed
# once instead of once per eigenvalue.
percent_eigenvalues = np.asarray(eigenvalues) / np.sum(eigenvalues)

# var_values[k] is the accumulated share of the first k + 1 components
var_values = np.cumsum(percent_eigenvalues)
count = len(var_values)
total_var = var_values[-1] if count else 0

print("Count:", count, "\nTotal Variance:", total_var)

In [None]:
# Accumulated variance share against the number of components kept
variance_axes = plt.gca()
variance_axes.plot(var_values)
variance_axes.set_xticks(range(0, 400, 30))
variance_axes.set_xlim(-1, 400)
# plt.savefig(os.path.join('images', 'accumulated variance.png'))
plt.show()

In [None]:
# Show the mean face: the per-pixel average over all rows of image_matrix
display_image(average_matrix)

# plt.savefig(os.path.join('images', 'average face.png'), bbox_inches='tight')

In [None]:
# eigenface_titles = [f"eigenface {434-i}" for i in range(eigenfaces.shape[0])]
# plot_portraits(eigenfaces[::-1], eigenface_titles, 150, 150, 2, 5)
# plt.savefig(os.path.join('images', 'lowest eigenfaces.png'), bbox_inches='tight')

# Show a 2x5 grid of eigenfaces starting at index 90 (the 91st one). The slice
# and the titles are both derived from the same start index and grid size so
# they cannot drift apart.
first_shown = 90
grid_rows, grid_cols = 2, 5
shown_eigenfaces = eigenfaces[first_shown:first_shown + grid_rows * grid_cols]
# Titles are 1-based and built only for the panels that are actually drawn
eigenface_titles = [f"eigenface {first_shown + i + 1}" for i in range(len(shown_eigenfaces))]
plot_portraits(shown_eigenfaces, eigenface_titles, 150, 150, grid_rows, grid_cols)
# plt.savefig(os.path.join('images', 'medium eigenfaces.png'), bbox_inches='tight')

In [None]:
# Show the training image stored in row 142 of image_matrix
display_image_ind(image_matrix, 142)
# plt.savefig(os.path.join('images', 'original face.png'), bbox_inches='tight')

In [None]:
def reconstruction(centered_data, eigenfaces, average, h, w, image_index):
    """Rebuild one training image from a set of eigenfaces.

    Parameters
    ----------
    centered_data : array-like, one row per image
        Data matrix with the average already subtracted.
    eigenfaces : numpy.ndarray, one row per eigenface
        Basis used for the reconstruction.
    average : array-like
        Average face that is added back to the weighted eigenfaces.
    h, w : int
        Shape of the returned image.
    image_index : int
        Row of ``centered_data`` to reconstruct.

    Returns
    -------
    numpy.ndarray
        The reconstructed image reshaped to ``(h, w)``.
    """
    # Only the requested row is projected; projecting the whole data set just
    # to read a single row of weights is wasted work.
    weights = np.dot(centered_data[image_index], eigenfaces.T)  # Weight of each eigenface
    weighted_vectors = np.dot(weights, eigenfaces)  # Multiplies each eigenface by its weight
    # Adds the weighted eigenfaces to the average face
    return (average + weighted_vectors).reshape(h, w)

# Rebuild the training image in row 142 from the first 300 rows of Vh and show it
display_image(reconstruction(subtracted, Vh[:300,:], average_matrix, 150, 150, 142))
# plt.savefig(os.path.join('images', 'reconstructed full face.png'), bbox_inches='tight')

In [None]:
def reconstruction_outsider(eigenfaces, average, h, w, path):
    """Rebuild the image stored at ``path`` from the given eigenfaces.

    The image is read with ``read_image``, centered on ``average``, projected
    onto the rows of ``eigenfaces`` and mapped back; the result has the average
    added again and is returned reshaped to ``(h, w)``.
    """
    centered = read_image(path) - average  # Centralizes the data
    coefficients = centered.dot(eigenfaces.T)  # Weight of each eigenface
    # Weighted sum of the eigenfaces on top of the average face
    approximation = average + coefficients.dot(eigenfaces)
    return approximation.reshape(h, w)

In [None]:
def recognize(path, eig_num, face_limit, person_limit):
    """Match the image at ``path`` against the training faces.

    Uses the notebook-level ``average_matrix``, ``Vh``, ``subtracted`` and
    ``image_matrix``, and lists the ``data`` directory to name the match.

    Parameters
    ----------
    path : str
        File path of the image to recognize.
    eig_num : int
        Number of leading eigenfaces (rows of ``Vh``) that span the facespace.
    face_limit : float
        Projection error above which the image is reported as "Not a face".
    person_limit : float
        Distance to the closest training face above which the image is
        reported as "Unknown face".

    Returns
    -------
    tuple
        ``(proj_error, face_error, guess)``: the projection error, the smallest
        distance to a training face in weight space, and either a name or one
        of the two rejection labels.

    The closest training image is always displayed as a side effect.
    """
    # Centralizes the inputted image (ravel keeps it 1-D without copying)
    subtracted_matrix_rec = np.ravel(read_image(path) - average_matrix)

    eigenfaces_matrix = Vh[:eig_num, :]  # Gets the requested amount of eigenfaces

    weight = subtracted_matrix_rec @ eigenfaces_matrix.T  # Gets the eigenface weights
    projection = eigenfaces_matrix.T @ weight  # Projection of the image on the facespace

    # read_image scales pixels by 1/255, so the error is brought back to the
    # 0-255 pixel scale
    proj_error = np.linalg.norm(subtracted_matrix_rec - projection) * 255

    # One row of eigenface weights per original face
    original_faces_weights = subtracted @ eigenfaces_matrix.T

    # Distance between the unknown face and every original face, computed in a
    # single vectorized call instead of a Python loop
    dist_in_space = np.linalg.norm(original_faces_weights - weight, axis=1)

    guess_index = np.argmin(dist_in_space)  # Image tied to the lowest distance
    face_error = dist_in_space[guess_index]  # The lowest distance itself

    if proj_error > face_limit:  # Checks if the error is higher than the set limit
        guess = "Not a face"
    elif face_error > person_limit:
        guess = "Unknown face"
    else:
        # NOTE(review): assumes os.listdir('data') yields the files in the same
        # order that was used to build image_matrix - confirm.
        closest_file = os.listdir('data')[guess_index]
        # The name is whatever precedes the character just before the first '0'
        guess = closest_file[:closest_file.find('0')-1].replace("_", " ")

    display_image(image_matrix[guess_index])  # Displays the closest image
    return proj_error, face_error, guess

In [None]:
# Pick an image from the 'extra_testing' folder and preview it as the model sees it
name = 'carol7'
file_name = name + '.jpg'
path_extra = os.path.join('extra_testing', file_name)
display_image(read_image(path_extra))
# plt.savefig(os.path.join('images', name + '.png'), bbox_inches='tight')

In [None]:
# Recognize the extra image with 106 eigenfaces, a "Not a face" limit of 5000 and
# an "Unknown face" limit of 100; prints (projection error, closest distance, guess)
print(recognize(path_extra, 106, 5000, 100))
# plt.savefig(os.path.join('images', name + '_famoso.png'), bbox_inches='tight')

In [None]:
# Rebuild the extra image from every row of Vh and show the result
display_image(reconstruction_outsider(Vh, average_matrix, 150, 150, path_extra))

In [None]:
def test_images(path, eig_num, face_error, person_error):
    success = 0
    failure = 0
    not_a_face = 0
    unknown_face = 0
    for file in os.listdir(path):
        path_testing = os.path.join(path, file)
        results = recognize(path_testing, eig_num, face_error, person_error)
        if results[2] == "Not a face":
            not_a_face += 1
        elif results[2] == "Unknown face":
            unknown_face += 1
        elif results[2] == file[:file.find('0')-1].replace("_", " "):
            success += 1
        else:
            failure += 1

    success = success*100/len(os.listdir(path))
    failure = failure*100/len(os.listdir(path))
    not_a_face = not_a_face*100/len(os.listdir(path))
    unknown_face = unknown_face*100/len(os.listdir(path))
    return success, failure, not_a_face, unknown_face

In [None]:
# Evaluate the recognizer on the 'data' directory itself with 106 eigenfaces,
# a "Not a face" limit of 3500 and an "Unknown face" limit of 22.2
success_rate, failure_rate, not_face_rate, unknown_rate = test_images('data', 106, 3500, 22.2)
# Percentages of successes, failures, "Not a face" and "Unknown face" results
success_rate, failure_rate, not_face_rate, unknown_rate