In [2]:
import random
import numpy as np
import matplotlib.pyplot as plt
from os import listdir, path, walk
from PIL import Image
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

In [3]:
DATASET_DIR = "/content/drive/My Drive/dataset"

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
def read_and_split_dataset(split_const):
    train_set, test_set = list(), list()
    for _ , dirnames , _ in walk(DATASET_DIR):
        for sub in dirnames:
            dir_path = f"{DATASET_DIR}/{sub}"
            sub_set = list()

            for blob_name in listdir(dir_path):
                img = Image.open(path.join(dir_path , blob_name)).convert('L') 
                face = np.array(img).ravel()
                label = f"{sub}"
                sub_set.append([face, label])

            random.shuffle(sub_set)
            train_set.extend(sub_set[:split_const])
            test_set.extend(sub_set[split_const:])
    return train_set, test_set

In [6]:
def read_and_prepare_dataset(split_const):
    train_set, test_set = read_and_split_dataset(split_const)
    train_set, test_set = np.array(train_set), np.array(test_set)
    train_data, train_labels = np.hsplit(train_set, 2)
    test_data, test_labels = np.hsplit(test_set, 2)
    train_data = np.array([x[0] for x in train_data])
    test_data = np.array([x[0] for x in test_data])
    train_labels = np.array([x[0] for x in train_labels])
    test_labels = np.array([x[0] for x in test_labels])
    return train_data, train_labels, test_data, test_labels

In [7]:
def choose_dimensionality(eigen_values, threshold):
    total_variance = np.sum(eigen_values)
    variance_fraction, num_dim, eigen_values_sum = 0, 1, 0
    while(variance_fraction < threshold):
        eigen_values_sum += eigen_values[num_dim-1]
        variance_fraction = eigen_values_sum / total_variance
        num_dim += 1
    return num_dim

In [8]:
def generate_center_matrix(train_data):
    train_mean = train_data.mean(axis=0)
    return train_data - train_mean

In [9]:
def generate_eigen_data(center_matrix):
    covariance_matrix = np.cov(center_matrix, rowvar=False, bias=True)
    eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)
    eigen_values = np.flip(eigen_values, axis=0)
    eigen_vectors = np.flip(eigen_vectors, axis=1)
    return eigen_values, eigen_vectors

In [10]:
def generate_projection_matrix(eigen_values, eigen_vectors, alpha):
    dim = choose_dimensionality(eigen_values, alpha)
    return eigen_vectors[:,0:int(dim)]

In [11]:
def reduce_data_using_pca(projection_matrix, center_matrix):
    return projection_matrix.T @ center_matrix.T

In [12]:
def knn(train_data, train_labels, test_data, test_labels, k):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_data, train_labels)
    prediction = knn.predict(test_data)
    reutrn metrics.accuracy_score(test_labels, prediction)

In [13]:
train_data, train_labels, test_data, test_labels = read_and_prepare_dataset(5)

In [14]:
center_matrix = generate_center_matrix(train_data)

In [15]:
eigen_values, eigen_vectors = generate_eigen_data(center_matrix)

In [16]:
alphas = [0.8, 0.85, 0.9, 0.95]
projection_matrices = [generate_projection_matrix(eigen_values, eigen_vectors, alpha) for alpha in alphas]

In [17]:
reduced_train_data = [reduce_data_using_pca(projection_matrix, center_matrix) for projection_matrix in projection_matrices]

In [18]:
test_center_matrix = generate_center_matrix(test_data)

In [19]:
reduced_test_data = [reduce_data_using_pca(projection_matrix, test_center_matrix) for projection_matrix in projection_matrices]

In [23]:
k_values = [1, 3, 5, 7]
print([[knn(reduced_train.T, train_labels, reduced_test.T, test_labels, k) for reduced_train, reduced_test in zip(reduced_train_data, reduced_test_data)] for k in k_values])

[[0.94, 0.95, 0.94, 0.93], [0.87, 0.87, 0.885, 0.88], [0.755, 0.735, 0.735, 0.74], [0.715, 0.715, 0.71, 0.71]]
