In [10]:
from ml.ModelComponents import *
from utils.metrics import accuracy, cross_entropy
from utils.loss import cross_entropy_loss

In [11]:
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import trange

In [12]:
np.random.seed(72)

In [13]:
def read_image(path="ORL3232", category=40):
    def one_hot(x):
        z = np.zeros(category)
        z[x - 1] = 1
        return z

    bmps = Path(path).rglob("*.bmp")
    data = {"labels": [], "ids": [], "images": []}

    for bmp in bmps:
        if not bmp.parent.stem.isdigit():
            continue
        data["labels"].append(int(bmp.parent.stem))
        data["ids"].append(int(bmp.stem))
        data["images"].append(cv2.imread(str(bmp))[:, :, 0].squeeze().flatten())

    dataframe = pd.DataFrame(data).sample(frac=1)
    dataframe['labels_onehot'] = dataframe['labels'].apply(lambda x: one_hot(x))

    train = dataframe[dataframe['ids'] % 2 == 1]
    test = dataframe[dataframe['ids'] % 2 == 0]

    images = np.stack(train['images'].tolist() + test['images'].tolist())
    labels = np.stack(train['labels'].tolist() + test['labels'].tolist())
    labels_onehot = np.stack(train['labels_onehot'].tolist() + test['labels_onehot'].tolist())

    return images, labels, labels_onehot

In [14]:
def pca(data, n_components):
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    data_normalized = (data - mean) / std
    cov_matrix = np.cov(data_normalized, rowvar=False)
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    sorted_index = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_index]
    feature_vectors = sorted_eigenvectors[:, :n_components]
    pca_data = np.dot(data_normalized, feature_vectors)
    return pca_data


def lda(X, y, num_components=2):
    class_labels = np.unique(y)
    mean_overall = np.mean(X, axis=0)
    S_W = np.zeros((X.shape[1], X.shape[1]))
    S_B = np.zeros((X.shape[1], X.shape[1]))
    for c in class_labels:
        X_c = X[y == c]
        mean_c = np.mean(X_c, axis=0)
        S_W += np.dot((X_c - mean_c).T, (X_c - mean_c))
        n_c = X_c.shape[0]
        mean_diff = (mean_c - mean_overall).reshape(-1, 1)
        S_B += n_c * np.dot(mean_diff, mean_diff.T)
    A = np.dot(np.linalg.inv(S_W), S_B)
    eigenvalues, eigenvectors = np.linalg.eig(A)
    eigenvectors = eigenvectors[:, np.argsort(eigenvalues)[::-1]]
    W = eigenvectors[:, :num_components]
    return np.real(W)

def euclidean_similarity(A, B):
    A = A[:, np.newaxis, :]
    B = B[np.newaxis, :, :]
    distances = np.sqrt(np.sum((A - B) ** 2, axis=2))
    similarity = 1 / (1 + distances)
    return similarity

In [15]:
def normalize(data):
    data = data / 255
    return data


def min_max_normalize(data):
    data = (data - np.min(data)) / (np.max(data) - np.min(data))
    return data

In [16]:
EPOCH = 6000
LEARNING_RATE = 0.1
n_components = 64
softmax = Softmax()

In [17]:
images, labels, labels_onehot = read_image()
images = normalize(images)
images = pca(images, n_components)
images = min_max_normalize(images)
convert_matrix = lda(images, labels, 14)
images = np.dot(images, convert_matrix)
train_images, test_images = images[:200, :], images[200:, :]
train_labels_onehot, test_labels_onehot = labels_onehot[:200, :], labels_onehot[200:, :]
train_labels, test_labels = labels[:200], labels[200:]

In [18]:
np.average(train_labels[euclidean_similarity(test_images, train_images).argmax(axis=1)] == test_labels)

0.99