In [11]:
from ml.ModelComponents import *
from utils.metrics import accuracy, cross_entropy, accuracy_for_classification_iris
from utils.loss import cross_entropy_loss

In [12]:
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import trange

In [13]:
# Seed NumPy's global RNG so that later draws from it repeat from run to run.
# NOTE(review): presumably this is what makes the `.sample(frac=1)` shuffle in
# read_image() and any random weight initialisation in ml.ModelComponents
# repeatable — confirm neither of them uses a separate generator.
np.random.seed(72)

In [14]:
def read_image(path="ORL3232", category=40):
    """Load a tree of BMP images as flat pixel vectors with one-hot labels.

    The expected layout is ``<path>/<label>/<id>.bmp`` where ``<label>`` and
    ``<id>`` are integers. Files whose parent directory name is not made up of
    digits are skipped. Only channel 0 of each decoded image is kept, and it is
    flattened to a 1-D vector.

    Samples are shuffled with ``DataFrame.sample(frac=1)`` and then partitioned
    by image id: rows for odd ids come first, rows for even ids follow.

    Args:
        path: Root directory searched recursively for ``*.bmp`` files.
        category: Number of classes. Labels are 1-based, so label ``k`` sets
            index ``k - 1`` of the one-hot vector.

    Returns:
        A tuple ``(images, labels)``. ``images`` holds one flattened image per
        row and ``labels`` the matching one-hot rows of length ``category``.
        Odd-id samples occupy the leading rows, even-id samples the rest.

    Raises:
        ValueError: If no usable BMP file is found under ``path``, or if a file
            cannot be decoded.
    """
    def one_hot(x):
        z = np.zeros(category)
        z[x - 1] = 1
        return z

    # rglob yields files in filesystem order, which is not stable across
    # machines. Sorting fixes the input order so that a seeded shuffle below
    # always produces the same permutation.
    bmps = sorted(Path(path).rglob("*.bmp"))
    data = {"labels": [], "ids": [], "images": []}

    for bmp in bmps:
        if not bmp.parent.stem.isdigit():
            continue
        image = cv2.imread(str(bmp))
        if image is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise ValueError(f"could not read image: {bmp}")
        data["labels"].append(int(bmp.parent.stem))
        data["ids"].append(int(bmp.stem))
        data["images"].append(image[:, :, 0].flatten())

    if not data["images"]:
        raise ValueError(f"no labelled .bmp images found under {path!r}")

    dataframe = pd.DataFrame(data).sample(frac=1)
    dataframe['labels'] = dataframe['labels'].apply(one_hot)

    # Odd ids form the training partition, even ids the test partition.
    is_odd = dataframe['ids'] % 2 == 1
    train = dataframe[is_odd]
    test = dataframe[~is_odd]

    images = np.stack(train['images'].tolist() + test['images'].tolist())
    labels = np.stack(train['labels'].tolist() + test['labels'].tolist())

    return images, labels

In [15]:
def pca(data, n_components):
    """Project ``data`` onto its leading ``n_components`` principal components.

    Every column is standardised (mean removed, divided by its population
    standard deviation) before the covariance matrix is eigendecomposed.
    Columns with zero standard deviation are left at zero after centring
    instead of being divided by zero.

    Args:
        data: 2-D array with samples in rows and features in columns.
        n_components: Number of leading components to keep.

    Returns:
        Array of shape ``(n_samples, n_components)``: the standardised data
        expressed in the basis of the eigenvectors with the largest
        eigenvalues. The sign of each output column is arbitrary because
        eigenvectors are only defined up to sign.
    """
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # A constant column has zero spread; dividing by it yields NaN (0/0), which
    # then contaminates the whole covariance matrix. Dividing by 1 keeps the
    # centred column at exactly zero so it simply contributes nothing.
    std = np.where(std == 0, 1.0, std)
    data_normalized = (data - mean) / std
    cov_matrix = np.cov(data_normalized, rowvar=False)
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    # Order components from largest to smallest eigenvalue.
    sorted_index = np.argsort(eigenvalues)[::-1]
    feature_vectors = eigenvectors[:, sorted_index[:n_components]]
    return np.dot(data_normalized, feature_vectors)

In [16]:
def normalize(data, max_value=255):
    """Scale intensities by dividing ``data`` by ``max_value``.

    Args:
        data: Array (or scalar) of raw intensities.
        max_value: Divisor representing the largest possible intensity. The
            default of 255 suits 8-bit images; pass e.g. 65535 for 16-bit data.

    Returns:
        ``data / max_value``; the input is not modified.
    """
    return data / max_value

def min_max_normalize(data):
    """Rescale ``data`` linearly so its minimum maps to 0 and its maximum to 1.

    The minimum and maximum are taken over the whole array, not per column.

    Args:
        data: Array of values to rescale.

    Returns:
        Array of the same shape as ``data`` with values in ``[0, 1]``. If every
        value in ``data`` is identical, the result is all zeros.
    """
    low = np.min(data)
    span = np.max(data) - low
    if span == 0:
        # All values are equal: avoid 0/0 (NaN) and map everything to 0.
        span = 1
    return (data - low) / span
    

In [17]:
# Training configuration shared by the cells below.
EPOCH = 6000  # number of iterations of the training loop (each uses the full training set)
LEARNING_RATE = 0.1  # passed to model.backward() on every iteration
n_components = 64  # principal components kept by pca(); also the first Linear layer's input size
softmax = Softmax()  # its forward() is applied to the model output before loss/accuracy are computed

In [18]:
# Load the images, divide pixel values by 255, reduce each image to n_components
# PCA features, then min-max scale those features using the global min and max.
# NOTE(review): pca() and min_max_normalize() are computed over train and test
# rows together (the split happens afterwards), so test-set statistics leak
# into the training features.
images, labels = read_image()
images = normalize(images)
images = pca(images, n_components)
images = min_max_normalize(images)
# read_image() returns odd-id samples first and even-id samples after them; the
# hard-coded 200 assumes there are exactly 200 odd-id images
# (presumably 40 subjects x 5 images each — TODO confirm against the dataset).
train_images, test_images = images[:200, :], images[200:, :]
train_labels, test_labels = labels[:200, :], labels[200:, :]

In [19]:
# Network layout: n_components inputs -> 32 units -> ReLU -> 40 outputs.
# The 40 outputs match the one-hot width produced by read_image()'s default category=40.
# NOTE(review): Model, Linear and Relu presumably come from the star import of
# ml.ModelComponents; Linear(a, b) is assumed to mean (in_features, out_features) — confirm.
model = Model([
    Linear(n_components, 32),
    Relu(),
    Linear(32, 40)
])

In [20]:
# Training loop: every iteration uses the whole training set and prints train/test metrics.
# NOTE(review): min_test_loss and min_test_loss_cnt are assigned here but never read or
# updated in this cell — looks like leftover early-stopping scaffolding; confirm or remove.
min_test_loss = 100
min_test_loss_cnt = 0
for epoch in range(EPOCH):
    # Forward pass on the training set, followed by softmax.
    train_output = softmax.forward(model.forward(train_images))
    # NOTE(review): the value returned by cross_entropy_loss is fed straight into
    # model.backward(), so it is presumably the gradient of the loss with respect to the
    # model output rather than a scalar loss — confirm in utils.loss.
    gradian = cross_entropy_loss(train_labels, train_output)
    # Presumably back-propagates and updates the weights in place using LEARNING_RATE — confirm.
    model.backward(gradian, LEARNING_RATE)
    # Train metrics use train_output, which was computed BEFORE the backward() call above.
    train_loss = cross_entropy(train_labels, train_output)
    train_acc = accuracy_for_classification_iris(train_labels, train_output)

    # Test metrics are computed AFTER backward(), so within one printed line the train
    # and test numbers come from forward passes on either side of that call.
    test_output = softmax.forward(model.forward(test_images))
    test_loss = cross_entropy(test_labels, test_output)
    test_acc = accuracy_for_classification_iris(test_labels, test_output)
    
    print(
        f"Epoch: {epoch}, Train Loss: {train_loss: .02f}, Train Acc: {train_acc}, Test Loss: {test_loss: .02f}, Test Acc: {test_acc}")

Epoch: 0, Train Loss:  30.51, Train Acc: 0.025, Test Loss:  28.02, Test Acc: 0.025
Epoch: 1, Train Loss:  28.03, Train Acc: 0.025, Test Loss:  26.30, Test Acc: 0.025
Epoch: 2, Train Loss:  26.29, Train Acc: 0.025, Test Loss:  24.71, Test Acc: 0.025
Epoch: 3, Train Loss:  24.73, Train Acc: 0.025, Test Loss:  21.24, Test Acc: 0.025
Epoch: 4, Train Loss:  21.24, Train Acc: 0.025, Test Loss:  19.86, Test Acc: 0.025
Epoch: 5, Train Loss:  19.86, Train Acc: 0.025, Test Loss:  20.92, Test Acc: 0.045
Epoch: 6, Train Loss:  20.91, Train Acc: 0.05, Test Loss:  19.35, Test Acc: 0.025
Epoch: 7, Train Loss:  19.35, Train Acc: 0.025, Test Loss:  21.67, Test Acc: 0.025
Epoch: 8, Train Loss:  21.67, Train Acc: 0.03, Test Loss:  22.45, Test Acc: 0.025
Epoch: 9, Train Loss:  22.45, Train Acc: 0.025, Test Loss:  24.13, Test Acc: 0.005
Epoch: 10, Train Loss:  24.14, Train Acc: 0.005, Test Loss:  25.55, Test Acc: 0.045
Epoch: 11, Train Loss:  25.55, Train Acc: 0.04, Test Loss:  25.98, Test Acc: 0.035
Epoch