In [None]:
import os

import torch
import h5py

from pca import pca
from lda import lda
from load import load_and_split_data
from plot import plot_eigenfaces, plot_2d_data

# Target dimensionalities (1 through 8) shared by the export and
# evaluation cells further down.
reduced_dims = range(1, 9)

import torch.multiprocessing as mp
# force=True replaces any start method that was already set in this kernel,
# so re-running the cell does not raise.
mp.set_start_method('spawn', force=True)

# Load the data, already split into train/test features and labels
train_data, test_data, train_label, test_label = load_and_split_data()

# Move the feature tensors to the GPU when one is present; the label tensors
# are left where load_and_split_data() put them.
if torch.cuda.is_available():
  train_data, test_data = train_data.cuda(), test_data.cuda()

In [None]:
# Learn the PCA projection matrix from the training data (8 components).
# NOTE(review): the second return value is presumably the explained-variance
# ratio per component (inferred from its name) — confirm in pca.py. No cell
# visible here uses it.
pca_components, pca_ratio = pca(train_data, n_components=8)

# Display the PCA eigenfaces
plot_eigenfaces(pca_components, "PCA Eigenfaces (First 8 Components)")

In [None]:
# Learn the LDA projection matrix from the labelled training data (8 components).
# NOTE(review): the second return value is presumably a per-component ratio
# analogous to PCA's (inferred from its name) — confirm in lda.py. No cell
# visible here uses it.
lda_components, lda_ratio = lda(train_data, train_label, n_components=8)

# Display the LDA Fisherfaces
plot_eigenfaces(lda_components, "LDA Fisherfaces (First 8 Components)")

In [None]:
# PCA dimensionality reduction to 2 components for visualisation
pca_components_2d, _ = pca(train_data, n_components=2)

# Both splits are centred with the *training* mean so that no test-set
# statistics enter the projection. Compute it once and reuse it instead of
# re-reducing the whole training tensor for each split.
train_mean = torch.mean(train_data, dim=0)

train_pca_2d = (train_data - train_mean) @ pca_components_2d

plot_2d_data(train_pca_2d, train_label, "PCA Projection (Training Data, 2D)")

test_pca_2d = (test_data - train_mean) @ pca_components_2d

plot_2d_data(test_pca_2d, test_label, "PCA Projection (Test Data, 2D)")

In [None]:
# LDA dimensionality reduction to 2 components for visualisation
lda_components_2d, _ = lda(train_data, train_label, n_components=2)

# Project the training split (no centring is applied here) and plot it
train_lda_2d = torch.matmul(train_data, lda_components_2d)
plot_2d_data(train_lda_2d, train_label, "LDA Projection (Training Data, 2D)")

# Project the test split with the same matrix and plot it
test_lda_2d = torch.matmul(test_data, lda_components_2d)
plot_2d_data(test_lda_2d, test_label, "LDA Projection (Test Data, 2D)")

In [None]:
# h5py.File(..., "w") does not create missing parent directories, so make
# sure the output folder exists before opening either file.
os.makedirs("reduced_data", exist_ok=True)

# The centring step does not depend on the target dimensionality: compute the
# training mean and the centred splits once instead of once per loop pass.
# The test split is centred with the training mean on purpose.
train_mean = torch.mean(train_data, dim=0)
train_centered = train_data - train_mean
test_centered = test_data - train_mean

# PCA: one HDF5 group "dim_<k>" per target dimensionality, each holding the
# projected "train" and "test" matrices.
with h5py.File("reduced_data/pca.h5", "w") as f:
  for dim in reduced_dims:
    pca_components, _ = pca(train_data, n_components=dim)
    train_pca = train_centered @ pca_components
    test_pca = test_centered @ pca_components
    grp = f.create_group(f"dim_{dim}")
    # Copy to host memory and hand h5py a plain NumPy array.
    grp.create_dataset("train", data=train_pca.cpu().numpy())
    grp.create_dataset("test", data=test_pca.cpu().numpy())

print("PCA reduced data saved to reduced_data/pca.h5")

# LDA: same file layout; the data is projected without centring, matching
# the 2-D LDA visualisation cell.
with h5py.File("reduced_data/lda.h5", "w") as f:
  for dim in reduced_dims:
    lda_components, _ = lda(train_data, train_label, n_components=dim)
    train_lda = train_data @ lda_components
    test_lda = test_data @ lda_components
    grp = f.create_group(f"dim_{dim}")
    grp.create_dataset("train", data=train_lda.cpu().numpy())
    grp.create_dataset("test", data=test_lda.cpu().numpy())

print("LDA reduced data saved to reduced_data/lda.h5")

In [None]:
from plot import plot_accuracy_reduced_dim
from knn import KNN
from svm import SVM
import h5py

def cal_accuracies(reduced_data, classifier):
  """Fit and score `classifier` on every stored dimensionality.

  Args:
    reduced_data: Path to an HDF5 file containing, for each `dim` in the
      notebook-level `reduced_dims`, the datasets `dim_<dim>/train` and
      `dim_<dim>/test`.
    classifier: Object exposing `fit(features, labels)` and
      `evaluate(features, labels)`. The same instance is re-fitted for
      every dimensionality.

  Returns:
    A list with one `classifier.evaluate(...)` result per entry of
    `reduced_dims`, in iteration order.

  Labels come from the notebook-level `train_label` / `test_label`.
  """
  on_gpu = torch.cuda.is_available()

  def _load(h5_file, key):
    # Read the whole dataset, convert it to a float32 tensor and move it to
    # the GPU when one is available.
    features = torch.from_numpy(h5_file[key][:]).float()
    return features.cuda() if on_gpu else features

  scores = []
  with h5py.File(reduced_data, "r") as h5_file:
    for n_dim in reduced_dims:
      train_features = _load(h5_file, f"dim_{n_dim}/train")
      test_features = _load(h5_file, f"dim_{n_dim}/test")
      classifier.fit(train_features, train_label)
      scores.append(classifier.evaluate(test_features, test_label))
  return scores

knn = KNN(k=3)
svm = SVM()
reduced_dims = range(1, 9)

# One row per curve: (legend label, reduced-data file, classifier).
# Rows are evaluated top to bottom, and each classifier instance is shared
# between its PCA and LDA runs.
experiments = [
  ("KNN with PCA reduced", "reduced_data/pca.h5", knn),
  ("KNN with LDA reduced", "reduced_data/lda.h5", knn),
  ("SVM with PCA reduced", "reduced_data/pca.h5", svm),
  ("SVM with LDA reduced", "reduced_data/lda.h5", svm),
]

accuracy_curves = [
  cal_accuracies(h5_path, clf) for _label, h5_path, clf in experiments
]
curve_labels = [label for label, _h5_path, _clf in experiments]

# Plot accuracy against the reduced dimensionality, one curve per experiment
plot_accuracy_reduced_dim(reduced_dims, accuracy_curves, curve_labels)
