In [1]:
import os
# Set the LOKY_MAX_CPU_COUNT environment variable to "2".
# NOTE(review): presumably done in the very first cell so the value is already
# in place when scikit-learn (and its joblib/loky backend) is imported below —
# confirm the intent.
os.environ.update({"LOKY_MAX_CPU_COUNT": "2"})

In [2]:
import os
import numpy as np
from skimage import io
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier

# Image collections: files matching *_1 ... *_9 form the training set and
# files matching *_10 form the test set.
train_img = io.ImageCollection('./data/*_[1-9].png')
test_img = io.ImageCollection('./data/*_10.png')

# Image dimensions.
# NOTE(review): every reshape(w, h) in this notebook uses these as
# (rows, cols), so `w` acts as the image height — confirm the files are 56x46.
w = 56
h = 46

# Number of training images; later also used as the PCA component count.
N = len(train_img)

# Pre-allocated buffers that later cells fill in place:
#   data       - one flattened, mean-centred training image per row
#   eigen_face - one min-max normalised eigenface per slice
data = np.zeros((N, w * h))
eigen_face = np.zeros((N, w, h))

In [3]:
# Mean face: pixel-wise average over all training images, accumulated as float.
mean_img = np.mean(train_img, axis=0, dtype=float)
mean_img_re = mean_img.reshape(1, w * h)

# Flatten every training image and store its deviation from the mean face
# in the matching row of `data`.
for i, face in enumerate(train_img):
    data[i] = face.reshape(w * h) - mean_img_re[0]

In [4]:
# Fit PCA on the centred training vectors; each row of components_ is one
# eigenface in flattened form.
pca = PCA(n_components=N).fit(data)
eigen_re = pca.components_

# Directory that receives the eigenface images (created if missing).
output_dir = './images/pca/'
os.makedirs(output_dir, exist_ok=True)

# Min-max scale every eigenface to [0, 1], keep it in `eigen_face`, and write
# it out as an 8-bit image named 1.png, 2.png, ...
for n in range(N):
    component = eigen_re[n]
    lo, hi = component.min(), component.max()
    eigen_face[n] = (component.reshape(w, h) - lo) / (hi - lo)
    as_uint8 = (eigen_face[n] * 255).astype(np.uint8)
    io.imsave(os.path.join(output_dir, f"{n + 1}.png"), as_uint8)

In [5]:
# Reconstruction demo: rebuild the first training image from a truncated basis.
img = train_img[0]
img0 = img.reshape(1, w * h) - mean_img_re

# Fit a 345-component PCA on the training vectors, project the centred image
# onto it, map the coefficients back to pixel space and add the mean face.
ipca = PCA(n_components=345)
ipca.fit(data)
comp = ipca.transform(img0)
new_re = mean_img_re + ipca.inverse_transform(comp)

# Back to 2-D, then min-max scale to [0, 1] so it can be stored as 8-bit data.
new = new_re.reshape(w, h)
new_min, new_max = new.min(), new.max()
new_n = (new - new_min) / (new_max - new_min)

# Save the reconstructed image.
io.imsave('./images/345.png', (new_n * 255).astype(np.uint8))

In [6]:
# Mean squared error (MSE) between the un-normalised reconstruction `new_re`
# and the original image, both flattened to a single row.
diff = new_re - img.reshape(1, w * h)
mse = np.mean(np.square(diff))
print("MSE = ", mse)

MSE =  0.10551581907185724


In [7]:
# KNN validation
# Class labels 1..40, each repeated 9 times in a row (360 entries in total).
# NOTE(review): assumes train_img is ordered class by class with exactly
# 9 images per class — confirm against the file naming.
label = np.arange(1, 41).repeat(9)

def KNNclassify(k, n, n_splits=3, random_state=0):
    """Cross-validate a KNN classifier on the PCA-projected training data.

    The centred training vectors in the module-level ``data`` are projected
    onto the first ``n`` rows of ``eigen_re`` and scored with shuffled K-fold
    cross-validation against ``label``.

    Args:
        k: Number of neighbours passed to ``KNeighborsClassifier``.
        n: Number of leading principal components to project onto.
        n_splits: Number of cross-validation folds (default 3).
        random_state: Seed for the fold shuffling. The fixed default makes
            the result reproducible and gives every (k, n) combination the
            same folds, so their accuracies are directly comparable. Pass
            ``None`` to get a fresh random split on every call.

    Returns:
        Mean validation accuracy over all folds.
    """
    # NOTE(review): eigen_re was fitted on every row of `data`, including the
    # rows that end up in each validation fold, so these scores may be
    # slightly optimistic.
    X = data @ eigen_re[:n].T

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    fold_scores = []

    for train_index, val_index in kf.split(X):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X[train_index], label[train_index])
        fold_scores.append(knn.score(X[val_index], label[val_index]))

    # Average over the folds actually produced instead of a hard-coded 3.
    return sum(fold_scores) / len(fold_scores)

# Evaluate every combination of neighbour count k and component count n.
for k in (1, 3, 5):
    for n in (3, 50, 170):
        acc = KNNclassify(k, n)
        print(f"k = {k}, n = {n}, accuracy = {acc:.4f}")

k = 1, n = 3, accuracy = 0.6917
k = 1, n = 50, accuracy = 0.9611
k = 1, n = 170, accuracy = 0.9583
k = 3, n = 3, accuracy = 0.6056
k = 3, n = 50, accuracy = 0.9167
k = 3, n = 170, accuracy = 0.8806
k = 5, n = 3, accuracy = 0.5111
k = 5, n = 50, accuracy = 0.8083
k = 5, n = 170, accuracy = 0.7889


In [8]:
# KNN test
# Take the set sizes from the collections themselves instead of hard-coding them.
n_train = len(train_img)
n_test = len(test_img)

# Labels: 40 classes, with 9 consecutive training images and 1 test image each.
# NOTE(review): assumes both collections are ordered class by class — confirm
# against the file naming.
train_label = np.repeat(np.arange(1, 41), 9)
test_label = np.arange(1, 41)

# Fail early if the images on disk do not match the label layout; otherwise
# rows would stay at zero or be paired with the wrong label without any error.
if n_train != len(train_label) or n_test != len(test_label):
    raise ValueError(
        f"expected {len(train_label)} training and {len(test_label)} test images, "
        f"found {n_train} and {n_test}"
    )

# One flattened image per row.
train_data = np.zeros([n_train, w * h])
test_data = np.zeros([n_test, w * h])

# Flatten each image and subtract the mean face of the training set.
for i in range(n_train):
    train_data[i] = train_img[i].reshape(1, w * h) - mean_img_re

for i in range(n_test):
    test_data[i] = test_img[i].reshape(1, w * h) - mean_img_re

def KNNClassify(k, n):
    """Score a KNN classifier on the test images.

    Projects ``train_data`` and ``test_data`` onto the first ``n`` rows of
    ``eigen_re``, fits a ``k``-neighbour classifier on the training
    projection with ``train_label``, and returns its accuracy on the test
    projection against ``test_label``.
    """
    basis = eigen_re[:n].T
    classifier = KNeighborsClassifier(n_neighbors=k).fit(train_data @ basis, train_label)
    return classifier.score(test_data @ basis, test_label)

# Report the test-set accuracy for k=1 neighbour and n=140 components.
test_acc = KNNClassify(1, 140)
print(f"Test accuracy with k=1 and n=140: {test_acc:.4f}")

Test accuracy with k=1 and n=140: 0.9500
