Hàm đọc ảnh test (thư mục phẳng, nhãn lấy từ tên file)

In [37]:
import os
from PIL import Image
import numpy as np

def load_images_flat(folder):
    """Load the image files located directly inside ``folder``.

    Only entries whose name ends (case-insensitively) with .png, .jpg, .jpeg,
    .bmp or .pgm are considered; they are visited in sorted name order.
    Each image is converted to 8-bit grayscale ('L'), cast to float32 and
    flattened to a 1-D vector.

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive).

    Returns
    -------
    images : numpy.ndarray
        ``np.array`` of the flattened images, one row per successfully read
        file. An empty folder yields an array of shape ``(0,)``.
        NOTE(review): a regular 2-D array is only produced when every image
        has the same number of pixels; sizes are not checked here.
    labels : list of str
        For each loaded image, the part of the file name before the first '.'.

    Files that cannot be opened or decoded are reported on stdout and skipped.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.pgm')
    images = []
    labels = []
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(image_exts):
            continue
        img_path = os.path.join(folder, filename)
        try:
            # The context manager closes the underlying file handle even when
            # decoding/conversion fails, instead of leaving it to the GC.
            with Image.open(img_path) as img:
                pixels = np.asarray(img.convert('L'), dtype=np.float32).flatten()
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"❌ Không đọc được file: {img_path}, lỗi: {e}")
            continue
        images.append(pixels)
        labels.append(filename.split('.')[0])  # label taken from the file name
    return np.array(images), labels



Hàm PCA

In [7]:
import numpy as np

def pca(X, num_components):
    """Compute the leading principal directions of ``X`` via SVD.

    ``X`` is centred by subtracting its mean over axis 0, then decomposed with
    an economy-size SVD. The first ``num_components`` rows of the right factor
    are returned as columns.

    Returns
    -------
    principal_components : numpy.ndarray
        Matrix whose columns are the selected right singular vectors.
    mean_face : numpy.ndarray
        Mean of ``X`` over axis 0 (the value that was subtracted).
    """
    mean_face = np.mean(X, axis=0)

    # Use the SVD of the centred data instead of building a covariance matrix;
    # only the right singular vectors (third return value) are needed.
    right_vectors = np.linalg.svd(X - mean_face, full_matrices=False)[2]

    return right_vectors[:num_components].T, mean_face


Hàm đọc ảnh train (mỗi thư mục con là một nhãn)

In [3]:
import os
import numpy as np
from PIL import Image

def load_images(folder):
    """Load a labelled image set laid out as one sub-folder per label.

    Every sub-directory of ``folder`` is treated as a label; every entry
    inside it is opened as an image, converted to 8-bit grayscale ('L'),
    cast to float32 and flattened. Non-directory entries directly inside
    ``folder`` are ignored.

    Both the label folders and the files inside each one are visited in
    sorted order, so the row order of the result is deterministic
    (``os.listdir`` on its own returns entries in arbitrary order).

    Parameters
    ----------
    folder : str
        Root directory containing one sub-directory per label.

    Returns
    -------
    images : numpy.ndarray
        ``np.array`` of the flattened images, one row per file. When no image
        is found the array has shape ``(0,)``.
        NOTE(review): a regular 2-D array is only produced when every image
        has the same number of pixels; sizes are not checked here.
    labels : list of str
        Name of the sub-folder each image came from, aligned with ``images``.

    There is no file-type filter and no error handling: an entry that Pillow
    cannot open raises and aborts the load.
    """
    images = []
    labels = []
    for label in sorted(os.listdir(folder)):
        label_folder = os.path.join(folder, label)
        if not os.path.isdir(label_folder):
            continue
        for filename in sorted(os.listdir(label_folder)):
            img_path = os.path.join(label_folder, filename)
            # Close the file handle deterministically once the pixels are copied out.
            with Image.open(img_path) as img:
                pixels = np.asarray(img.convert('L'), dtype=np.float32).flatten()
            images.append(pixels)
            labels.append(label)
    return np.array(images), labels

Hàm vẽ eigenfaces, tính accuracy

In [4]:
import numpy as np
import matplotlib.pyplot as plt

def plot_eigenfaces(principal_components, img_shape):
    """Show the principal components as images on a 4x5 grid.

    Parameters
    ----------
    principal_components : numpy.ndarray
        2-D array; column ``i`` is reshaped to ``img_shape`` and drawn in the
        i-th grid cell.
    img_shape : tuple
        Shape passed to ``reshape`` for each column.

    At most 20 components are drawn (one per grid cell). When fewer than 20
    columns are supplied, the remaining cells are left blank instead of
    raising ``IndexError``.
    """
    n_components = principal_components.shape[1]
    fig, axes = plt.subplots(4, 5, figsize=(10, 8))
    for i, ax in enumerate(axes.flat):
        if i < n_components:
            eigenface = principal_components[:, i].reshape(img_shape)
            ax.imshow(eigenface, cmap='gray')
        # Hide ticks/frame on every cell, including the unused ones.
        ax.axis('off')
    plt.tight_layout()
    plt.show()

def accuracy(predictions, ground_truth):
    """Return the fraction of predictions equal to the ground-truth labels.

    Parameters
    ----------
    predictions, ground_truth : sequence
        Label sequences of the same shape, compared element by element.

    Returns
    -------
    float
        Number of matching positions divided by ``len(ground_truth)``.
        NaN when both inputs are empty (the ratio is undefined).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape. Without this check a
        length-1 input would be silently broadcast against the other one.
    """
    predicted = np.asarray(predictions)
    expected = np.asarray(ground_truth)
    if predicted.shape != expected.shape:
        raise ValueError(
            f"predictions and ground_truth must have the same shape, "
            f"got {predicted.shape} and {expected.shape}"
        )
    if len(expected) == 0:
        # 0/0: keep the NaN result, but without a divide-by-zero RuntimeWarning.
        return float('nan')
    correct = np.count_nonzero(predicted == expected)
    return correct / len(expected)


TRAIN

In [5]:
# Load the training set; load_images() uses each sub-folder name as the label.
# NOTE(review): absolute, machine-specific path.
X_train, y_train = load_images('C:/Users/pc/Downloads/yalefaces/yalefaces')
print(X_train.shape)
print(len(set(y_train)))  # expected to print 15 distinct labels


(150, 77760)
15


In [8]:
# PCA: fit on the training images, keeping the first NUM_COMPONENTS components.
NUM_COMPONENTS = 20
principal_components, mean_face = pca(X_train, num_components=NUM_COMPONENTS)


In [9]:
# Centre the training images with the mean returned by pca(), then project
# them onto the principal components.
X_train_centered = X_train - mean_face
X_train_projected = X_train_centered @ principal_components


TEST

In [38]:
# Load the test images from a flat folder; load_images_flat() derives each
# label from the file name.
# NOTE(review): absolute, machine-specific path.
X_test, y_test = load_images_flat('C:\\Users\\pc\\OneDrive\\Desktop\\test')
print(X_test.shape)
print(y_test)



(15, 77760)
['person-1', 'person-10', 'person-11', 'person-12', 'person-13', 'person-14', 'person-15', 'person-2', 'person-3', 'person-4', 'person-5', 'person-6', 'person-7', 'person-8', 'person-9']


In [39]:
# Centre the test images with mean_face, the mean pca() computed from the training set.
X_test_centered = X_test - mean_face


In [40]:
# Project the centred test images onto the same principal components as the training set.
X_test_projected = X_test_centered @ principal_components


In [41]:
# 1-nearest-neighbour classification in the projected space.
predictions = []
for x in X_test_projected:
    # Euclidean distance from this test vector to every projected training vector.
    distances = np.linalg.norm(X_train_projected - x, axis=1)
    min_idx = np.argmin(distances)
    predictions.append(y_train[min_idx])  # take the label of the nearest training sample


In [42]:
# Print each test label next to the label predicted for it.
for pred, true_label in zip(predictions, y_test):
    print(f"Ảnh test: {true_label} -> Dự đoán: {pred}")



Ảnh test: person-1 -> Dự đoán: person-1
Ảnh test: person-10 -> Dự đoán: person-10
Ảnh test: person-11 -> Dự đoán: person-11
Ảnh test: person-12 -> Dự đoán: person-12
Ảnh test: person-13 -> Dự đoán: person-13
Ảnh test: person-14 -> Dự đoán: person-6
Ảnh test: person-15 -> Dự đoán: person-15
Ảnh test: person-2 -> Dự đoán: person-12
Ảnh test: person-3 -> Dự đoán: person-3
Ảnh test: person-4 -> Dự đoán: person-4
Ảnh test: person-5 -> Dự đoán: person-5
Ảnh test: person-6 -> Dự đoán: person-6
Ảnh test: person-7 -> Dự đoán: person-7
Ảnh test: person-8 -> Dự đoán: person-7
Ảnh test: person-9 -> Dự đoán: person-9


In [44]:
from sklearn.metrics import accuracy_score

# Score the predictions against the test labels with scikit-learn and print
# the result as a percentage with two decimals.
acc = accuracy_score(y_test, predictions)
print(f"✅ Độ chính xác trên tập test: {acc*100:.2f}%")


✅ Độ chính xác trên tập test: 80.00%
