## LAPP

LPP在选择k个最近邻时存在风险，因为原始数据集中存在噪音。如果错误确定了k个最近邻，整个算法的性能将降低。LAPP采用了一种从粗到细的策略，迭代LPP单元，直到找到满足少量噪声要求的最优子空间。LAPP的伪代码如下：

Input：$X= [x_1, x_2, x_3 ... x_m] \in \mathbb{R^{m \times n}}$，最终维度$d$，阈值$\delta$，最大迭代次数$T$。<br>
Output：变换矩阵$A \in \mathbb{R^{m \times d}}$。<br>
**1.** 计算相似矩阵S ($x_i$, $x_j$)。<br>
**2.** 根据相似矩阵S计算$S$，$D$和$L$。<br>
**3.** 解广义特征值问题，得到$A$。<br>
**4.** 迭代 = 0。<br>
**5.** __while__ iteration < T:  
&emsp;&emsp;&emsp;获取变换后的数据 $X_0 = A^TX$  
&emsp;&emsp;&emsp;$A_0 = A$  
&emsp;&emsp;&emsp;测量相似矩阵 S ($x_i$, $x_j$)  
&emsp;&emsp;&emsp;计算 $S$, $D$ and $L$  
&emsp;&emsp;&emsp;解广义特征值问题，得到$A$  
&emsp;&emsp;&emsp;__if__ diff($A - A_0$) < $\delta$:  
&emsp;&emsp;&emsp;&emsp;$A = A_0$, __break;__  
&emsp;&emsp;&emsp;__end if__  
iteration = iteration + 1  
__end while__
__return__ $A$<br>

In [83]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse.linalg import eigs

In [84]:
def compute_similarity_matrix(Data):
    n = len(Data)
    similarities = np.zeros((n, n))
    for i in range(n):
        for j in range(i+1, n):
            similarity = np.exp(-np.linalg.norm(Data[i] - Data[j])**2)
            similarities[i, j] = similarity
            similarities[j, i] = similarity
    return similarities

def LAPP(Data, d, delta, T):
    n = len(Data)
    A = np.eye(n)  # 初始化变换矩阵为单位矩阵
    iteration = 0  # 初始化迭代次数
    
    # 计算初始相似度矩阵
    S = compute_similarity_matrix(Data)
    
    while iteration < T:
        # 进行特征映射
        eigenvalues, eigenvectors = eigs(A.T @ S @ A, k=d+1, which='SR')
        sorted_indices = np.argsort(eigenvalues.real)
        selected_indices = sorted_indices[1:d + 1]
        selected_eigenvectors = eigenvectors.real[:, selected_indices]
        
        # 更新变换矩阵
        A_old = A.copy()
        A = selected_eigenvectors.T
        
        # 计算新的相似度矩阵
        S = A @ S @ A.T
        
        # 检查迭代是否收敛
        diff = np.linalg.norm(A - A_old)
        if diff < delta:
            break
        
        # 更新迭代次数
        iteration += 1
    
    return A


In [85]:
def read_images(dataset_dir, target_size=(32, 32)):
    data = []  # 存储图像数据的列表
    labels = []  # 存储标签的列表
    faceshape = [] # 存储图像形状
    for class_dir in os.listdir(dataset_dir):  # 遍历数据集文件夹中的文件夹（每个文件夹代表一个类别）
        class_path = os.path.join(dataset_dir, class_dir)  # 类别文件夹路径
        for file_name in os.listdir(class_path):  # 遍历每个类别文件夹中的图像文件
            file_path = os.path.join(class_path, file_name)  # 图像文件路径
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # 读取灰度图像
            # 缩放图像至目标尺寸
            img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
            # 读取第一张灰度图像的大小作为图片形状
            faceshape = img.shape
            data.append(img.flatten())  # 将图像展平并添加到数据列表中
            labels.append(int(class_dir))  # 将类别标签添加到标签列表中
    return np.array(data), np.array(labels).reshape(-1, 1), faceshape  # 返回图像数据和标签

# 训练集和测试集划分
def train_test_split(data, labels, train_test_split_ratio):
    num_samples = data.shape[0]  # 总样本数
    train_samples = int(num_samples * train_test_split_ratio)  # 训练集样本数
    
    # 洗牌算法打乱数据集
    indices = np.arange(num_samples)
    np.random.shuffle(indices)
    data = data[indices]
    labels = labels[indices]
    
    # 划分训练集和测试集
    train_data = data[:train_samples]
    train_labels = labels[:train_samples]
    test_data = data[train_samples:]
    test_labels = labels[train_samples:]
    
    return train_data, train_labels, test_data, test_labels

In [86]:
def test_image(i, faceshape, overall_mean, train_labels, test_labels, train_data, test_data, lpp_eigenfaces, lpp_weight_matrix):
    # 计算测试图像的权重向量
    #print("测试图像形状:", query.shape)
    #print("平均人脸形状:", overall_mean.shape)
    #print("特征脸形状:", dlpp_eigenfaces.shape)
    query = test_data[i]
    query_weight = (lpp_eigenfaces.T @ (query - overall_mean.flatten()).reshape(-1, 1))
    # 计算测试图像与数据集中每个人脸的欧氏距离
    euclidean_distances = np.linalg.norm(lpp_weight_matrix - query_weight, axis=0)
    # 找到最佳匹配的人脸
    best_match_index = np.argmin(euclidean_distances)
    #判断是否匹配正确
    flag = False
    if train_labels[best_match_index] == test_labels[i]:
        flag = True
    else:
        flag = False
    """
    # 可视化
    fig, axes = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(8, 6))
    axes[0].imshow(query.reshape(faceshape), cmap="gray")
    axes[0].set_title("Query Image")
    axes[1].set_xlabel("Euclidean Distance: {:.0f}".format(euclidean_distances[best_match_index]))
    axes[1].imshow(train_data[best_match_index].reshape(faceshape), cmap="gray")
    axes[1].set_title("Best Match")
    plt.show()
    """
    return flag

In [87]:
def ShowEigenface(eigenfaces, faceshape):
    # 显示前16个特征脸
    fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(8, 10))
    for i in range(16):
        axes[i%4][i//4].imshow(eigenfaces[:, i].reshape(faceshape), cmap="gray")
    #print("显示特征脸")
    plt.show()

In [88]:
# 读取人脸图像
faces, classes, faceshape = read_images("ORL")  # 读取图像数据和标签

train_data, train_labels, test_data, test_labels = train_test_split(faces, classes, 0.5)  # 划分训练集和测试集
# 打印一些细节
print("人脸图像形状:", faceshape)
print("类别数量:", len(classes))
print("图像数量:", len(faces))

人脸图像形状: (32, 32)
类别数量: 400
图像数量: 400


In [89]:
# 应用LPP并选择前K个主成分作为特征脸

d = 70
k = 135
t = 87250
method = 'knn'
delta = 1e-6
max_iterations = 5
"""
d = 70
k = 450
t = 50000
method = 'epsilon'
"""
train_data = train_data.T
print("训练集形状：", train_data.shape)
#from lpproj import LocalityPreservingProjection
#lpp = LocalityPreservingProjection(n_neighbors=n_neighbors, n_components=n_components)
#lpp.fit(train_data)
#lpp_eigenfaces = lpp.transform(train_data)



lpp_eigenfaces = LAPP(train_data, d, delta, max_iterations)
print("特征脸形状:", lpp_eigenfaces.shape)

overall_mean = np.mean(train_data , axis=1).reshape(-1, 1)
print("平均人脸形状:", overall_mean.shape)

lpp_weight_matrix = lpp_eigenfaces.T @ (train_data-overall_mean) 
print("权重矩阵形状:", lpp_weight_matrix.shape)

ShowEigenface(lpp_eigenfaces, faceshape)


训练集形状： (1024, 200)


ValueError: operands could not be broadcast together with shapes (70,1024) (1024,1024) 

In [None]:
# 识别率统计
wrong_times = 0
right_times = 0
for i in range(test_data.shape[0]):
    flag = test_image(i, faceshape, overall_mean, train_labels, test_labels, train_data, test_data, lpp_eigenfaces, lpp_weight_matrix)
    if flag:
                right_times += 1
    else:
        wrong_times += 1
rate = right_times / test_data.shape[0]
print(f"Recognition Rate: {rate}")

Recognition Rate: 0.87
