In [3839]:
# 导入库函数
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse.linalg import eigs

In [3840]:
# LPP算法函数

def knn_graph(Data, method, k):
    n = Data.shape[1]  # 修改此处获取样本点的数量
    knn_adjacency_matrix = np.zeros((n, n))  
    distances = np.sqrt(np.sum((Data.T[:, :, None] - Data.T[:, :, None].T) ** 2, axis=1)) # 修改计算欧式距离矩阵的方式
    if method == 'epsilon':
        return knn_adjacency_matrix, distances
    indices = np.argsort(distances, axis=1)[:, 1:k+1]
    for i in range(n):
        knn_adjacency_matrix[i, indices[i]] = 1
        knn_adjacency_matrix[indices[i], i] = 1
    return knn_adjacency_matrix, distances

def compute_avg_radius(n, distances): 
    radius = np.zeros(n)
    for i in range(n):
        avg_radius = np.mean(distances[:, i])  # 修改计算每个数据点的平均邻域半径的方式
        radius[i] = avg_radius
    return radius

def compute_knn_average_radius(distances, k):
    sorted_distances = np.sort(distances, axis=1)  # 对距离矩阵的每一行进行排序
    avg_knn_distances = np.mean(sorted_distances[:, 1:k+1], axis=1)  # 计算每个数据点的前k个距离的平均值作为半径
    return avg_knn_distances

def compute_neighborhood_matrix(Data, method, k):
    n = Data.shape[1]  # 修改获取样本点的数量的方式
    knn_adjacency_matrix, distances = knn_graph(Data, method, k)
    if method == 'knn':
        return knn_adjacency_matrix, distances
    adjacency_matrix = np.zeros((n, n))
    radius = compute_knn_average_radius(distances, k)
    for i in range(n):
        neighbors = np.where(distances[:, i] <= radius[i])[0]  # 修改获取epsilon邻域内的样本索引的方式
        adjacency_matrix[i, neighbors] = 1
        adjacency_matrix[neighbors, i] = 1
    return adjacency_matrix, distances

def construct_weight_matrix(Data, method, k, t):
    n = Data.shape[1]  # 修改获取样本点的数量的方式
    Weight_matrix = np.zeros((n, n))
    adjacency_matrix, distances = compute_neighborhood_matrix(Data, method, k)
    similarity_matrix = np.exp(-distances ** 2 / t)
    i_indices, j_indices = np.where(adjacency_matrix == 1)
    Weight_matrix[i_indices, j_indices] = similarity_matrix[i_indices, j_indices]
    Weight_matrix[j_indices, i_indices] = similarity_matrix[i_indices, j_indices]
    Weight_matrix += np.exp(-distances ** 2 / t)
    return Weight_matrix

In [3841]:
def PCA(X, n_components):
    # 计算数据矩阵的均值
    mean = np.mean(X, axis=0)
    # 中心化数据矩阵
    X_centered = X - mean
    # 计算数据矩阵的协方差矩阵
    covariance_matrix = np.cov(X_centered, rowvar=False)
    # 计算协方差矩阵的特征值和特征向量
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
    # 对特征向量按特征值从大到小排序
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]
    # 选取前n_components个特征向量
    principal_components = sorted_eigenvectors[:, :n_components]
    # 返回投影矩阵和均值向量
    return principal_components

In [3842]:
# MLDA算法函数

# 计算每个类别的均值矩阵
def compute_classes_mean_matrix(train_data, train_labels):
    num_classes = len(np.unique(train_labels))  # 类别数量
    print("类别数量：", num_classes)
    num_samples_per_class = train_data.shape[0] // num_classes  # 每个类别的样本数
    print("每个类别的样本数：", num_samples_per_class)
    num_features = train_data.shape[1]  # 每个样本的特征维度
    print("每个样本的特征维度：", num_features)
    means = np.zeros((num_classes, num_features))  # 存储每个类别的均值矩阵
    for i in range(1, num_classes + 1):  # 遍历每个类别
        temp_indices = np.where(train_labels == i)[0]  # 获取当前类别的训练数据索引
        temp_sum = np.sum(train_data[temp_indices], axis=0)  # 计算当前类别的特征值总和
        means[i-1] = temp_sum / num_samples_per_class  # 计算当前类别的均值
    return means  # 返回每个类别的均值矩阵
"""
# 计算所有类别的整体均值矩阵
def compute_overall_mean_matrix(classes_means):
    overall_mean = np.mean(classes_means, axis=0)  # 计算所有类别的整体均值
    return overall_mean.reshape(-1, 1)  # 返回整体均值矩阵（转置）

# 计算中心类别矩阵
def compute_center_class_matrix(train_data, train_labels, classes_means):
    Z = np.zeros_like(train_data)  # 初始化中心类别矩阵
    
    for i in range(train_data.shape[0]):  # 遍历训练数据
        class_index = int(train_labels[i]) - 1  # 获取当前样本所属类别的索引
        Z[i] = train_data[i] - classes_means[class_index]  # 计算中心类别矩阵
        
    return Z  # 返回中心类别矩阵

# 计算类间散布矩阵
def compute_between_class_scatter_matrix(classes_means, overall_mean):
    n = 5  # 训练集与测试集的比例
    Sb = np.zeros((classes_means.shape[1], classes_means.shape[1]))  # 初始化类间散布矩阵
    for i in range(classes_means.shape[0]):  # 遍历每个类别的均值矩阵
        Sb = np.add(Sb, n * ((classes_means[i] - overall_mean) * (classes_means[i] - overall_mean).T))  # 计算类间散布矩阵
    return Sb  # 返回类间散布矩阵

# 计算类内散布矩阵
def compute_class_scatter_matrix(Z):
    Sw = np.dot(Z.T, Z)  # 计算类内散布矩阵
    return Sw  # 返回类内散布矩阵
"""

'\n# 计算所有类别的整体均值矩阵\ndef compute_overall_mean_matrix(classes_means):\n    overall_mean = np.mean(classes_means, axis=0)  # 计算所有类别的整体均值\n    return overall_mean.reshape(-1, 1)  # 返回整体均值矩阵（转置）\n\n# 计算中心类别矩阵\ndef compute_center_class_matrix(train_data, train_labels, classes_means):\n    Z = np.zeros_like(train_data)  # 初始化中心类别矩阵\n    \n    for i in range(train_data.shape[0]):  # 遍历训练数据\n        class_index = int(train_labels[i]) - 1  # 获取当前样本所属类别的索引\n        Z[i] = train_data[i] - classes_means[class_index]  # 计算中心类别矩阵\n        \n    return Z  # 返回中心类别矩阵\n\n# 计算类间散布矩阵\ndef compute_between_class_scatter_matrix(classes_means, overall_mean):\n    n = 5  # 训练集与测试集的比例\n    Sb = np.zeros((classes_means.shape[1], classes_means.shape[1]))  # 初始化类间散布矩阵\n    for i in range(classes_means.shape[0]):  # 遍历每个类别的均值矩阵\n        Sb = np.add(Sb, n * ((classes_means[i] - overall_mean) * (classes_means[i] - overall_mean).T))  # 计算类间散布矩阵\n    return Sb  # 返回类间散布矩阵\n\n# 计算类内散布矩阵\ndef compute_class_scatter_matrix

In [3843]:
def ShowEigenface(eigenfaces, faceshape):
    # 显示前16个特征脸
    fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(8, 10))
    for i in range(16):
        axes[i%4][i//4].imshow(eigenfaces[:, i].reshape(faceshape), cmap="gray")
    plt.show()

In [3844]:
"""
def LPP(train_data, train_labels, method, d, k, t):
    Data = train_data.T
    n = len(train_labels)
    Weight_matrices = np.zeros((n, n)) # 存储每个类别的权重矩阵
    Degree_matrices = np.zeros((n, n)) # 存储每个类别的度矩阵
    for class_label in np.unique(train_labels):
        class_indices = np.where(train_labels == class_label)[0]  # 获取当前类别的样本索引
        class_train_data = Data[:, class_indices]  # 获取当前类别的样本数据
        Weight_matrix = construct_weight_matrix(class_train_data, method, k, t)  # 计算当前类别的权重矩阵
        Degree_matrix = np.diag(np.sum(Weight_matrix, axis=1))  # 计算当前类别的度矩阵
        # 将Degree_matrix和Weight_matrix计算矩阵的迹并添加到对角线
        class_idx = class_label - 1
        Weight_matrices[class_idx, class_idx] = np.trace(Weight_matrix)
        Degree_matrices[class_idx, class_idx] = np.trace(Degree_matrix)
    Laplacian_matrices = Degree_matrices - Weight_matrices
    return Laplacian_matrices, Data
"""

'\ndef LPP(train_data, train_labels, method, d, k, t):\n    Data = train_data.T\n    n = len(train_labels)\n    Weight_matrices = np.zeros((n, n)) # 存储每个类别的权重矩阵\n    Degree_matrices = np.zeros((n, n)) # 存储每个类别的度矩阵\n    for class_label in np.unique(train_labels):\n        class_indices = np.where(train_labels == class_label)[0]  # 获取当前类别的样本索引\n        class_train_data = Data[:, class_indices]  # 获取当前类别的样本数据\n        Weight_matrix = construct_weight_matrix(class_train_data, method, k, t)  # 计算当前类别的权重矩阵\n        Degree_matrix = np.diag(np.sum(Weight_matrix, axis=1))  # 计算当前类别的度矩阵\n        # 将Degree_matrix和Weight_matrix计算矩阵的迹并添加到对角线\n        class_idx = class_label - 1\n        Weight_matrices[class_idx, class_idx] = np.trace(Weight_matrix)\n        Degree_matrices[class_idx, class_idx] = np.trace(Degree_matrix)\n    Laplacian_matrices = Degree_matrices - Weight_matrices\n    return Laplacian_matrices, Data\n'

In [3845]:
def DLPP_LPP(Data, d, method, k, t):
    Data = Data.T
    Weight_matrix = construct_weight_matrix(Data, method, k, t)
    Degree_matrix = np.diag(np.sum(Weight_matrix, axis=1))
    Laplacian_matrix = Degree_matrix - Weight_matrix
    return Laplacian_matrix, Data


In [3846]:
def LPP(Data, d, method, k, t):
    Data = Data.T
    Weight_matrix = construct_weight_matrix(Data, method, k, t)
    Degree_matrix = np.diag(np.sum(Weight_matrix, axis=1))
    Laplacian_matrix = Degree_matrix - Weight_matrix
    print("LPP拉普拉斯矩阵形状：", Laplacian_matrix.shape)
    objective_value = np.dot(np.dot(Data, Laplacian_matrix), Data.T)  # 计算目标函数
    eigenvalues, eigenvectors = eigs(objective_value, k=d+1)
    sorted_indices = np.argsort(eigenvalues.real)
    selected_indices = sorted_indices[1:d + 1]
    selected_eigenvectors = eigenvectors.real[:, selected_indices]
    return selected_eigenvectors

In [3847]:
def MLDA(train_data, train_labels, d):
    # 计算每个类别的均值矩阵
    classes_means = compute_classes_mean_matrix(train_data, train_labels)
    """
    print('classes_means形状：', classes_means.shape)

    # 计算所有类别的整体均值矩阵
    overall_mean = compute_overall_mean_matrix(classes_means)
    print('overall_mean形状：', overall_mean.shape)

    # 计算中心类别矩阵
    Z = compute_center_class_matrix(train_data, train_labels, classes_means)
    print('Z形状：', Z.shape)

    # 计算类间散布矩阵
    Sb = compute_between_class_scatter_matrix(classes_means, overall_mean)
    print('Sb形状：', Sb.shape)

    # 计算类内散布矩阵
    Sw = compute_class_scatter_matrix(Z)
    print('Sw形状：', Sw.shape)
    """
    return classes_means.T

In [3848]:
def DLPP(train_data, train_labels, p, d, method, k, t):
    # Step 1: 使用MLDA进行特征提取
    F = MLDA(train_data, train_labels, d)
    print("F形状:", F.shape)
    # Step 2: 使用LPP进行特征提取
    L, X = DLPP_LPP(train_data, d, method, k, t)
    print("拉普拉斯矩阵形状L:", L.shape)
    # Step 3: 计算权重矩阵B
    num_classes = len(np.unique(train_labels))  # 计算训练集中的类别数
    B = np.zeros((num_classes, num_classes))  # 初始化权重矩阵B
    # 遍历每对类别，计算其对应的权重
    for i in range(num_classes):  # 遍历每个类别
        for j in range(num_classes):  # 再次遍历每个类别
            if i != j:  # 如果类别不相同
                fi = F[i]  # 获取第i个类别的平均脸
                fj = F[j]  # 获取第j个类别的平均脸
                # 计算第i类别和第j类别平均脸之间的欧氏距离，并将其应用于高斯核函数，计算权重
                B[i, j] = np.exp(-np.linalg.norm(fi - fj) ** 2 / t)
    # Step 4: 计算E和H矩阵
    E = np.diag(np.sum(B, axis=1))
    H = E - B
    print("H形状:", H.shape)
    # Step 5: 计算目标函数的分母和分子
    denominator = np.dot(np.dot(F, H), F.T) + 1e-10  # 添加一个微小的非零值，以避免除以零
    #print("分母形状:", denominator.shape)
    numerator = np.dot(np.dot(X, L), X.T)
    #print("分子形状:", numerator.shape)
    # Step 6: 分式
    objective_value = numerator / denominator
    # Step 7: 求解广义特征值问题的特征值和特征向量
    eigenvalues, eigenvectors = eigs(objective_value, k=d+1)
    sorted_indices = np.argsort(eigenvalues.real)
    selected_indices = sorted_indices[1:d + 1]  
    selected_eigenvectors = eigenvectors.real[:, selected_indices]
    return selected_eigenvectors

In [3849]:
# 读取数据集
def read_images(dataset_dir, target_size=(32, 32)):
    data = []  # 存储图像数据的列表
    labels = []  # 存储标签的列表
    faceshape = [] # 存储图像形状
    for class_dir in os.listdir(dataset_dir):  # 遍历数据集文件夹中的文件夹（每个文件夹代表一个类别）
        class_path = os.path.join(dataset_dir, class_dir)  # 类别文件夹路径
        for file_name in os.listdir(class_path):  # 遍历每个类别文件夹中的图像文件
            file_path = os.path.join(class_path, file_name)  # 图像文件路径
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # 读取灰度图像
            # 缩放图像至目标尺寸
            img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
            # 读取第一张灰度图像的大小作为图片形状
            faceshape = img.shape
            data.append(img.flatten())  # 将图像展平并添加到数据列表中
            labels.append(int(class_dir))  # 将类别标签添加到标签列表中
    return np.array(data), np.array(labels).reshape(-1, 1), faceshape  # 返回图像数据和标签

# 训练集和测试集划分(按顺序划分)
def train_test_split(data, labels, train_test_split_ratio):
    num_samples = data.shape[0]  # 总样本数
    num_classes = len(np.unique(labels))  # 类别数
    train_samples_per_class = int(train_test_split_ratio * num_samples / num_classes)  # 每个类别的训练样本数
    
    train_indices = []
    test_indices = []
    for i in range(1, num_classes + 1):  # 对每个类别
        class_indices = np.where(labels == i)[0]  # 获取当前类别的索引
        train_indices.extend(class_indices[:train_samples_per_class])  # 将前面部分作为训练集
        test_indices.extend(class_indices[train_samples_per_class:])  # 将后面部分作为测试集
    
    train_data = data[train_indices]
    train_labels = labels[train_indices]
    test_data = data[test_indices]
    test_labels = labels[test_indices]
    
    return train_data, train_labels, test_data, test_labels

In [3850]:
"""
from sklearn.datasets import make_swiss_roll

# 设置参数
n_samples = 1000  # 数据点数量
noise = 0.2  # 噪声水平
random_state = 42  # 随机种子，用于重现结果

# 生成带有标签信息的瑞士卷数据集
X, labels = make_swiss_roll(n_samples=n_samples, noise=noise, random_state=random_state)
X = X.T
labels = np.array(labels).reshape(-1, 1)
print("X形状:", X.shape)
print("labels形状:", labels.shape)
reduced_data = DLPP(X, labels, 2, 'knn', 10, 1000)


# 打印数据集的形状
print("数据集形状:", X.shape)
print("标签形状:", y.shape)

# 可视化瑞士卷数据集
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.Spectral)
ax.set_title('带有标签信息的瑞士卷数据集')
plt.show()


plt.figure(figsize=(8, 6))
plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=labels, cmap='viridis')
plt.title('DLPP降维后的结果（二维）')
plt.xlabel('特征1')
plt.ylabel('特征2')
plt.colorbar(label='类别')
plt.show()
"""

'\nfrom sklearn.datasets import make_swiss_roll\n\n# 设置参数\nn_samples = 1000  # 数据点数量\nnoise = 0.2  # 噪声水平\nrandom_state = 42  # 随机种子，用于重现结果\n\n# 生成带有标签信息的瑞士卷数据集\nX, labels = make_swiss_roll(n_samples=n_samples, noise=noise, random_state=random_state)\nX = X.T\nlabels = np.array(labels).reshape(-1, 1)\nprint("X形状:", X.shape)\nprint("labels形状:", labels.shape)\nreduced_data = DLPP(X, labels, 2, \'knn\', 10, 1000)\n\n\n# 打印数据集的形状\nprint("数据集形状:", X.shape)\nprint("标签形状:", y.shape)\n\n# 可视化瑞士卷数据集\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.mplot3d import Axes3D\n\nfig = plt.figure()\nax = fig.add_subplot(111, projection=\'3d\')\nax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.Spectral)\nax.set_title(\'带有标签信息的瑞士卷数据集\')\nplt.show()\n\n\nplt.figure(figsize=(8, 6))\nplt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=labels, cmap=\'viridis\')\nplt.title(\'DLPP降维后的结果（二维）\')\nplt.xlabel(\'特征1\')\nplt.ylabel(\'特征2\')\nplt.colorbar(label=\'类别\')\nplt.show()\n'

In [3851]:
def test_image(i, train_labels, test_labels, query, dlpp_weight_matrix):
    # 计算测试图像的权重向量
    #print("query形状:", query.shape)
    #print("dlpp_eigenfaces形状:", dlpp_eigenfaces.shape)
    #print("dlpp_weight_matrix形状:", dlpp_weight_matrix.shape)
    query = query.reshape(-1, 1)
    # 计算测试图像权重与数据集中每个人脸权重的欧氏距离
    euclidean_distances = np.linalg.norm(dlpp_weight_matrix - query, axis=0)
    # 找到最佳匹配的人脸
    best_match_index = np.argmin(euclidean_distances)
    #判断是否匹配正确
    flag = True
    if train_labels[best_match_index] == test_labels[i]:
        flag = True
    else:
        flag = False
    return flag

In [3852]:
#from sklearn.datasets import load_digits
#digits = load_digits()
#data = digits.data
#labels = digits.target
#images = digits.images

dataset = "ORL"
data, labels, faceshape = read_images(dataset)
train_data, train_labels, test_data, test_labels = train_test_split(data, labels, train_test_split_ratio=0.5)
print("训练集X形状：", train_data.shape)
print("训练集标签形状：", train_labels.shape)
p = 70
d = 40
k = 5
t = 87250
lpp_method = 'knn'  # 'knn'或'epsilon'


PCA_eigenfaces = PCA(train_data, p)
PCA_weight_matrix = PCA_eigenfaces.T @ train_data.T
print("PCA特张脸/PCA主成分/PCA变换矩阵A形状:", PCA_eigenfaces.shape)
print("PCA子空间/PCA权重矩阵Y形状:", PCA_weight_matrix.shape)
#ShowEigenface(PCA_eigenfaces, faceshape)
train_data_by_pca = PCA(train_data.T, p)
print("PCA提取训练数据特征的主成分形状:", train_data_by_pca.shape)


训练集X形状： (200, 1024)
训练集标签形状： (200, 1)


PCA特张脸/PCA主成分/PCA变换矩阵A形状: (1024, 70)
PCA子空间/PCA权重矩阵Y形状: (70, 200)
PCA提取训练数据特征的主成分形状: (200, 70)


In [3853]:
dlpp_eigenvectors = DLPP(train_data_by_pca, train_labels, p, d, lpp_method, k, t)
print("DLPP变换矩阵A形状:", dlpp_eigenvectors.shape)
dlpp_weight_matrix = dlpp_eigenvectors.T @ train_data_by_pca.T
print("DLPP子空间/权重矩阵Y形状:", dlpp_weight_matrix.shape)
#ShowEigenface(dlpp_eigenvectors, faceshape)

test_data_by_pca = PCA(test_data.T, p)
print("PCA提取测试数据特征的主成分形状:", test_data_by_pca.shape)

dlpp_test_data = dlpp_eigenvectors.T @ test_data_by_pca.T
print("DLPP子空间的测试集形状:", dlpp_test_data.shape)

# 识别率统计
wrong_times = 0
right_times = 0
for i in range(test_data.shape[0]):
    flag = test_image(i, train_labels, test_labels, dlpp_test_data[:,i], dlpp_weight_matrix)
    if flag:
                right_times += 1
    else:
        wrong_times += 1
rate = right_times / test_data.shape[0]
print("识别率：", rate)

类别数量： 40
每个类别的样本数： 5
每个样本的特征维度： 70
F形状: (70, 40)
拉普拉斯矩阵形状L: (200, 200)
H形状: (40, 40)
DLPP变换矩阵A形状: (70, 40)
DLPP子空间/权重矩阵Y形状: (40, 200)
PCA提取测试数据特征的主成分形状: (200, 70)
DLPP子空间的测试集形状: (40, 200)
识别率： 0.0


In [3854]:
lpp_eigenvectors = LPP(train_data_by_pca, d, lpp_method, k, t)
lpp_weight_matrix = lpp_eigenvectors.T @ PCA_weight_matrix
print("LPP变换矩阵A形状:", lpp_eigenvectors.shape)
print("LPP子空间/权重矩阵Y形状:", lpp_weight_matrix.shape)
lpp_test_data = test_data @ PCA_eigenfaces @ lpp_eigenvectors
# 识别率统计
wrong_times = 0
right_times = 0
for i in range(test_data.shape[0]):
    flag = test_image(i, train_labels, test_labels, lpp_test_data[i], lpp_weight_matrix)
    if flag:
                right_times += 1
    else:
        wrong_times += 1
rate = right_times / test_data.shape[0]
print("识别率：", rate)

LPP拉普拉斯矩阵形状： (200, 200)
LPP变换矩阵A形状: (70, 40)
LPP子空间/权重矩阵Y形状: (40, 200)
识别率： 0.855


In [3855]:
pca_test_data = test_data @ PCA_eigenfaces
# 识别率统计
wrong_times = 0
right_times = 0
for i in range(test_data.shape[0]):
    flag = test_image(i, train_labels, test_labels, pca_test_data[i], PCA_weight_matrix)
    if flag:
                right_times += 1
    else:
        wrong_times += 1
rate = right_times / test_data.shape[0]
print("识别率：", rate)

识别率： 0.93
