# 线性判别分析（LDA）

## 基于NumPy的实现

In [1]:
import numpy as np

### 算法流程

In [9]:
### 定义LDA类
class LDA:
    def __init__(self):
        # 初始化权重矩阵
        self.w = None
    # 协方差矩阵计算方法
    def calc_cov(self, X, Y=None):
        m = X.shape[0]
        # 数据标准化
        X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
        Y = X if Y is None else (Y - np.mean(Y, axis=0)) / np.std(Y, axis=0)
        return np.matmul(X.T, Y) / (m - 1)

    # 数据投影方法
    def project(self, X, y):
        # LDA拟合获取模型权重
        self.fit(X, y)
        # 数据投影
        X_projection = X.dot(self.w)
        return X_projection

    # LDA拟合方法
    def fit(self, X, y):
        # (1)按类分组
        X0 = X[y == 0]
        X1 = X[y == 1]
        # (2)分别计算两类数据自变量的协方差矩阵
        sigma0 = self.calc_cov(X0)
        sigma1 = self.calc_cov(X1)
        # (3)计算类内散度矩阵
        Sw = sigma0 + sigma1
        # (4)分别计算两类数据自变量的均值和差
        u0, u1 = np.mean(X0, axis=0), np.mean(X1, axis=0)
        mean_diff = np.atleast_1d(u0 - u1)
        # (5)对类内散度矩阵进行奇异值分解
        U, S, V = np.linalg.svd(Sw)
        # (6)计算类内散度矩阵的(伪)逆
        Sw_ = np.dot(np.dot(V.T, np.linalg.pinv(np.diag(S))), U.T)
        # (7)计算w
        self.w = Sw_.dot(mean_diff)

    # LDA分类预测
    def predict(self, X):
        # 初始化预测结果为空列表
        y_pred = []
        # 遍历待预测样本
        for x_i in X:
            # 模型预测
            h = x_i.dot(self.w)
            y = 1 * (h < 0)
            y_pred.append(y)
        return y_pred

### 数据测试

In [3]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
# 导入数据集
data = datasets.load_iris()
X, y = data.data, data.target

In [6]:
# 取标签不为2的数据
X, y = X[y != 2], y[y != 2]
# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=41)

In [10]:
# 创建LDA模型实例
lda = LDA()
# LDA模型拟合
lda.fit(X_train, y_train)
# LDA模型预测
y_pred = lda.predict(X_test)
# 测试集上的分类准确率
acc = accuracy_score(y_test, y_pred)
print("Accuracy of NumPy LDA:", acc)

Accuracy of NumPy LDA: 0.85


## 基于sklearn的实现

In [11]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [12]:
# 创建LDA分类器
clf = LinearDiscriminantAnalysis()
# 模型拟合
clf.fit(X_train, y_train)
# 模型预测
y_pred = clf.predict(X_test)
# 测试集上的分类准确率
acc = accuracy_score(y_test, y_pred)
print("Accuracy of Sklearn LDA:", acc)

Accuracy of Sklearn LDA: 1.0
