In [2]:
import numpy as np

# Two-class linear discriminant analysis (LDA)
class LDA:
    """Two-class linear discriminant analysis.

    ``fit`` learns a projection direction ``w = pinv(Sw) @ (mu0 - mu1)`` from
    samples labelled 0 and 1, together with a decision threshold placed at the
    midpoint of the two projected class means. ``predict`` assigns class 1 to
    samples whose projection falls below that threshold, class 0 otherwise.

    Attributes:
        w: 1-D projection vector, ``None`` until ``fit`` has been called.
        threshold: scalar cut-off applied to ``X @ w``; 0.0 until ``fit``
            replaces it with the projected midpoint of the class means.
    """

    def __init__(self) -> None:
        # Projection direction, learned by fit().
        self.w = None
        # Cut-off on the projected value; fit() overwrites it.
        self.threshold = 0.0

    @staticmethod
    def _standardize(A):
        """Centre each column of ``A`` and scale it by its population std."""
        std = np.std(A, axis=0)
        # A constant column has zero spread; dividing by 1 keeps its centred
        # values at 0 instead of producing 0/0 = NaN.
        std = np.where(std == 0, 1.0, std)
        return (A - np.mean(A, axis=0)) / std

    def calc_cov(self, X, Y=None):
        """Return the (cross-)covariance of the column-standardised inputs.

        NOTE: because both inputs are standardised first, the result is a
        correlation matrix (unit diagonal when ``Y`` is omitted), not a
        covariance matrix in the original units.

        Args:
            X: array of shape (m, d).
            Y: optional array with the same number of rows as ``X``;
                defaults to ``X`` itself.

        Returns:
            ``X_std.T @ Y_std / m``.
        """
        X = np.asarray(X, dtype=float)
        m = X.shape[0]
        # True division: floor division would truncate the z-scores.
        X = self._standardize(X)
        # `is None`, not `== None`: the latter is element-wise on arrays.
        Y = X if Y is None else self._standardize(np.asarray(Y, dtype=float))
        return 1 / m * np.matmul(X.T, Y)

    def project(self, X, y):
        """Fit on ``(X, y)`` and return the projection ``X @ w``."""
        self.fit(X, y)
        return np.asarray(X).dot(self.w)

    def fit(self, X, y):
        """Learn ``w`` and ``threshold`` from samples labelled 0 and 1.

        Args:
            X: array of shape (m, d).
            y: array-like of m labels; only rows labelled 0 or 1 are used.

        Raises:
            ValueError: if either class has no samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # Split the samples by class.
        X0 = X[y == 0]
        X1 = X[y == 1]
        if len(X0) == 0 or len(X1) == 0:
            raise ValueError(
                "fit() needs at least one sample of class 0 and one of class 1"
            )
        # Within-class matrix: sum of the per-class matrices from calc_cov.
        Sw = self.calc_cov(X0) + self.calc_cov(X1)
        # Per-class feature means.
        mu0, mu1 = np.mean(X0, axis=0), np.mean(X1, axis=0)
        # atleast_1d guards against a 0-d difference; it is a no-op when the
        # means are already 1-D (the case for 2-D X).
        mean_diff = np.atleast_1d(mu0 - mu1)
        # Pseudo-inverse (SVD-based) so a singular Sw does not raise.
        Sw_ = np.linalg.pinv(Sw)
        self.w = Sw_.dot(mean_diff)
        # Class 0 projects at or above class 1 along w, so the midpoint of the
        # projected means separates them; a fixed cut-off of 0 only works when
        # the data happen to straddle the origin.
        self.threshold = float(np.dot(self.w, np.atleast_1d(mu0 + mu1)) / 2)

    def predict(self, X):
        """Return a list of 0/1 labels, one per row of ``X``.

        Raises:
            ValueError: if the model has not been fitted.
        """
        if self.w is None:
            raise ValueError("predict() called before fit()")
        h = np.atleast_1d(np.asarray(X).dot(self.w))
        # Projections below the threshold lie on the class-1 side.
        return (h < self.threshold).astype(int).tolist()

In [9]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the iris dataset bundled with scikit-learn
data = datasets.load_iris()
# Features and labels
X, y = data.data, data.target
# Keep only the samples whose label is not 2, leaving a 0-vs-1 problem
binary_mask = y != 2
X, y = X[binary_mask], y[binary_mask]
# Hold out 20% of the samples for testing (fixed random_state)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)
# Create the LDA model and fit it on the training split
lda = LDA()
lda.fit(X_train, y_train)
# Predict labels for the held-out samples
y_pred = lda.predict(X_test)
# Report accuracy on the test split
acc = accuracy_score(y_test, y_pred)
print("acc: " ,acc)

acc:  1.0
