In [8]:
import numpy as np

class LDA:
    """Linear Discriminant Analysis projection built from class scatter matrices.

    ``fit`` computes the between-class and within-class scatter matrices of the
    training data and stores the eigenvectors of ``pinv(S_w) @ S_b`` ordered by
    decreasing eigenvalue. ``transform`` projects samples onto the leading
    eigenvectors.

    Attributes set by ``fit``:
        class_means: array with one row per class (rows ordered like
            ``np.unique(y)``) holding the per-feature mean of that class.
        overall_mean: per-feature mean over all training samples.
        eigen_values: real eigenvalues, sorted in descending order.
        eigen_vectors: real eigenvectors as columns, in the same order as
            ``eigen_values``.
    """

    def fit(self, X, y):
        """Estimate the discriminant directions from labelled data.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of n_samples class labels.

        Raises:
            ValueError: if ``X`` is not 2-D or ``X`` and ``y`` disagree on the
                number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        class_labels = np.unique(y)
        n_features = X.shape[1]
        overall_mean = np.mean(X, axis=0)

        class_means = []
        between_class_scatter = np.zeros((n_features, n_features))
        within_class_scatter = np.zeros((n_features, n_features))

        # Single pass over the classes: each class subset is extracted once and
        # feeds both scatter matrices.
        for label in class_labels:
            class_X = X[y == label]
            class_mean = np.mean(class_X, axis=0)
            class_means.append(class_mean)

            # between-class scatter: class size times the outer product of the
            # offset between the class mean and the overall mean
            mean_offset = (class_mean - overall_mean).reshape(-1, 1)
            between_class_scatter += class_X.shape[0] * np.dot(mean_offset, mean_offset.T)

            # within-class scatter: sum of outer products of the centred samples
            centered = class_X - class_mean
            within_class_scatter += np.dot(centered.T, centered)

        self.class_means = np.array(class_means)
        self.overall_mean = overall_mean

        # The pseudo-inverse equals the inverse when S_w is well conditioned and
        # keeps the fit working when S_w is singular (e.g. a constant feature,
        # or more features than samples), where np.linalg.inv would raise.
        eigen_values, eigen_vectors = np.linalg.eig(
            np.linalg.pinv(within_class_scatter) @ between_class_scatter
        )

        # The product matrix is not symmetric, so eig may return complex output
        # with round-off-sized imaginary parts; keep the real parts so sorting
        # and the projection stay real-valued.
        eigen_values = np.real(eigen_values)
        eigen_vectors = np.real(eigen_vectors)

        sorted_indices = np.argsort(eigen_values)[::-1]
        self.eigen_values = eigen_values[sorted_indices]
        self.eigen_vectors = eigen_vectors[:, sorted_indices]

    def transform(self, X, n_components=None):
        """Project ``X`` onto the leading discriminant directions.

        Args:
            X: array-like of shape (n_samples, n_features).
            n_components: number of leading eigenvectors to project onto;
                ``None`` (the default) uses all of them.

        Returns:
            Array of shape (n_samples, n_components) with the projected samples.
        """
        return np.dot(np.asarray(X), self.eigen_vectors[:, :n_components])

In [9]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Fixed seed for the randomized estimators below so that the reported
# accuracies are reproducible under Restart Kernel -> Run All.
RANDOM_STATE = 42

# Flatten every sample into a single feature vector (one row per sample).
X_train_2d = X_train_filtered.reshape(X_train_filtered.shape[0], -1)
X_test_2d = X_test_filtered.reshape(X_test_filtered.shape[0], -1)

# NOTE(review): the `LinearDiscriminantAnalysis as LDA` import at the top of
# this cell rebinds the name `LDA`, so the estimator used here is scikit-learn's
# and not the hand-written `LDA` class defined earlier in the notebook.
# Fit the projection on the training split only, then apply it to the test split.
lda = LDA(n_components=None)
X_train_lda = lda.fit_transform(X_train_2d, y_train_filtered)
X_test_lda = lda.transform(X_test_2d)

# Classifiers compared on the LDA-projected features. The tree ensembles are
# stochastic, so they receive an explicit random_state.
classifiers = {
    'SVM': SVC(),
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
    'KNN': KNeighborsClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(random_state=RANDOM_STATE)
}

# Train each classifier on the projected training data and report its accuracy
# on the projected test data.
for clf_name, clf in classifiers.items():
    clf.fit(X_train_lda, y_train_filtered)
    y_pred = clf.predict(X_test_lda)
    accuracy = accuracy_score(y_test_filtered, y_pred)
    print(f"{clf_name} accuracy:", accuracy)


SVM accuracy: 0.8837209302325582
Random Forest accuracy: 0.875968992248062
KNN accuracy: 0.8643410852713178
Gradient Boosting accuracy: 0.8372093023255814
