<a href="https://colab.research.google.com/github/ahzaidy/Programs/blob/main/CPSC_5410_HW2_P3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA

# Read the digit table from disk.
# NOTE(review): this treats the last column as the class label and every
# other column as a feature -- confirm against the CSV layout.
df = pd.read_csv("MNIST digits0-1-2.csv")
feature_frame, label_series = df.iloc[:, :-1], df.iloc[:, -1]
X = feature_frame.values  # feature matrix
y = label_series.values   # label vector

# Hold out 20% of the rows of every class separately, so each class keeps an
# 80/20 train/test ratio. A fixed random_state makes the split repeatable.
per_class_splits = [
    train_test_split(np.flatnonzero(y == digit), test_size=0.2, random_state=42)
    for digit in np.unique(y)
]

# Flatten the per-class (train, test) index pairs into two plain lists,
# keeping the classes in the order np.unique returned them.
train_indices = [row for train_part, _ in per_class_splits for row in train_part]
test_indices = [row for _, test_part in per_class_splits for row in test_part]

X_train, y_train = X[train_indices], y[train_indices]
X_test, y_test = X[test_indices], y[test_indices]

# Fit a 2-component LDA on the training rows only, then project both the
# training and the held-out rows with the fitted model.
lda = LinearDiscriminantAnalysis(n_components=2)
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)

# One (digit, marker, colour) triple per class drawn in the LDA plane.
lda_class_styles = ((0, 'o', 'r'), (1, 's', 'g'), (2, 'D', 'b'))

plt.figure(figsize=(10, 5))
for digit, shape, hue in lda_class_styles:
    train_pts = X_train_lda[y_train == digit]
    test_pts = X_test_lda[y_test == digit]
    # Training points are semi-transparent; test points get a black edge so
    # the two sets can be told apart within the same class colour.
    plt.scatter(train_pts[:, 0], train_pts[:, 1],
                marker=shape, color=hue, alpha=0.6, label=f'Train {digit}')
    plt.scatter(test_pts[:, 0], test_pts[:, 1],
                marker=shape, color=hue, edgecolors='k', label=f'Test {digit}')

plt.title("LDA Projection of MNIST Digits (0,1,2)")
plt.xlabel("LD1")
plt.ylabel("LD2")
plt.legend()
plt.show()

# Same visualisation with PCA for comparison: the projection is fitted on the
# training features alone (no labels are passed) and then applied to the
# held-out rows.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

plt.figure(figsize=(10, 5))
digits, shapes, hues = [0, 1, 2], ['o', 's', 'D'], ['r', 'g', 'b']
for position, digit in enumerate(digits):
    shape, hue = shapes[position], hues[position]
    in_train = y_train == digit
    in_test = y_test == digit
    # Training points are semi-transparent; test points get a black edge.
    plt.scatter(X_train_pca[in_train, 0], X_train_pca[in_train, 1],
                marker=shape, color=hue, alpha=0.6, label=f'Train {digit}')
    plt.scatter(X_test_pca[in_test, 0], X_test_pca[in_test, 1],
                marker=shape, color=hue, edgecolors='k', label=f'Test {digit}')

plt.title("PCA Projection of MNIST Digits (0,1,2)")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.legend()
plt.show()

# Print why a 3-D LDA projection is not attempted for this 3-class problem.
print(
    "LDA can project data into at most (c-1) dimensions, where c is the number of classes.",
    "Since we have 3 classes (0,1,2), the maximum dimension for LDA is 2 (3-1). Thus, LDA cannot project to 3D space.",
    sep="\n",
)