# Assignment 11 — Dimensionality Reduction using PCA
Dataset: Fashion-MNIST (MNIST may be used as an alternative)

## Import Libraries

In [None]:

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import fashion_mnist


## Load Dataset

In [None]:

# Fetch Fashion-MNIST through Keras; load_data() yields a (train, test) pair,
# each of which unpacks into (images, labels).
train_split, test_split = fashion_mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Cell output: shapes of the training and test image arrays.
(X_train.shape, X_test.shape)


## Normalize Data

In [None]:

# Divide both image arrays by 255.0 (presumably mapping 8-bit pixel values
# into [0, 1]). The results are stored back under the same names, so running
# this cell a second time would divide again.
X_train, X_test = X_train / 255.0, X_test / 255.0


## Flatten Images

In [None]:

# Collapse each image into one feature vector: keep the sample axis and let
# NumPy infer the flattened length from the remaining dimensions.
n_train, n_test = X_train.shape[0], X_test.shape[0]
X_train_flat = X_train.reshape(n_train, -1)
X_test_flat = X_test.reshape(n_test, -1)

# Cell output: shapes of the flattened training and test matrices.
(X_train_flat.shape, X_test_flat.shape)


## Apply PCA

In [None]:

# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share of the variance (here 95%).
pca = PCA(n_components=0.95)

# Learn the projection from the training data only, then reuse the fitted
# projection for the test set.
X_train_pca = pca.fit_transform(X_train_flat)
X_test_pca = pca.transform(X_test_flat)

# Cell output: (number of training samples, number of retained components).
X_train_pca.shape


## Variance Explained Plot

In [None]:

# Cumulative share of variance captured by the first k principal components.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)

# Plot against explicit component counts 1..k. Calling plt.plot(y) alone would
# index the x-axis from 0, drawing the value for k components at x = k - 1 and
# making the "Number of Components" axis off by one.
component_counts = np.arange(1, len(cumulative_variance) + 1)

plt.plot(component_counts, cumulative_variance)
plt.xlabel("Number of Components")
plt.ylabel("Cumulative Variance Explained")
plt.title("Variance Explained by PCA Components")
plt.show()


## Reconstruct Images

In [None]:

# Number of training samples to show side by side.
n_show = 10

# Map the first n_show samples back from PCA space and restore the 28x28
# image layout for display.
reconstructed = pca.inverse_transform(X_train_pca[:n_show]).reshape(-1, 28, 28)

# Top row: original images; bottom row: their PCA reconstructions.
fig, axes = plt.subplots(2, n_show, figsize=(12, 3))
for i in range(n_show):
    axes[0, i].imshow(X_train[i], cmap='gray')
    axes[1, i].imshow(reconstructed[i], cmap='gray')

# Hide only the ticks instead of calling axis('off'): switching the axis off
# also hides axis labels, so the row labels set below would never be drawn.
for ax in axes.ravel():
    ax.set_xticks([])
    ax.set_yticks([])

axes[0, 0].set_ylabel("Original")
axes[1, 0].set_ylabel("Reconstructed")
plt.show()


## Classifier Without PCA

In [None]:

# Baseline: logistic regression trained directly on the flattened pixels.
clf1 = LogisticRegression(n_jobs=-1, max_iter=200).fit(X_train_flat, y_train)

# Score the baseline on the test images.
pred1 = clf1.predict(X_test_flat)
acc1 = accuracy_score(y_true=y_test, y_pred=pred1)

# Cell output: test accuracy without PCA.
acc1


## Classifier With PCA

In [None]:

# Same classifier settings as the baseline, but trained on the PCA-reduced
# features.
clf2 = LogisticRegression(n_jobs=-1, max_iter=200).fit(X_train_pca, y_train)

# Score on the test set projected with the same fitted PCA.
pred2 = clf2.predict(X_test_pca)
acc2 = accuracy_score(y_true=y_test, y_pred=pred2)

# Cell output: test accuracy with PCA.
acc2


## Scatter Plot of First Two PCs

In [None]:

# Size of the training subset used for the plot; one name keeps the feature
# slice and the label slice in sync.
n_vis = 2000

# Fit a separate 2-component PCA on the subset purely for visualization.
pca2 = PCA(2)
X_vis = pca2.fit_transform(X_train_flat[:n_vis])

plt.figure(figsize=(5,4))
points = plt.scatter(X_vis[:,0], X_vis[:,1], c=y_train[:n_vis], s=5, cmap='tab10')
# Colours encode the class label; without a colorbar and axis labels the
# figure cannot be interpreted.
cbar = plt.colorbar(points)
cbar.set_label("Class label")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.title("2D Visualization Using PCA")
plt.show()


## Discussion
Write observations about:
- variance vs components
- reconstruction quality
- accuracy before vs after PCA
- when PCA is useful / not useful