In [None]:
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt

# Load the scikit-learn digits dataset once; later cells read its
# `.images`, `.target` and `.data` attributes from this module-level object.
digits = load_digits()

# Helper function to plot a single digit image
def plot_digit(index):
    """Show one image from the loaded digits dataset, titled with its label.

    Reads the module-level ``digits`` object, so the dataset must already be
    loaded when this is called.

    Args:
        index: Position of the sample in ``digits.images`` / ``digits.target``.
    """
    # An explicit figure/axes pair guarantees each call draws on a fresh
    # canvas rather than on whatever axes pyplot currently treats as "current"
    # (e.g. a figure left open by an earlier cell).
    _, ax = plt.subplots()
    ax.imshow(digits.images[index], cmap='gray')
    ax.set_title(f"Label: {digits.target[index]}")
    ax.axis('off')
    plt.show()

# Demo: render the very first sample of the dataset
plot_digit(index=0)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.2,
    random_state=42,
)

# Train the baseline classifier on the untransformed features
# (fit() returns the estimator itself, so construction and training can be chained)
model = LogisticRegression(max_iter=10000).fit(X_train, y_train)

# Score the baseline on the held-out samples
y_pred = model.predict(X_test)
acc_original = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy without PCA: {acc_original:.4f}")


In [None]:
from sklearn.decomposition import PCA

# Fit PCA on the training split only, keeping enough components for 95% of the variance
pca = PCA(n_components=0.95).fit(X_train)

# Report how many components that threshold required
print(f"Number of PCA components to explain 95% variance: {pca.n_components_}")


In [None]:
# Project both splits into the space of the already-fitted PCA
X_train_pca, X_test_pca = pca.transform(X_train), pca.transform(X_test)

# Train a second classifier, this time on the reduced features
model_pca = LogisticRegression(max_iter=10000).fit(X_train_pca, y_train)

# Score it on the held-out samples
y_pred_pca = model_pca.predict(X_test_pca)
acc_pca = accuracy_score(y_true=y_test, y_pred=y_pred_pca)
print(f"Accuracy with PCA: {acc_pca:.4f}")

# A positive difference means the baseline scored higher than the PCA model
print(f"Accuracy Difference: {acc_original - acc_pca:.4f}")


In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the PCA model on the test split. It was previously
# computed but never displayed, so print it explicitly.
# (scikit-learn convention: rows are the actual class, columns the predicted class.)
cm = confusion_matrix(y_test, y_pred_pca)
print("Confusion matrix (rows: actual, columns: predicted):")
print(cm)

# Positions in the test split where the PCA model's prediction is wrong
wrong_indices = [
    i
    for i, (actual, predicted) in enumerate(zip(y_test, y_pred_pca))
    if actual != predicted
]
print(f"Total misclassified samples: {len(wrong_indices)}")

# Plot wrong predictions
for i in wrong_indices[:10]:  # limiting to 10 plots
    # Each test row holds 64 values; reshape to 8x8 to view it as an image
    plt.imshow(X_test[i].reshape(8, 8), cmap='gray')
    plt.title(f"Predicted: {y_pred_pca[i]}, Actual: {y_test[i]}")
    plt.axis('off')
    plt.show()
