Problem 1A 

In [6]:
import numpy as np
from numpy.linalg import svd
from sklearn.metrics import accuracy_score
import pandas as pd

# Load Data
def _load_matrix(path):
    """Read a comma-delimited text file with np.loadtxt (default float dtype)."""
    return np.loadtxt(path, delimiter=",")


# Training images and their labels (labels are cast to integers)
X_train = _load_matrix("project2_data/handwriting_training_set.txt")
y_train = _load_matrix("project2_data/handwriting_training_set_labels.txt").astype(int)

# Held-out test images and their labels
X_test = _load_matrix("project2_data/handwriting_test_set.txt")
y_test = _load_matrix("project2_data/handwriting_test_set_labels_for_Python.txt").astype(int)

# Report array shapes as a quick sanity check on what was read
print("Training set:", X_train.shape)
print("Test set:", X_test.shape)

print("Labels loaded:", y_train.shape, y_test.shape)

# Build matrices
# The training array is consumed as NUM_CLASSES consecutive blocks of
# SAMPLES_PER_CLASS rows; block d is stored as the examples of digit d.
# NOTE(review): this relies on the rows being sorted by digit. y_train is not
# consulted here -- confirm the ordering against the label file.
NUM_CLASSES = 10
SAMPLES_PER_CLASS = 400

# Fail fast rather than silently building short or empty class matrices
expected_rows = NUM_CLASSES * SAMPLES_PER_CLASS
if X_train.shape[0] != expected_rows:
    raise ValueError(
        f"Expected {expected_rows} training rows "
        f"({NUM_CLASSES} digits x {SAMPLES_PER_CLASS} examples each), "
        f"got {X_train.shape[0]}"
    )

class_matrices = {}  # digit -> training examples

for digit in range(NUM_CLASSES):
    start = digit * SAMPLES_PER_CLASS
    end = start + SAMPLES_PER_CLASS
    class_matrices[digit] = X_train[start:end, :]

print("Class matrices created.")

# Compute SVD for each digit
svd_left_vectors = {}   # digit -> U matrix (columns are that digit's singular-vector basis)

for digit, digit_training_matrix in class_matrices.items():
    # Each row of the class matrix is one training image; transposing puts one
    # image per column so the left singular vectors live in pixel space.
    pixels_by_samples = digit_training_matrix.T

    # Thin SVD; only U is stored, the singular values and Vt are not used below
    U_matrix, singular_values, Vt_matrix = svd(pixels_by_samples, full_matrices=False)
    svd_left_vectors[digit] = U_matrix

    print(f"SVD computed for digit {digit}: U shape = {U_matrix.shape}")


print("All SVDs complete.")

def classify_digit(image_vector, num_basis_vectors, basis_by_digit=None):
    """Classify an image by its residual against each digit's SVD basis.

    For every digit, the image x is mapped to ``U @ (U.T @ x)`` where U holds
    the first ``num_basis_vectors`` columns of that digit's basis matrix (the
    orthogonal projection onto their span when the columns are orthonormal, as
    SVD left singular vectors are). The digit with the smallest residual norm
    ``||x - U U^T x||`` is returned.

    Parameters
    ----------
    image_vector : array_like
        Flattened image; its length must match the row count of each basis
        matrix.
    num_basis_vectors : int
        Number of leading basis columns to use; must be at least 1. Values
        larger than the number of available columns use all of them.
    basis_by_digit : mapping, optional
        Maps each digit label to its basis matrix (one basis vector per
        column). Defaults to the module-level ``svd_left_vectors``.

    Returns
    -------
    numpy scalar
        The label of the best-matching digit. Ties go to the label that comes
        first in the mapping's iteration order.

    Raises
    ------
    ValueError
        If ``num_basis_vectors`` is less than 1 or the basis mapping is empty.
    """
    # A non-positive count would not fail on its own: 0 makes every residual
    # equal (always predicting the first digit) and a negative value slices
    # columns off the end instead of selecting leading ones.
    if num_basis_vectors < 1:
        raise ValueError(
            f"num_basis_vectors must be >= 1, got {num_basis_vectors}"
        )

    # Fall back to the bases computed at module level when none are supplied
    bases = svd_left_vectors if basis_by_digit is None else basis_by_digit
    if len(bases) == 0:
        raise ValueError("basis_by_digit must contain at least one digit")

    digit_labels = []
    reconstruction_errors = []

    for digit, full_basis in bases.items():
        U_basis = full_basis[:, :num_basis_vectors]

        projection = U_basis @ (U_basis.T @ image_vector)
        error = np.linalg.norm(image_vector - projection)

        digit_labels.append(digit)
        reconstruction_errors.append(error)

    # Return the label itself rather than its position, so the result stays
    # correct even if the mapping's keys are not 0..N-1 in order.
    best_position = np.argmin(reconstruction_errors)
    return np.asarray(digit_labels)[best_position]



# Classification using k = 5, 10, 15, 20 basis vectors per digit
k_values = [5, 10, 15, 20]
accuracies = {}

for k in k_values:
    print(f"Testing k = {k} singular vectors...")

    # Predict a digit for every row (image) of the test set
    preds = [classify_digit(test_image, k) for test_image in X_test]

    # Fraction of test images whose predicted digit matches the label
    acc = accuracy_score(y_test, preds)
    accuracies[k] = acc
    print(f"Accuracy for k = {k}: {acc:.4f}\n")

# Collect the sweep into a table, one row per k in the order tested
accuracy_column = [accuracies[k_value] for k_value in k_values]
results = pd.DataFrame({
    "k (Singular Vectors)": k_values,
    "Accuracy": accuracy_column,
})

print("\nFinal Results:")
print(results)

results


Training set: (4000, 400)
Test set: (1000, 400)
Labels loaded: (4000,) (1000,)
Class matrices created.
SVD computed for digit 0: U shape = (400, 400)
SVD computed for digit 1: U shape = (400, 400)
SVD computed for digit 2: U shape = (400, 400)
SVD computed for digit 3: U shape = (400, 400)
SVD computed for digit 4: U shape = (400, 400)
SVD computed for digit 5: U shape = (400, 400)
SVD computed for digit 6: U shape = (400, 400)
SVD computed for digit 7: U shape = (400, 400)
SVD computed for digit 8: U shape = (400, 400)
SVD computed for digit 9: U shape = (400, 400)
All SVDs complete.
Testing k = 5 singular vectors...
Accuracy for k = 5: 0.9180

Testing k = 10 singular vectors...
Accuracy for k = 10: 0.9440

Testing k = 15 singular vectors...
Accuracy for k = 15: 0.9530

Testing k = 20 singular vectors...
Accuracy for k = 20: 0.9570


Final Results:
   k (Singular Vectors)  Accuracy
0                     5     0.918
1                    10     0.944
2                    15     0.953
3                    20     0.957

Unnamed: 0,k (Singular Vectors),Accuracy
0,5,0.918
1,10,0.944
2,15,0.953
3,20,0.957
