In [1]:
import numpy as np
import plotly.express as px
from sklearn.metrics import accuracy_score, confusion_matrix

### 1. Form a matrix A for each digit, such that each row in A represents an image of that digit. (You will have 10 A’s).

In [2]:
# Read the four comma-delimited data files; every array is transposed
# immediately after loading.
def _read_csv_transposed(csv_name):
    """Load a comma-delimited text file with NumPy and return its transpose."""
    return np.loadtxt(csv_name, delimiter=',').T


training_input = _read_csv_transposed('trainInput.csv')
training_output = _read_csv_transposed('trainOutput.csv')
test_input = _read_csv_transposed('testInput.csv')
test_output = _read_csv_transposed('testOutput.csv')


In [3]:
# Build one matrix A per digit: training_matrix[d] stacks, one image per row,
# every row of training_input whose label in training_output equals d.
# Labels are cast to int once so that grouping does not depend on float
# values (e.g. 3.0) hashing to the same dict key as the int 3.
training_labels = np.asarray(training_output).astype(int)

# Each label must describe exactly one image row.
assert len(training_input) == len(training_labels), \
    'training_input and training_output must have the same number of samples'

# Fail loudly on a label outside 0-9 instead of silently dropping those rows.
unexpected_labels = np.setdiff1d(training_labels, np.arange(10))
if unexpected_labels.size:
    raise KeyError(f'unexpected training labels: {unexpected_labels.tolist()}')

training_matrix = {}
for digit in range(0, 10):
    # Boolean-mask selection keeps the rows in their original order and still
    # yields a 2-D array when a digit has no samples.
    training_matrix[digit] = training_input[training_labels == digit]
    print(f'{digit}: {training_matrix[digit].shape}')


0: (319, 256)
1: (252, 256)
2: (202, 256)
3: (131, 256)
4: (122, 256)
5: (88, 256)
6: (151, 256)
7: (166, 256)
8: (144, 256)
9: (132, 256)


### 2. Determine the singular value decomposition for each A. (Right singular vectors Vi are an orthogonal basis in the image space of that digit. We will refer to the right singular vectors as “singular images.”) You should get 10 sets of singular images, one for each digit.

In [4]:
# Number of leading right singular vectors ("singular images") kept per digit.
NUM_SINGULAR_IMAGES = 20

# singular_image[d] holds the first NUM_SINGULAR_IMAGES rows of Vt from the
# thin SVD of training_matrix[d]; each row is one singular image.
singular_image = {}
for digit, digit_images in training_matrix.items():
    # Only Vt is needed; U and the singular values are discarded.
    _, _, vt = np.linalg.svd(digit_images, full_matrices=False)
    singular_image[digit] = vt[:NUM_SINGULAR_IMAGES, :]

for digit, basis in singular_image.items():
    print(f"Matrix Vt{digit}: {basis.shape}")

# Sanity check: show the leading singular image of every digit as a 16x16 picture.
for digit in range(0, 10):
    fig = px.imshow(
        singular_image[digit][0].reshape(16, 16),
        binary_string=True,  # expects a bool, not the string 'True'
        title=f"First singular image of digit {digit}",
    )
    fig.show()

Matrix Vt0: (20, 256)
Matrix Vt1: (20, 256)
Matrix Vt2: (20, 256)
Matrix Vt3: (20, 256)
Matrix Vt4: (20, 256)
Matrix Vt5: (20, 256)
Matrix Vt6: (20, 256)
Matrix Vt7: (20, 256)
Matrix Vt8: (20, 256)
Matrix Vt9: (20, 256)


### 3. Express test images as a linear combination of the first k=20 singular images of each digit. (This is a least-squares problem of the form Ax=b).
### 4. Compute the distance between test images and their least-squares approximations.

In [5]:
# Least-squares classification (steps 3 and 4).
# For each digit d, the columns of A = singular_image[d].T are that digit's
# singular images. Every test image b is approximated by A @ x, where x
# minimises ||A x - b||, and the image is assigned to the digit whose
# approximation leaves the smallest distance ||b - A x||.

test_images = np.asarray(test_input)
# residuals[j, d]: distance between test image j and its approximation for digit d.
residuals = np.empty((len(test_images), 10))

for digit in range(0, 10):
    A = singular_image[digit].T
    # One lstsq call per digit solves for all test images at once (one
    # right-hand-side column per image) and avoids forming inv(A.T @ A).
    x, _, _, _ = np.linalg.lstsq(A, test_images.T, rcond=None)
    residuals[:, digit] = np.linalg.norm(test_images.T - A @ x, axis=0)

    # Show coefficients and distance for the first test image only; printing
    # them for every image and digit floods the notebook output.
    if len(test_images) > 0:
        print('Each least square approximations', x[:, 0])
        print('Distance', residuals[0, digit])

# argmin returns the first minimum, i.e. the lowest digit wins a tie.
predictions = residuals.argmin(axis=1).tolist()

print("Predicted digit for each test image:", predictions)




Each least square approximations [-5.41666556  2.41227133 -4.39537401  0.75664743 -3.70285325 -0.90125619
  1.57306974 -1.30962446 -5.51389539  2.58637327  0.31978069  0.09088643
 -0.96662854  0.28462644 -0.82679362  2.2250073  -1.62089348  1.40388135
 -0.54332827 -0.58966673]
Distance 9.450711113293487
Each least square approximations [ 7.18350384  0.05136589 -1.49083109  0.98947995 -0.1653551   4.13234743
 -0.95732688 -0.7623245   1.55905772 -3.34351012  0.45985687 -1.04857239
  0.5388918   0.7610206   0.44893412  0.64506411  0.4519482   1.40257684
 -1.15601582  0.37393031]
Distance 10.895407527825517
Each least square approximations [ 7.19285783  0.58763115 -0.48685216 -2.07888026  3.72047026  3.84501636
 -0.89360075 -1.82388366  1.9582215   0.63464952  2.42213996 -1.16167719
  1.04270674 -0.36627084  1.17241428  0.34896153 -3.76007856 -0.60774207
 -0.86264051  1.33529276]
Distance 9.569496016713453
Each least square approximations [-7.808613   -0.41321026 -2.88700221 -5.42480417  3

### 5. Calculate the overall correct classification rate, as well as correct classification rate for each digit in a confusion matrix.

In [6]:
# Overall correct-classification rate: fraction of test images whose
# prediction equals the true label.
accuracy = accuracy_score(test_output, predictions)
print(f"Overall accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (scikit-learn convention: rows are true digits, columns are
# predicted digits). Passing `labels` pins the matrix to 10x10 in digit order
# even if some digit never occurs, so conf_matrix[d, d] always refers to digit d.
digit_labels = np.arange(10)
true_digits = np.asarray(test_output).astype(int)
predicted_digits = np.asarray(predictions).astype(int)
conf_matrix = confusion_matrix(true_digits, predicted_digits, labels=digit_labels)
print(f"Confusion_matrix:\n {conf_matrix}")

# Correct-classification rate per digit: diagonal entry divided by its row sum.
digit_accuracy = {}
for digit in range(0, 10):
    correct = conf_matrix[digit, digit]
    total = np.sum(conf_matrix[digit, :])
    # Digits with no test samples are skipped to avoid dividing by zero.
    if total > 0:
        # Stored directly so the overall `accuracy` above is not overwritten.
        digit_accuracy[digit] = correct / total

for digit, rate in digit_accuracy.items():
    print(f"Digit {digit} has {rate:.2f} accuracy")



Overall accuracy: 93.97%
Confusion_matrix:
 [[355   0   2   0   1   0   0   0   0   1]
 [  0 259   0   0   3   0   2   0   0   0]
 [  8   1 179   2   3   0   0   1   4   0]
 [  1   0   4 148   1   8   0   1   2   1]
 [  1   1   0   0 187   1   1   3   0   6]
 [  8   1   2   4   0 140   0   0   2   3]
 [  2   0   0   0   2   2 164   0   0   0]
 [  0   1   1   0   3   0   0 139   0   3]
 [  4   0   2   6   0   1   0   0 149   4]
 [  0   1   0   1   4   0   0   4   1 166]]
Digit 0 has 0.99 accuracy
Digit 1 has 0.98 accuracy
Digit 2 has 0.90 accuracy
Digit 3 has 0.89 accuracy
Digit 4 has 0.94 accuracy
Digit 5 has 0.88 accuracy
Digit 6 has 0.96 accuracy
Digit 7 has 0.95 accuracy
Digit 8 has 0.90 accuracy
Digit 9 has 0.94 accuracy
