In [1]:
import numpy as np
from numpy.linalg import svd
import os
from PIL import Image
import matplotlib.pyplot as plt




In [2]:
len(os.listdir('CP05/DigitFiles/Digit0'))
print(f'1:04.png')
digit = 0
image_file = f'{3:04}.png'
img = Image.open(f'CP05/DigitFiles/Digit{digit}/{image_file}')
img = np.array(img.convert('L')).flatten()
img.size


1:04.png


784

In [3]:
svTol = 0.10
svd_basis = {}  # dictionary to store the SVD basis for each digit
for digit in range(10):
    image_files = os.listdir(f'CP05/DigitFiles/Digit{digit}')
    images = []
    # num_image_files = len(os.listdir(f'CP05/DigitFiles/Digit{digit}'))
    num_image_files = 200
    for i in range(1, num_image_files):
        image_file = f'{i:04}.png'
        img = Image.open(f'CP05/DigitFiles/Digit{digit}/{image_file}')
        img = np.array(img.convert('L')).flatten()
        images.append(img)  
        A = np.array(images)
        U, S, V = svd(A, full_matrices=False)
        cumsum = np.cumsum(S)
        k = np.argmax(cumsum / cumsum[-1] > svTol)
        svd_basis[digit] = U[:, :k]


In [4]:

# Get the list of directories in the DigitFiles folder
digit_dirs = os.listdir('CP05/DigitFiles')

# Initialize variables for digits 0 to 9
A = [None]*10
U = [None]*10
S = [None]*10
V = [None]*10  # changed from B to V
k_values = [0]*10

# Loop over each digit directory
for digit_dir in digit_dirs:
    digit = int(digit_dir[-1])  # get the digit from the directory name
    
    # Get the list of image file names in the digit's directory
    image_files = os.listdir(f'CP05/DigitFiles/{digit_dir}')
    
    for i, image_file in enumerate(image_files):
        img = Image.open(f'CP05/DigitFiles/{digit_dir}/{image_file}')
        img_array = np.array(img.convert('L')).flatten()
        
        # Initialize A[digit] with the size of the first image
        if i == 0:
            A[digit] = np.zeros((img_array.size, len(image_files)))
        
        A[digit][:, i] = img_array

    U[digit], S[digit], V[digit] = np.linalg.svd(A[digit])  # changed from U, S, _ to U, S, V
    
    # Determine the value of k for the current digit
    svdTol = 0.9
    sigmaAll = np.sum(S[digit][:min(img_array.size, len(image_files))])  # corrected line
    sigmaK = 0
    for i in range(np.shape(S[digit])[0]):
        sigmaK = sigmaK + S[digit][i]  # corrected line
        if sigmaK / sigmaAll > svdTol:
            k_values[digit] = i
            break

print(k_values)  # Display the values of k for digits 0 to 9


[286, 185, 305, 292, 286, 290, 263, 272, 274, 256]


In [5]:


def myClassifyDigits(svTol, nImgs):
    # Get the list of directories in the DigitFiles folder
    digit_dirs = os.listdir('CP05/DigitFiles')

    # Initialize variables for digits 0 to 9
    A = [None]*10
    U = [None]*10
    S = [None]*10
    V = [None]*10
    k_values = [0]*10
    w = list(range(10))  # w is a list of digits from 0 to 9
    nDigits = np.zeros(10)

    # Loop over each digit directory
    for digit_dir in digit_dirs:
        digit = int(digit_dir[-1])  # get the digit from the directory name

        # Get the list of image file names in the digit's directory
        image_files = os.listdir(f'CP05/DigitFiles/{digit_dir}')[:nImgs]

        for i, image_file in enumerate(image_files):
            img = Image.open(f'CP05/DigitFiles/{digit_dir}/{image_file}')
            img_array = np.array(img.convert('L')).flatten()

            # Initialize A[digit] with the size of the first image
            if i == 0:
                A[digit] = np.zeros((img_array.size, len(image_files)))

            A[digit][:, i] = img_array

        U[digit], S[digit], V[digit] = np.linalg.svd(A[digit])

        # Determine the value of k for the current digit
        sigmaAll = np.sum(S[digit][:min(img_array.size, len(image_files))])
        sigmaK = 0
        for i in range(np.shape(S[digit])[0]):
            sigmaK = sigmaK + S[digit][i]
            if sigmaK / sigmaAll > svTol:
                k_values[digit] = i
                break

    # Function to classify a test image
    def classify(test_image, U, S, V, k_values):
        # Initialize variables
        min_residual = np.inf
        digit_pred = None

        # Loop over each digit
        for digit in range(10):
            # Project the test image onto the space spanned by the first k singular vectors of the digit
            projection = np.dot(np.transpose(U[digit][:, :k_values[digit]]), test_image)

            # Reconstruct the image
            reconstruction = np.dot(U[digit][:, :k_values[digit]], projection)

            # Calculate the relative residual of the difference between the test image and the reconstruction
            residual = np.linalg.norm(test_image - reconstruction) / np.linalg.norm(test_image)

            # If the residual is smaller than the current minimum, update the minimum and the predicted digit
            if residual < min_residual:
                min_residual = residual
                digit_pred = digit

        return digit_pred

    # Loop over each digit directory in the TestDigits directory
    test_digit_dirs = os.listdir('CP05/TestDigits')
    for digit_dir in test_digit_dirs:
        digit = int(digit_dir[-1])  # get the digit from the directory name

        # Loop over each image in the digit directory
        test_files = os.listdir(f'CP05/TestDigits/{digit_dir}')[:nImgs]
        for i, image_file in enumerate(test_files):
            img = Image.open(f'CP05/TestDigits/{digit_dir}/{image_file}')
            img_array = np.array(img.convert('L')).flatten()

            # Classify the test image
            digit_pred = classify(img_array, U, S, V, k_values)

            # Update nDigits
            nDigits[digit_pred] += 1

    return w, nDigits


In [6]:
myClassifyDigits(0.2,100)

([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 array([ 86.,  99., 101.,  87., 109., 124., 101.,  92., 107.,  94.]))

In [7]:
myClassifyDigits(0.6,100)

([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 array([100., 100., 100., 100., 100., 100., 100., 100., 100., 100.]))