In [19]:
import numpy as np
from torchvision.datasets import MNIST
from sklearn.preprocessing import OneHotEncoder

def download_mnist(is_train: bool):
    """Load one MNIST split as flat NumPy arrays.

    Args:
        is_train: passed to torchvision's ``MNIST`` as ``train``; selects
            which split is loaded.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. Each row of ``data`` is
        one image flattened to 1-D and divided by 255.0 (presumably mapping
        8-bit pixel intensities into [0, 1]); ``labels`` holds the matching
        targets in the same order.

    The dataset is stored under ``./data`` and downloaded if missing.
    """
    def _flatten_and_scale(img):
        # Image -> 1-D float vector, divided by 255.0.
        return np.array(img).flatten() / 255.0

    dataset = MNIST(root='./data',
                    train=is_train,
                    download=True,
                    transform=_flatten_and_scale)

    # Materialise every (image, label) pair once, then split into columns.
    samples = list(dataset)
    pixel_rows = [pixels for pixels, _ in samples]
    targets = [target for _, target in samples]

    return np.array(pixel_rows), np.array(targets)

# Load the training split and the held-out test split.
train_X, train_Y = download_mnist(is_train=True)
test_X, test_Y = download_mnist(is_train=False)

def one_hot_encode(labels, num_classes=10):
    """One-hot encode integer class labels into a fixed-width matrix.

    The output always has ``num_classes`` columns and column ``k`` always
    stands for class ``k``, regardless of which labels happen to occur in
    ``labels``. This keeps separately encoded splits (e.g. train and test)
    aligned even when one of them is missing a class.

    Args:
        labels: array-like of integer class ids in ``[0, num_classes)``;
            any shape, it is flattened.
        num_classes: number of columns in the encoding.

    Returns:
        Float array of shape ``(len(labels), num_classes)`` with a single
        1.0 per row.

    Raises:
        ValueError: if any label falls outside ``[0, num_classes)``.
    """
    flat = np.asarray(labels).reshape(-1).astype(np.intp)

    # Reject out-of-range ids explicitly; negative ids would otherwise be
    # accepted by NumPy indexing and silently wrap around.
    if flat.size and (flat.min() < 0 or flat.max() >= num_classes):
        raise ValueError(
            f"labels must lie in [0, {num_classes}); "
            f"got values in [{flat.min()}, {flat.max()}]"
        )

    encoded = np.zeros((flat.size, num_classes))
    encoded[np.arange(flat.size), flat] = 1.0
    return encoded

# One-hot encode the integer labels of both splits (train first, then test).
train_Y_oh, test_Y_oh = map(one_hot_encode, (train_Y, test_Y))

# Quick shape check of the training inputs and their encoded labels.
print("Training Data Shape:", train_X.shape)
print("One-hot Encoded Labels Shape:", train_Y_oh.shape)


Training Data Shape: (60000, 784)
One-hot Encoded Labels Shape: (60000, 10)


In [27]:
def softmax(z):
    """Numerically stable softmax along the last axis.

    Args:
        z: array of logits; a single vector or a batch whose last axis
            indexes the classes.

    Returns:
        Array of the same shape as ``z`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    z = np.asarray(z)
    # Shift each row by its OWN maximum. Softmax is unchanged by a per-row
    # shift, but using one global maximum for a whole batch lets rows far
    # below it underflow to all zeros and then divide 0 by 0 (NaN).
    shifted = z - np.max(z, axis=-1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=-1, keepdims=True)

def cross_entropy_loss(y_true, y_pred):
    """Summed cross-entropy between targets and predicted probabilities.

    Computes ``-sum(y_true * log(y_pred + 1e-9))`` over every element; the
    small constant keeps the logarithm finite when a predicted value is 0.
    The result is a total, not an average.
    """
    log_probs = np.log(y_pred + 1e-9)
    weighted = y_true * log_probs
    total = np.sum(weighted)
    return -total

def _predict_proba(X, weight, bias):
    """Return softmax class probabilities for every row of ``X``."""
    logits = np.dot(X, weight) + bias
    # Softmax is invariant to a per-row shift; subtracting each row's own
    # maximum first keeps exp() in range for every sample in the batch.
    logits = logits - np.max(logits, axis=-1, keepdims=True)
    return softmax(logits)


def perceptron(train_X, train_Y, learning_rate=0.01, epochs=50, batch_size=100):
    """Train a single-layer softmax classifier with mini-batch gradient descent.

    The model is ``softmax(X @ weight + bias)``; weights and biases start at
    zero and are updated once per mini-batch with the batch-averaged gradient
    of the cross-entropy loss, ``X.T @ (y_pred - y_true)``.

    Args:
        train_X: 2-D array, one sample per row (``shape[1]`` features).
        train_Y: 2-D array of one-hot targets, one row per sample
            (``shape[1]`` classes).
        learning_rate: step size applied to the averaged gradient.
        epochs: number of full passes over the training data.
        batch_size: number of consecutive samples per update; the final
            batch of an epoch may be shorter.

    Returns:
        ``(weight, bias)`` with shapes ``(n_features, n_classes)`` and
        ``(n_classes,)``.

    Side effects:
        Prints the mean training loss on every fifth epoch (epochs 1, 6, ...).
    """
    n_features = train_X.shape[1]  # columns of X
    n_classes = train_Y.shape[1]
    num_samples = train_X.shape[0]

    weight = np.zeros((n_features, n_classes))
    bias = np.zeros(n_classes)

    for epoch in range(epochs):
        for start in range(0, num_samples, batch_size):
            X_batch = train_X[start:start + batch_size]
            y_batch = train_Y[start:start + batch_size]

            # Prediction error for the whole batch at once; row j equals the
            # per-sample (y_pred - y_true) of sample j.
            error = _predict_proba(X_batch, weight, bias) - y_batch

            # X_batch.T @ error is the sum over the batch of outer(x, error).
            delta_weight = np.dot(X_batch.T, error)
            delta_bias = error.sum(axis=0)

            # Average over the samples actually in this batch so a short
            # final batch is not under-weighted.
            n_batch = len(X_batch)
            weight -= learning_rate * delta_weight / n_batch
            bias -= learning_rate * delta_bias / n_batch

        if epoch % 5 == 0:
            y_pred = _predict_proba(train_X, weight, bias)
            # cross_entropy_loss sums over all samples; divide for the mean.
            loss = cross_entropy_loss(train_Y, y_pred) / num_samples
            print(f'Epoch {epoch + 1}/{epochs} - Loss: {loss:.4f}')

    return weight, bias

In [28]:
def compute_accuracy(X, Y_true, weight, bias):
    """Fraction of samples whose predicted class matches the one-hot target.

    Args:
        X: 2-D array, one sample per row.
        Y_true: 2-D one-hot targets, one row per sample.
        weight: ``(n_features, n_classes)`` weight matrix.
        bias: ``(n_classes,)`` bias vector.

    Returns:
        Accuracy as a float in ``[0, 1]``.
    """
    logits = np.dot(X, weight) + bias

    # Softmax is strictly increasing in each logit, so the most probable class
    # is simply the largest logit. Taking argmax on the logits directly avoids
    # exp() overflow/underflow turning rows into NaNs or ties, which would
    # change the selected class.
    predicted = np.argmax(logits, axis=1)
    expected = np.argmax(Y_true, axis=1)

    return np.mean(predicted == expected)

In [29]:
# Fit the classifier on the training split.
weight, bias = perceptron(
    train_X,
    train_Y_oh,
    learning_rate=0.01,
    epochs=50,
    batch_size=100,
)

# Show the learned parameters.
print("Weight matrix: ", weight)
print("Bias vector: ", bias)

# Score the fitted model on the held-out test split.
accuracy = compute_accuracy(test_X, test_Y_oh, weight, bias)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 1/50 - Loss: 0.7401
Epoch 6/50 - Loss: 0.4221
Epoch 11/50 - Loss: 0.3728
Epoch 16/50 - Loss: 0.3494
Epoch 21/50 - Loss: 0.3350
Epoch 26/50 - Loss: 0.3248
Epoch 31/50 - Loss: 0.3173
Epoch 36/50 - Loss: 0.3113
Epoch 41/50 - Loss: 0.3064
Epoch 46/50 - Loss: 0.3023
Weight matrix:  [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Bias vector:  [-0.27007485  0.31157985  0.04396047 -0.20441136  0.05444098  0.91977435
 -0.05792437  0.46330906 -1.08525695 -0.17539717]
Test Accuracy: 91.95%
