In [1]:
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
import numpy as np
from abc import ABC, abstractmethod
from sklearn.metrics import accuracy_score, f1_score

In [2]:
class Layer(ABC):
    """Abstract building block of the network.

    Concrete layers implement ``forward`` (compute an output from an input)
    and ``backward`` (turn the gradient w.r.t. the output into the gradient
    w.r.t. the input). Both cache slots start out as ``None``.
    """

    def __init__(self):
        # Cache slots available to subclasses.
        self.input = self.output = None

    @abstractmethod
    def forward(self, input):
        """Compute and return this layer's output for ``input``."""

    @abstractmethod
    def backward(self, output_gradient, learning_rate):
        """Return the gradient w.r.t. the layer input given ``output_gradient``.

        ``learning_rate`` is provided so that layers with parameters can
        update them during the backward pass.
        """


class Dense(Layer):
    """Fully connected layer computing ``weights @ input + bias``.

    Inputs are column-oriented: an array of shape ``(input_size, n)`` where
    each column is one sample (``n == 1`` for the per-sample training used in
    this notebook). The output has shape ``(output_size, n)``.

    Parameters
    ----------
    input_size : int
        Number of input features.
    output_size : int
        Number of output units.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # He initialisation: standard normal scaled by sqrt(2 / input_size).
        self.weights = np.random.randn(output_size, input_size) * np.sqrt(
            2.0 / input_size
        )
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Cache ``input`` for the backward pass and return the affine output."""
        self.input = input
        return self.weights @ self.input + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Parameters
        ----------
        output_gradient : ndarray of shape (output_size, n)
            Gradient of the loss with respect to this layer's output.
        learning_rate : float
            Step size for the parameter update.

        Returns
        -------
        ndarray of shape (input_size, n)
            Gradient of the loss with respect to this layer's input.
        """
        weights_gradient = output_gradient @ self.input.T
        # The bias is shared by every column, so its gradient is the sum over
        # columns. For a single column this equals output_gradient itself;
        # keepdims keeps the (output_size, 1) shape the in-place update needs.
        bias_gradient = np.sum(output_gradient, axis=1, keepdims=True)
        # Must be computed with the weights from before the update.
        input_gradient = self.weights.T @ output_gradient
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * bias_gradient
        return input_gradient


class Activation(Layer):
    """Parameter-free layer that applies an element-wise function.

    Parameters
    ----------
    activation : callable
        Function applied to the input in the forward pass.
    activation_prime : callable
        Derivative of ``activation``, evaluated at the cached input in the
        backward pass.
    """

    def __init__(self, activation, activation_prime):
        self.activation_prime = activation_prime
        self.activation = activation

    def forward(self, input):
        """Cache ``input`` and return ``activation(input)``."""
        self.input = input
        activated = self.activation(input)
        return activated

    def backward(self, output_gradient, learning_rate):
        """Chain rule: scale ``output_gradient`` by the derivative at the input.

        ``learning_rate`` is accepted for interface compatibility and is not
        used, since this layer has no parameters to update.
        """
        local_slope = self.activation_prime(self.input)
        return output_gradient * local_slope


class ReLU(Activation):
    """Rectified linear unit: ``max(x, 0)`` element-wise."""

    def __init__(self):
        def rectify(x):
            # Negative entries are clamped to zero.
            return np.maximum(x, 0)

        def rectify_slope(x):
            # 1 where the input is strictly positive, 0 elsewhere (as ints).
            return (x > 0).astype(int)

        super().__init__(rectify, rectify_slope)


class Sigmoid(Activation):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))`` element-wise."""

    def __init__(self):
        def sigmoid(x):
            # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))). logaddexp(0, -x)
            # evaluates log(exp(0) + exp(-x)) without overflowing, so inputs
            # with a large negative value no longer trigger an overflow warning.
            return np.exp(-np.logaddexp(0, -x))

        def sigmoid_prime(x):
            # Evaluate the sigmoid once and reuse it: s'(x) = s(x) * (1 - s(x)).
            s = sigmoid(x)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_prime)


def mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    The squared differences are averaged over every element.
    """
    residual = y_true - y_pred
    return np.mean(np.square(residual))


def mse_prime(y_true, y_pred):
    """Return the gradient of the mean squared error w.r.t. ``y_pred``.

    Each element is ``2 * (y_pred - y_true) / N`` where ``N`` is the number
    of elements in ``y_true``.
    """
    element_count = np.size(y_true)
    delta = y_pred - y_true
    return 2 * delta / element_count


class Flatten(Layer):
    """Reshape an input array into a single column and back again."""

    def forward(self, input):
        """Remember the input's shape and return its values as an (n, 1) column."""
        self.input_shape = input.shape
        flat = input.flatten()
        # Adding a trailing axis turns the 1-D copy into a column vector.
        return flat[:, None]

    def backward(self, output_gradient, learning_rate):
        """Reshape ``output_gradient`` to the shape seen in ``forward``.

        ``learning_rate`` is unused; this layer has no parameters.
        """
        original_shape = self.input_shape
        return output_gradient.reshape(original_shape)

In [3]:
# Tunable constants for the data split, preprocessing and reproducibility.
SEED = 42  # fixes the random weight/bias initialisation of the network below
TRAIN_SIZE = 1500  # first TRAIN_SIZE samples train the model, the rest test it
MAX_PIXEL_VALUE = 16
NUM_CLASSES = 10

x, y = load_digits(return_X_y=True)
x_train, y_train = x[:TRAIN_SIZE], y[:TRAIN_SIZE]
x_test, y_test = x[TRAIN_SIZE:], y[TRAIN_SIZE:]

# normalize - grayscale values for a pixel are between 0 and 16 for this dataset
x_train = x_train / MAX_PIXEL_VALUE
x_test = x_test / MAX_PIXEL_VALUE

# one-hot: row k of the identity matrix is the encoding of class k
y_train = np.eye(NUM_CLASSES)[y_train]
y_test = np.eye(NUM_CLASSES)[y_test]

# network - seed NumPy's global generator first so that the Dense layers,
# which draw their parameters from np.random, start from the same values on
# every run of this cell
np.random.seed(SEED)
network = [Flatten(), Dense(64, 128), ReLU(), Dense(128, NUM_CLASSES), Sigmoid()]


In [4]:
epochs = 50
learning_rate = 0.1

# Per-sample gradient descent: every sample triggers a full forward pass,
# a backward pass and an immediate parameter update.
for epoch in range(epochs):
    error = 0
    for sample, label in zip(x_train, y_train):
        # Column vectors; the target is reshaped once and reused below.
        expected = label.reshape(-1, 1)
        # Named `output` rather than `input` so the builtin input() is not
        # shadowed at notebook scope.
        output = sample.reshape(-1, 1)
        for layer in network:
            output = layer.forward(output)
        error += mse(expected, output)
        output_gradient = mse_prime(expected, output)
        for layer in reversed(network):
            output_gradient = layer.backward(output_gradient, learning_rate)
    # Mean of the per-sample errors accumulated during this epoch.
    error /= len(x_train)
    print(f"Epoch {epoch + 1}/{epochs} - Error: {error}")


Epoch 1/50 - Error: 0.06779940675814822
Epoch 2/50 - Error: 0.03266676038573149
Epoch 3/50 - Error: 0.02140012667395678
Epoch 4/50 - Error: 0.01619576374164077
Epoch 5/50 - Error: 0.01340168269711138
Epoch 6/50 - Error: 0.011668946889499269
Epoch 7/50 - Error: 0.010475968410768753
Epoch 8/50 - Error: 0.009590737088822838
Epoch 9/50 - Error: 0.008898237467366968
Epoch 10/50 - Error: 0.008345441544034821
Epoch 11/50 - Error: 0.007883617077136622
Epoch 12/50 - Error: 0.00749149466512825
Epoch 13/50 - Error: 0.007151170007690523
Epoch 14/50 - Error: 0.0068513753058377655
Epoch 15/50 - Error: 0.006579689314487496
Epoch 16/50 - Error: 0.0063330798433987555
Epoch 17/50 - Error: 0.006110914374501388
Epoch 18/50 - Error: 0.005901538202647225
Epoch 19/50 - Error: 0.005710694583677819
Epoch 20/50 - Error: 0.005530456693187848
Epoch 21/50 - Error: 0.005361789838703905
Epoch 22/50 - Error: 0.0052076078155167075
Epoch 23/50 - Error: 0.00506161039391688
Epoch 24/50 - Error: 0.00492332783342815
Epoch 

In [7]:
# Evaluate the trained network on the held-out samples: mean MSE and
# top-1 accuracy (argmax of the output vs. argmax of the one-hot label).
test_error = 0
correct_pred = 0
dataset_len = len(x_test)
for sample, label in zip(x_test, y_test):
    # Named `output` rather than `input` so the builtin input() is not
    # shadowed at notebook scope.
    output = sample.reshape(-1, 1)
    for layer in network:
        output = layer.forward(output)
    predicted = np.argmax(output)
    target = np.argmax(label)
    if predicted == target:
        correct_pred += 1
    test_error += mse(label.reshape(-1, 1), output)
test_error /= dataset_len
print(f"Test Error after training: {test_error}")
accuracy = correct_pred / dataset_len * 100
print(f"Accuracy: {accuracy}%")

Test Error after training: 0.018601656944059738
Accuracy: 88.88888888888889%
