In [22]:
import numpy as np
import math

class Sigmoid():
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise."""

    def __call__(self, x):
        """Return the sigmoid of ``x``.

        ``x`` may be a scalar or an array. A scalar input yields a NumPy
        scalar; an array input yields an array of the same shape.
        """
        x = np.asarray(x)
        if not np.issubdtype(x.dtype, np.inexact):
            # Integer (notably unsigned) input would wrap around on negation.
            x = x.astype(float)
        # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
        # exp(-x) does for large negative x. The two branches below are the
        # same function written so that each only ever uses that safe value.
        z = np.exp(-np.abs(x))
        out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
        # Indexing with an empty tuple turns a 0-d array back into a scalar
        # and leaves n-d arrays untouched.
        return out[()]

    def gradient(self, x):
        """Return the derivative of the sigmoid at ``x``: ``s(x) * (1 - s(x))``."""
        # Evaluate the activation once and reuse it.
        s = self(x)
        return s * (1 - s)

class Softmax():
    """Softmax activation taken over the last axis of the input."""

    def __call__(self, x):
        """Return the softmax of ``x`` along its last axis."""
        # Subtracting the per-row maximum leaves the result unchanged but
        # keeps the exponentials from overflowing.
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        totals = np.sum(exps, axis=-1, keepdims=True)
        return exps / totals

    def gradient(self, x):
        """Return ``p * (1 - p)`` element-wise, where ``p`` is the softmax of ``x``.

        This is only the diagonal of the softmax Jacobian; the cross terms
        between different outputs are not included.
        """
        probs = self(x)
        return probs * (1 - probs)
    
class MLP():
    """Multilayer Perceptron classifier. A fully-connected neural network with one hidden layer.
    Unrolled to display the whole forward and backward pass.

    Both layers compute ``input.dot(weights) + bias``; the same convention is
    used during training and prediction.

    Parameters:
    -----------
    nHidden: int
        The number of processing nodes (neurons) in the hidden layer.
    epoch: int
        The number of passes over the training set the algorithm will tune the weights for.
    eta: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, nHidden, epoch=3000, eta=0.01):
        self.nHidden = nHidden
        self.epoch = epoch
        self.eta = eta
        self.hFunc = Sigmoid()   # hidden-layer activation
        self.oFunc = Softmax()   # output-layer activation

    def _initialize_weights(self, X, y):
        """Create W/w0 (hidden layer) and V/v0 (output layer) sized from X and y."""
        n_features = X.shape[1]
        # y is one-hot encoded: one row per sample, one column per class, so
        # the number of output units is the number of columns.
        n_outputs = y.shape[1]
        # Hidden layer
        limit   = 1 / math.sqrt(n_features)
        self.W  = np.random.uniform(-limit, limit, (n_features, self.nHidden))
        self.w0 = np.zeros(self.nHidden)
        # Output layer
        limit   = 1 / math.sqrt(self.nHidden)
        self.V  = np.random.uniform(-limit, limit, (self.nHidden, n_outputs))
        self.v0 = np.zeros(n_outputs)

    def fit(self, X, y):
        """Train the network with per-sample (stochastic) gradient descent.

        Parameters:
        -----------
        X: array of shape (n_samples, n_features)
            Training inputs.
        y: array of shape (n_samples, n_outputs)
            One-hot encoded training targets.

        Raises:
        -------
        ValueError
            If X or y is not 2-D, or they hold a different number of samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2 or y.ndim != 2:
            raise ValueError("X must be 2-D (samples, features) and y must be 2-D one-hot (samples, classes)")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self._initialize_weights(X, y)

        for _ in range(self.epoch):
            for x_t, y_t in zip(X, y):
                # ..............
                #  Forward Pass
                # ..............

                # HIDDEN LAYER
                hidden_input = x_t.dot(self.W) + self.w0
                hidden_output = self.hFunc(hidden_input)
                # OUTPUT LAYER
                output_layer_input = hidden_output.dot(self.V) + self.v0
                y_pred = self.oFunc(output_layer_input)

                # ...............
                #  Backward Pass
                # ...............

                # OUTPUT LAYER
                # Error signal at the output pre-activations. It already points
                # in the descent direction because it uses (target - prediction).
                deltaO = (y_t - y_pred) * self.oFunc.gradient(output_layer_input)

                # HIDDEN LAYER
                # Propagate the output error back through V (using the weights
                # from before this step's update) and through the hidden
                # activation evaluated at its own input.
                deltaH = self.V.dot(deltaO) * self.hFunc.gradient(hidden_input)

                # Update weights (by gradient descent)
                # Each weight moves by eta * (error at its target unit) *
                # (activation of its source unit); biases see a constant input of 1.
                self.V += self.eta * np.outer(hidden_output, deltaO)
                self.v0 += self.eta * deltaO
                self.W += self.eta * np.outer(x_t, deltaH)
                self.w0 += self.eta * deltaH

    # Use the trained model to predict labels of X
    def predict(self, X):
        """Return the output-layer activations for X.

        X may be a single sample or a 2-D batch with one sample per row; the
        result has one row of softmax outputs per input sample.
        """
        # Forward pass:
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hFunc(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.oFunc(output_layer_input)
        return y_pred

In [None]:
# Unpack the train and test splits returned by mnist.load_data(), each as an
# (images, labels) pair.
# NOTE(review): `mnist` is not imported in any visible cell — presumably
# keras.datasets.mnist; confirm and add the import before running this cell.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [28]:
# import matplotlib.pyplot as plt

# fig = plt.figure
# for i in range(9):
#     plt.imshow(train_images[i], cmap='gray_r')
#     plt.show()

# One-hot encode the labels: row i gets a 1 in column train_labels[i],
# with 10 columns in total.
y = np.zeros((train_labels.shape[0], 10))
y[np.arange(train_labels.shape[0]), train_labels] = 1

# Flatten every image into a 784-element row and train an MLP with 5 hidden units.
# NOTE(review): the images are passed to fit() without any rescaling —
# confirm whether the inputs should be normalised first.
model = MLP(5)
model.fit(train_images.reshape(train_images.shape[0], 784), y)

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.])