In [6]:
import numpy as np
import idx2numpy
from datetime import datetime

In [7]:
# Prepare data: read the four MNIST IDX files from the working directory.
# Images are divided by 255 and each 28x28 sample is flattened to a row of
# 784 values; labels are turned into one-hot rows by indexing a 10x10
# identity matrix with the integer class labels.

# training split
x_train = idx2numpy.convert_from_file('train-images.idx3-ubyte') / 255
y_train = np.eye(10)[idx2numpy.convert_from_file('train-labels.idx1-ubyte')]
x_train = x_train.reshape((len(x_train), 28 * 28))

# test split
x_test = idx2numpy.convert_from_file('t10k-images.idx3-ubyte') / 255
y_test = np.eye(10)[idx2numpy.convert_from_file('t10k-labels.idx1-ubyte')]
x_test = x_test.reshape((len(x_test), 28 * 28))

In [8]:
def cross_entropy_loss(x, y, eps=1e-12):
    """Mean cross-entropy between target rows ``x`` and predicted rows ``y``.

    Note the argument order: callers in this file pass the (one-hot) targets
    first and the predicted probabilities second.

    Args:
        x: targets, 2-D array with one row per sample.
        y: predicted probabilities, same shape as ``x``.
        eps: lower bound applied to ``y`` before taking the logarithm.

    Returns:
        The per-sample cross-entropy ``-sum(x * log(y))`` averaged over rows.
    """
    # A predicted probability of exactly 0 (softmax underflow on a confident
    # prediction) would give log(0) = -inf, and 0 * -inf = nan would poison
    # the whole mean. Bounding y from below keeps every term finite.
    safe_y = np.maximum(y, eps)
    return np.mean(-np.sum(x * np.log(safe_y), axis=1))

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    floor = 0
    rectified = np.maximum(x, floor)
    return rectified

def softmax(x):
    """Row-wise softmax of a 2-D array.

    Args:
        x: array of shape (rows, columns); each row is normalised
           independently (the reduction runs over axis 1).

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Softmax is invariant to adding a constant to a row, so subtract the row
    # maximum first: the largest exponent becomes exp(0) = 1, which prevents
    # overflow (inf / inf = nan) for large inputs and 0 / 0 for very negative
    # ones.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)

In [9]:
class NeuralNetwork(object):
    """Fully connected classifier with one ReLU hidden layer and a softmax output.

    Trained with mini-batch gradient descent on the cross-entropy loss. The
    class relies on the module-level helpers ``relu``, ``softmax`` and
    ``cross_entropy_loss``.
    """

    def __init__(self, num_nodes_in_layers):
        """Allocate weights and biases.

        Args:
            num_nodes_in_layers: sequence whose first three entries are the
                input size, the hidden-layer size and the output size.
        """
        n_in = num_nodes_in_layers[0]
        n_hidden = num_nodes_in_layers[1]
        n_out = num_nodes_in_layers[2]

        # weights: zero-mean normal with standard deviation sqrt(2 / fan_in)
        self.w1 = np.random.normal(0, np.sqrt(2 / n_in), [n_in, n_hidden])
        self.w2 = np.random.normal(0, np.sqrt(2 / n_hidden), [n_hidden, n_out])

        # biases
        self.b1 = np.zeros((1, n_hidden))
        self.b2 = np.zeros((1, n_out))

    def forward(self, x):
        """Run a forward pass and cache the intermediate results.

        The pre-activations (``z1``, ``z2``) and activations (``a1``, ``a2``)
        are stored on the instance because ``backpropagation`` reads them.

        Args:
            x: 2-D array with one sample per row.

        Returns:
            ``self.a2``, the softmax output with one row per input row.
        """
        self.z1 = np.dot(x, self.w1) + self.b1
        self.a1 = relu(self.z1)
        self.z2 = np.dot(self.a1, self.w2) + self.b2
        self.a2 = softmax(self.z2)
        return self.a2

    def backpropagation(self, xTrain, yTrain, learningRate):
        """Apply one gradient-descent step to all weights and biases.

        Uses the values cached by the most recent ``forward`` call, so it must
        be called right after ``forward(xTrain)``.

        Args:
            xTrain: the batch that was just passed to ``forward``.
            yTrain: target rows for that batch, same shape as ``self.a2``.
            learningRate: step size multiplied into every gradient.
        """
        batch_rows = self.a2.shape[0]

        # output layer: (prediction - target), averaged over the batch
        dz2 = (self.a2 - yTrain) / batch_rows
        dw2 = self.a1.T.dot(dz2)
        db2 = np.sum(dz2, axis=0, keepdims=True)

        # hidden layer: propagate through w2 (before it is updated) and zero
        # the gradient wherever the ReLU input was not positive
        dz1 = dz2.dot(self.w2.T) * (self.z1 > 0.0)
        dw1 = np.dot(xTrain.T, dz1)
        db1 = np.sum(dz1, axis=0, keepdims=True)

        # update weights
        self.w2 -= learningRate * dw2
        self.w1 -= learningRate * dw1

        # update biases
        self.b2 -= learningRate * db2
        self.b1 -= learningRate * db1

    def _evaluate(self, x, y):
        """Return ``(cross-entropy, accuracy)`` of the current model on ``(x, y)``."""
        predictions = self.forward(x)
        cross_entropy = cross_entropy_loss(y, predictions)
        accuracy = np.mean(np.argmax(y, axis=1) == np.argmax(predictions, axis=1))
        return cross_entropy, accuracy

    def train(self, x_train, y_train, epochs=20, learning_rate=0.1, batch_size=64):
        """Train with mini-batch gradient descent and print progress.

        Batches are consecutive slices of the data in the order given. After
        each epoch the loss and accuracy on the full training set are printed,
        followed by a summary once training has finished.

        Args:
            x_train: 2-D array of training samples, one per row.
            y_train: target rows aligned with ``x_train``.
            epochs: number of passes over the data.
            learning_rate: step size passed to ``backpropagation``.
            batch_size: number of rows per batch; must be at least 1.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        n_samples = len(x_train)
        cross_entropy = accuracy = None
        start_train = datetime.now()

        for epoch in range(epochs):
            start_time = datetime.now()
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                x_batch = x_train[start:stop]
                y_batch = y_train[start:stop]

                self.forward(x_batch)
                self.backpropagation(x_batch, y_batch, learning_rate)

            elapsed = (datetime.now() - start_time).total_seconds()
            cross_entropy, accuracy = self._evaluate(x_train, y_train)

            print(
                f'Epoch[{epoch + 1}]    Time: {elapsed:.2f} s\t\tcross-entropy-error: {cross_entropy:.3f}\t\taccuracy: {accuracy:.3f}')

        finish_train = datetime.now()

        # With epochs == 0 the loop body never ran, so the metrics for the
        # summary below still have to be computed.
        if cross_entropy is None:
            cross_entropy, accuracy = self._evaluate(x_train, y_train)

        print(f"Train time: {(finish_train - start_train).total_seconds():.2f} s\n"
              f"Train error: {cross_entropy:.3f}\n"
              f"Train accuracy: {accuracy:.3f}")

    def test(self, x_test, y_test):
        """Print the cross-entropy and accuracy of the model on ``(x_test, y_test)``."""
        cross_entropy, accuracy = self._evaluate(x_test, y_test)

        print()
        print(f"Test error: {cross_entropy:.3f}\nTest accuracy: {accuracy:.3f} ")

In [10]:
# Seed NumPy's global RNG before building the network: the weights are drawn
# with np.random.normal, so without a seed every Restart & Run All would
# print different metrics.
np.random.seed(42)

# 784 inputs (28 * 28 pixels), 300 hidden units, 10 output classes
nn = NeuralNetwork([784, 300, 10])
nn.train(x_train, y_train)
nn.test(x_test, y_test)

Epoch[1]    Time: 18.77 s		cross-entropy-error: 0.224		accuracy: 0.934
Epoch[2]    Time: 2.82 s		cross-entropy-error: 0.157		accuracy: 0.954
Epoch[3]    Time: 2.89 s		cross-entropy-error: 0.122		accuracy: 0.965
Epoch[4]    Time: 2.88 s		cross-entropy-error: 0.100		accuracy: 0.971
Epoch[5]    Time: 2.63 s		cross-entropy-error: 0.084		accuracy: 0.977
Epoch[6]    Time: 2.89 s		cross-entropy-error: 0.072		accuracy: 0.980
Epoch[7]    Time: 2.61 s		cross-entropy-error: 0.062		accuracy: 0.983
Epoch[8]    Time: 2.60 s		cross-entropy-error: 0.055		accuracy: 0.985
Epoch[9]    Time: 2.61 s		cross-entropy-error: 0.049		accuracy: 0.987
Epoch[10]    Time: 2.71 s		cross-entropy-error: 0.044		accuracy: 0.988
Epoch[11]    Time: 2.64 s		cross-entropy-error: 0.040		accuracy: 0.989
Epoch[12]    Time: 2.60 s		cross-entropy-error: 0.037		accuracy: 0.991
Epoch[13]    Time: 2.62 s		cross-entropy-error: 0.034		accuracy: 0.991
Epoch[14]    Time: 2.61 s		cross-entropy-error: 0.031		accuracy: 0.992
Epoch[15]    T