In [8]:
import numpy as np
import idx2numpy
from datetime import datetime

In [9]:
# prepare data
x_train = idx2numpy.convert_from_file('train-images.idx3-ubyte') / 255
y_train = idx2numpy.convert_from_file('train-labels.idx1-ubyte')
x_test = idx2numpy.convert_from_file('t10k-images.idx3-ubyte') / 255
y_test = idx2numpy.convert_from_file('t10k-labels.idx1-ubyte')

y_train = np.eye(10)[y_train] # convert to one-hot
y_test = np.eye(10)[y_test] # convert to one-hot

x_train = x_train.reshape((x_train.shape[0], 28 * 28))
x_test = x_test.reshape((x_test.shape[0], 28 * 28))

In [10]:
def cross_entropy_loss(x, y):
    return np.mean(-np.sum(x * np.log(y), axis=1))

def relu(x):
    return np.maximum(x, 0)

def softmax(x):
    exp = np.exp(x)
    return exp/np.sum(exp, axis = 1, keepdims = True)

In [21]:
class NeuralNetwork(object):
    def __init__(self, num_nodes_in_layers):
        # weights
        self.w1 = np.random.normal(0, 1, [num_nodes_in_layers[0], num_nodes_in_layers[1]])
        self.w2 = np.random.normal(0, 1, [num_nodes_in_layers[1], num_nodes_in_layers[2]])

        # biases
        self.b1 = np.zeros((1, num_nodes_in_layers[1]))
        self.b2 = np.zeros((1, num_nodes_in_layers[2]))


    def forward(self, x):
        self.z1 = np.dot(x, self.w1) + self.b1
        self.a1 = relu(self.z1)
        self.z2 = np.dot(self.a1, self.w2) + self.b2

        return softmax(self.z2)


    def backward_pass(self, out, y_train):
        self.delta_y = (out - y_train) / y_train.shape[0]
        self.dz0 = np.dot(self.delta_y, self.w2.T)
        self.dz0 * np.where(self.z1 > 0.0, 1, 0) # derivative of relu

    def backpropagation(self, x_train, learning_rate):
        dw2 = np.dot(self.a1.T, self.delta_y)
        db2 = np.sum(self.delta_y, axis=0)

        dw1 = np.dot(x_train.T, self.dz0)
        db1 = np.sum(self.dz0, axis=0)

        # ipdate weights
        self.w1 = self.w1 - learning_rate * dw1
        self.w2 = self.w2 - learning_rate * dw2

        # update biases
        self.b1 = self.b1 - learning_rate * db1
        self.b2 = self.b2 - learning_rate * db2


    def train(self, x_train, y_train, epochs=20, learning_rate=0.1, batch_size=64):
        start_train = datetime.now()
        for epoch in range(epochs):
            start_time = datetime.now()
            iteration = 0
            while iteration < len(x_train):
                x_batch = x_train[iteration:iteration + batch_size]
                y_batch = y_train[iteration:iteration + batch_size]

                y = self.forward(x_batch)

                self.backward_pass(y, y_batch)
                self.backpropagation(x_batch, learning_rate)
                iteration += batch_size

            time = (datetime.now() - start_time).total_seconds()
            out = self.forward(x_train)
            cross_entropy = cross_entropy_loss(y_train, out)
            accuracy = np.mean(np.argmax(y_train, axis=1) == np.argmax(out, axis=1))

            print(
                f'Epoch[{epoch + 1}]    Time: {time:.2f} s\t\tcross-entropy-error: {cross_entropy:.3f}\t\taccuracy: {accuracy:.3f}')

        finish_train = datetime.now()
        print(f"Train time: {(finish_train - start_train).total_seconds():.2f} s\n"
              f"Train error: {cross_entropy:.3f}\n"
              f"Train accuracy: {accuracy:.2f}")


    def test(self, x_test, y_test):
        out = self.forward(x_test)
        cross_entropy = cross_entropy_loss(y_test, out)
        accuracy = np.mean(np.argmax(y_test, axis=1) == np.argmax(out, axis=1))

        print()
        print(f"Test error: {cross_entropy:.3f}\nTest accuracy: {accuracy:.2f} ")

In [22]:
nn = NeuralNetwork([784, 300, 10])
nn.train(x_train, y_train)
nn.test(x_test, y_test)

Epoch[1]    Time: 4.47 s		cross-entropy-error: 2.011		accuracy: 0.916
Epoch[2]    Time: 4.55 s		cross-entropy-error: 1.006		accuracy: 0.939
Epoch[3]    Time: 4.21 s		cross-entropy-error: 0.716		accuracy: 0.947
Epoch[4]    Time: 4.10 s		cross-entropy-error: 0.514		accuracy: 0.955
Epoch[5]    Time: 4.15 s		cross-entropy-error: 0.417		accuracy: 0.958
Epoch[6]    Time: 4.15 s		cross-entropy-error: 0.347		accuracy: 0.962
Epoch[7]    Time: 4.18 s		cross-entropy-error: 0.304		accuracy: 0.963
Epoch[8]    Time: 4.40 s		cross-entropy-error: 0.244		accuracy: 0.968
Epoch[9]    Time: 4.86 s		cross-entropy-error: 0.211		accuracy: 0.970
Epoch[10]    Time: 4.76 s		cross-entropy-error: 0.183		accuracy: 0.972
Epoch[11]    Time: 4.58 s		cross-entropy-error: 0.170		accuracy: 0.973
Epoch[12]    Time: 4.36 s		cross-entropy-error: 0.139		accuracy: 0.976
Epoch[13]    Time: 4.18 s		cross-entropy-error: 0.134		accuracy: 0.976
Epoch[14]    Time: 4.27 s		cross-entropy-error: 0.130		accuracy: 0.976
Epoch[15]    Ti