In [3]:
import gzip
import numpy as np
from scipy import special
import time

BATCH_SIZE = 60
EPOCHS = 20


def extract_images(file, images_count):
    f = gzip.open(file, 'r')
    f.read(16)
    buff = f.read(28 * 28 * images_count)
    data = np.frombuffer(buff, dtype=np.uint8).astype(np.float32)
    data = data.reshape(images_count, 1, 28, 28)
    return data


def extract_labels(file, images_count):
    f = gzip.open(file, 'r')
    f.read(8)
    buff = f.read(images_count)
    labels = np.frombuffer(buff, dtype=np.uint8).astype(np.int64)
    return labels.reshape([images_count, 1])


train_images = extract_images('train-images-idx3-ubyte.gz', 60000)
test_images = extract_images('t10k-images-idx3-ubyte.gz', 10000)

train_labels = extract_labels('train-labels-idx1-ubyte.gz', 60000)
test_labels = extract_labels('t10k-labels-idx1-ubyte.gz', 10000)


def relu(x):
    x_copy = x.copy()
    for i in range(len(x_copy)):
        for j in range(len(x_copy[i])):
            x_copy[i][j] = max(x_copy[i][j], 0)
    return x_copy


def relu_derivative(x):
    x_copy = x.copy()
    for i in range(len(x_copy)):
        for j in range(len(x_copy[i])):
            if x_copy[i][j] > 0:
                x_copy[i][j] = 1
            else:
                x_copy[i][j] = 0
    return x_copy


class MNIST:

    def __init__(self, inputs_count=28 * 28, hidden_count=300, output_count=10, l_rate=0.1):
        self.input = inputs_count
        self.h = hidden_count
        self.output = output_count
        self.l_rate = l_rate

        self.input_matrix = np.random.normal(0.0, pow(self.h, -0.5), (self.input, self.h))
        self.output_matrix = np.random.normal(0.0, pow(self.output, -0.5), (self.h, self.output))

        self.hidden_activation = lambda x: relu(x)
        self.out_activation = lambda x: special.softmax(x, axis=1)

    def train(self, inputs, targets):
        train_inputs = inputs.reshape(BATCH_SIZE, -1)

        hidden_input = np.dot(train_inputs, self.input_matrix)
        hidden_output = self.hidden_activation(hidden_input)

        out_input = np.dot(hidden_output, self.output_matrix)
        out_output = self.out_activation(out_input)

        out_errors = (out_output - targets) / out_output.shape[0]
        d_out = np.dot(hidden_output.T, out_errors)

        hidden_errors = np.dot(out_errors, self.output_matrix.T) * relu_derivative(hidden_output)
        d_hidden = np.dot(train_inputs.T, hidden_errors)

        self.output_matrix -= self.l_rate * d_out
        self.input_matrix -= self.l_rate * d_hidden

        log_eps = 1e-6  # to avoid np.log(0)
        error = -(targets * np.log(out_output + log_eps))
        return error.sum(axis=1)

    def evaluate(self, inputs):
        evaluate_inputs = inputs.reshape(1, -1)

        hidden_input = np.dot(evaluate_inputs, self.input_matrix)
        hidden_output = self.hidden_activation(hidden_input)

        out_input = np.dot(hidden_output, self.output_matrix)
        out_output = self.out_activation(out_input)

        return out_output


model = MNIST()
normalized_train_images = train_images / 255
start_training_time = time.perf_counter()
print('Train on MNIST dataset (60000 samples)\n')
for e in range(EPOCHS):
    train_errors = []
    for i in range(len(normalized_train_images) // BATCH_SIZE):
        images = normalized_train_images[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        labels = train_labels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        targets = np.zeros([BATCH_SIZE, 10])
        for idx, label in enumerate(labels):
            targets[idx][label] = 1
        err = model.train(images, targets)
        train_errors.append(err)
    train_errors = np.array(train_errors).reshape(-1)
    epoch_error = train_errors.sum() / len(train_errors)
    print("Epoch {}: error = {}".format(e, epoch_error))
end_training_time = time.perf_counter()

print('\nTraining time: {} seconds'.format(end_training_time - start_training_time))

score = []
normalized_test_images = test_images / 255

test_errors = []
for image, label in zip(normalized_test_images, test_labels):
    result = model.evaluate(image).reshape(-1)
    targets = np.zeros(10)
    targets[label] = 1
    log_eps = 1e-6
    test_errors.append(-(targets * np.log(result + log_eps)))
    if label == np.argmax(result):
        score.append(1)
    else:
        score.append(0)
test_errors = np.array(test_errors)
test_error = test_errors.sum() / len(test_errors)
score = np.array(score)
print("\naccuracy on test dataset = {}, test_error = {}".format(score.sum() / len(score), test_error))


Train on MNIST dataset (60000 samples)

Epoch 0: error = 0.24382465961149716
Epoch 1: error = 0.11395140057338417
Epoch 2: error = 0.08329490250691875
Epoch 3: error = 0.06577452438363005
Epoch 4: error = 0.05399733542337181
Epoch 5: error = 0.04530051974388118
Epoch 6: error = 0.03860238116849191
Epoch 7: error = 0.033175711878301664
Epoch 8: error = 0.028852905947698453
Epoch 9: error = 0.02526228630789577
Epoch 10: error = 0.02230642719626968
Epoch 11: error = 0.019783928341650648
Epoch 12: error = 0.0176446562908075
Epoch 13: error = 0.0158499247700459
Epoch 14: error = 0.014294431658880564
Epoch 15: error = 0.01296878461702537
Epoch 16: error = 0.011805503572240623
Epoch 17: error = 0.010802039096835242
Epoch 18: error = 0.009917628121411521
Epoch 19: error = 0.009148557016620255

Training time: 412.4132264 seconds

accuracy on test dataset = 0.9805, test_error = 0.0678247060309584
