In [1]:
import gzip
import numpy as np
from scipy import special
import time

BATCH_SIZE = 60
EPOCHS = 20


def extract_images(file, images_count):
    """Read images from a gzip-compressed IDX image file.

    Args:
        file: Path of the gzip archive to read.
        images_count: Number of 28x28 single-byte images to load.

    Returns:
        A float32 array of shape ``(images_count, 1, 28, 28)`` holding the
        raw pixel values (not normalized).

    Raises:
        ValueError: If the archive holds fewer than ``images_count`` images
            (the final reshape cannot succeed).
    """
    # The context manager guarantees the gzip handle is closed even if the
    # read fails; the original left the file open.
    with gzip.open(file, 'rb') as f:
        f.read(16)  # skip the 16-byte header that precedes the pixel data
        buff = f.read(28 * 28 * images_count)
    data = np.frombuffer(buff, dtype=np.uint8).astype(np.float32)
    return data.reshape(images_count, 1, 28, 28)


def extract_labels(file, images_count):
    """Read labels from a gzip-compressed IDX label file.

    Args:
        file: Path of the gzip archive to read.
        images_count: Number of single-byte labels to load.

    Returns:
        An int64 array of shape ``(images_count, 1)`` with one label per row.

    Raises:
        ValueError: If the archive holds fewer than ``images_count`` labels
            (the final reshape cannot succeed).
    """
    # The context manager guarantees the gzip handle is closed even if the
    # read fails; the original left the file open.
    with gzip.open(file, 'rb') as f:
        f.read(8)  # skip the 8-byte header that precedes the label bytes
        buff = f.read(images_count)
    labels = np.frombuffer(buff, dtype=np.uint8).astype(np.int64)
    return labels.reshape([images_count, 1])


# Training split: 60000 images with their labels.
train_images = extract_images('train-images-idx3-ubyte.gz', 60000)
train_labels = extract_labels('train-labels-idx1-ubyte.gz', 60000)

# Test split: 10000 images with their labels.
test_images = extract_images('t10k-images-idx3-ubyte.gz', 10000)
test_labels = extract_labels('t10k-labels-idx1-ubyte.gz', 10000)


def relu(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Array of any shape.

    Returns:
        A new array with the same shape and dtype as ``x`` in which every
        negative entry is replaced by zero. ``x`` itself is not modified.
    """
    # Vectorized replacement for the per-element Python loop: same values,
    # works for any number of dimensions, and runs in native code.
    return np.maximum(x, 0)


def relu_derivative(x):
    """Return the element-wise derivative of ReLU.

    Args:
        x: Array of any shape.

    Returns:
        A new array with the same shape and dtype as ``x`` holding 1 where
        the entry is strictly positive and 0 elsewhere. ``x`` itself is not
        modified.
    """
    x = np.asarray(x)
    # Vectorized mask instead of the per-element Python loop; casting back
    # keeps the input dtype, as the copy-and-overwrite original did.
    return (x > 0).astype(x.dtype)


class MNIST:
    """Fully connected classifier with one ReLU hidden layer and a softmax output.

    Weights are stored as ``input_matrix`` (inputs -> hidden) and
    ``output_matrix`` (hidden -> outputs); there are no bias terms. Training
    uses plain gradient descent on the cross-entropy loss.
    """

    def __init__(self, inputs_count=28 * 28, hidden_count=300, output_count=10, l_rate=0.1):
        """Create the network with randomly initialized weights.

        Args:
            inputs_count: Number of features per flattened sample.
            hidden_count: Number of hidden units.
            output_count: Number of output classes.
            l_rate: Learning rate applied to every gradient step.
        """
        self.input = inputs_count
        self.h = hidden_count
        self.output = output_count
        self.l_rate = l_rate

        # Zero-mean normal initialization; the standard deviation is the
        # inverse square root of each layer's output width.
        self.input_matrix = np.random.normal(0.0, pow(self.h, -0.5), (self.input, self.h))
        self.output_matrix = np.random.normal(0.0, pow(self.output, -0.5), (self.h, self.output))

        self.hidden_activation = lambda x: relu(x)
        # Kept next to the activation so the two can be swapped together.
        self.hidden_derivative = lambda x: relu_derivative(x)
        self.out_activation = lambda x: special.softmax(x, axis=1)

    def train(self, inputs, targets):
        """Run one gradient-descent step on a mini-batch.

        Args:
            inputs: Batch of samples; flattened to ``(batch, inputs_count)``.
            targets: One-hot array of shape ``(batch, output_count)``.

        Returns:
            A tuple ``(losses, predictions)``: the per-sample cross-entropy
            of shape ``(batch,)`` and the predicted class indices of shape
            ``(batch,)``, both computed from the forward pass made before
            the weights are updated.
        """
        # Derive the batch size from the data instead of a module constant,
        # so any batch size (including a final partial batch) works.
        train_inputs = inputs.reshape(-1, self.input)

        hidden_input = np.dot(train_inputs, self.input_matrix)
        hidden_output = self.hidden_activation(hidden_input)

        out_input = np.dot(hidden_output, self.output_matrix)
        out_output = self.out_activation(out_input)
        intermediate_result = out_output.argmax(axis=1)

        # Gradient of the batch-mean cross-entropy w.r.t. the output logits.
        out_errors = (out_output - targets) / out_output.shape[0]
        d_out = np.dot(hidden_output.T, out_errors)

        # The derivative is evaluated on the post-activation values; for
        # ReLU these are positive exactly where the pre-activations are.
        hidden_errors = np.dot(out_errors, self.output_matrix.T) * self.hidden_derivative(hidden_output)
        d_hidden = np.dot(train_inputs.T, hidden_errors)

        # Both gradients were computed from the old weights before updating.
        self.output_matrix -= self.l_rate * d_out
        self.input_matrix -= self.l_rate * d_hidden

        log_eps = 1e-6  # to avoid np.log(0)
        error = -(targets * np.log(out_output + log_eps))
        return error.sum(axis=1), intermediate_result

    def evaluate(self, inputs):
        """Compute class probabilities without updating the weights.

        Args:
            inputs: A single sample or a batch of samples; flattened to
                ``(n, inputs_count)``.

        Returns:
            Softmax probabilities of shape ``(n, output_count)``; a single
            sample yields shape ``(1, output_count)``.
        """
        evaluate_inputs = inputs.reshape(-1, self.input)

        hidden_input = np.dot(evaluate_inputs, self.input_matrix)
        hidden_output = self.hidden_activation(hidden_input)

        out_input = np.dot(hidden_output, self.output_matrix)
        out_output = self.out_activation(out_input)

        return out_output


# Train with mini-batches, reporting the mean loss and the accuracy observed
# over each epoch (both measured before each batch's weight update).
model = MNIST()
normalized_train_images = train_images / 255
start_training_time = time.perf_counter()
print('Train on MNIST dataset (60000 samples)\n')
for e in range(EPOCHS):
    train_errors = []
    epoch_score = []
    for i in range(len(normalized_train_images) // BATCH_SIZE):
        batch = slice(i * BATCH_SIZE, (i + 1) * BATCH_SIZE)
        images = normalized_train_images[batch]
        labels = train_labels[batch]

        # One-hot encode the labels: row k gets a 1 in column labels[k].
        targets = np.zeros([BATCH_SIZE, 10])
        targets[np.arange(BATCH_SIZE), labels[:, 0]] = 1

        err, iter_train_result = model.train(images, targets)
        train_errors.append(err)

        # Record 1 for every correct prediction in the batch, 0 otherwise.
        epoch_score.extend(
            1 if hit else 0 for hit in labels[:, 0] == iter_train_result
        )
    train_errors = np.array(train_errors).reshape(-1)
    epoch_error = train_errors.sum() / len(train_errors)
    epoch_acc = np.array(epoch_score).sum() / len(epoch_score)
    print("Epoch {}: error = {}, accuracy = {}".format(e, epoch_error, epoch_acc))
end_training_time = time.perf_counter()

print('\nTraining time: {} seconds'.format(end_training_time - start_training_time))

# Evaluate the trained model on the test split, one image at a time.
normalized_test_images = test_images / 255
log_eps = 1e-6  # keeps np.log away from zero

score = []
test_errors = []
for image, label in zip(normalized_test_images, test_labels):
    result = model.evaluate(image).reshape(-1)

    # One-hot target for this sample, used to pick out its cross-entropy term.
    targets = np.zeros(10)
    targets[label] = 1
    test_errors.append(-(targets * np.log(result + log_eps)))

    score.append(1 if label == np.argmax(result) else 0)

test_errors = np.array(test_errors)
test_error = test_errors.sum() / len(test_errors)
score = np.array(score)
print("\naccuracy on test dataset = {}, test_error = {}".format(score.sum() / len(score), test_error))


Train on MNIST dataset (60000 samples)

Epoch 0: error = 0.24366946834243106, accuracy = 0.9294
Epoch 1: error = 0.1111982706371235, accuracy = 0.9683833333333334
Epoch 2: error = 0.08020954088516694, accuracy = 0.97775
Epoch 3: error = 0.06256993306942726, accuracy = 0.9829666666666667
Epoch 4: error = 0.050654026930661546, accuracy = 0.98675
Epoch 5: error = 0.04203583751020802, accuracy = 0.9894833333333334
Epoch 6: error = 0.035477742383309745, accuracy = 0.9918666666666667
Epoch 7: error = 0.03029613955336112, accuracy = 0.9936166666666667
Epoch 8: error = 0.026160701076425284, accuracy = 0.9947666666666667
Epoch 9: error = 0.022802428935359262, accuracy = 0.9960833333333333
Epoch 10: error = 0.020029405234500582, accuracy = 0.9970833333333333
Epoch 11: error = 0.01774060124222088, accuracy = 0.9977
Epoch 12: error = 0.015805409204277924, accuracy = 0.99825
Epoch 13: error = 0.014159846902532587, accuracy = 0.9985333333333334
Epoch 14: error = 0.012763127793099856, accuracy = 0.99