# In [34]:
import gzip
import numpy as np
from scipy import special
import time

# Number of complete passes over the training set made by the training loop.
EPOCHS = 20


def extract_images(file, images_count):
    """Read 28x28 single-channel images from a gzip-compressed file.

    The first 16 bytes of the decompressed stream are skipped as a header;
    the following ``28 * 28 * images_count`` bytes are interpreted as
    unsigned 8-bit pixel values.

    Args:
        file: Path (or file object) accepted by ``gzip.open``.
        images_count: Number of images to read.

    Returns:
        A ``float32`` array of shape ``(images_count, 1, 28, 28)`` holding the
        raw pixel values (not rescaled).

    Raises:
        ValueError: If the file holds fewer pixel bytes than requested, so the
            buffer cannot be reshaped.
    """
    # The context manager guarantees the handle is closed even if reading fails.
    with gzip.open(file, 'rb') as f:
        f.read(16)  # skip the header
        buff = f.read(28 * 28 * images_count)
    # astype() copies, so the result is writable and independent of ``buff``.
    data = np.frombuffer(buff, dtype=np.uint8).astype(np.float32)
    return data.reshape(images_count, 1, 28, 28)


def extract_labels(file, images_count):
    """Read one-byte labels from a gzip-compressed file.

    The first 8 bytes of the decompressed stream are skipped as a header;
    the following ``images_count`` bytes are interpreted as unsigned 8-bit
    labels.

    Args:
        file: Path (or file object) accepted by ``gzip.open``.
        images_count: Number of labels to read.

    Returns:
        A one-dimensional ``int64`` array with up to ``images_count`` labels
        (fewer if the file is shorter).
    """
    # The context manager guarantees the handle is closed even if reading fails.
    with gzip.open(file, 'rb') as f:
        f.read(8)  # skip the header
        buff = f.read(images_count)
    return np.frombuffer(buff, dtype=np.uint8).astype(np.int64)


# Load the image arrays first (60000 training, 10000 test samples), then the
# matching label arrays, in the same order as the files are named below.
train_images, test_images = (
    extract_images('train-images-idx3-ubyte.gz', 60000),
    extract_images('t10k-images-idx3-ubyte.gz', 10000),
)

train_labels, test_labels = (
    extract_labels('train-labels-idx1-ubyte.gz', 60000),
    extract_labels('t10k-labels-idx1-ubyte.gz', 10000),
)


def relu(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Array (any shape) of numeric values.

    Returns:
        A new array of the same shape in which every negative entry is
        replaced by 0; ``x`` itself is left unmodified.
    """
    # Vectorised replacement for the per-element Python loop: same result,
    # runs in native code and works for arrays of any rank.
    return np.maximum(x, 0)


def relu_derivative(x):
    """Return the element-wise derivative of ReLU.

    Args:
        x: Array (any shape) of numeric values.

    Returns:
        A new array with the same shape and dtype as ``x`` holding 1 where
        the entry is strictly positive and 0 elsewhere; ``x`` itself is left
        unmodified.
    """
    x = np.asarray(x)
    # The boolean mask is cast back to the input dtype so the result matches
    # what the original element-by-element assignment produced.
    return (x > 0).astype(x.dtype)


class MNIST:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    The network has two weight matrices and no bias terms. ``train`` performs
    one gradient step per sample on the cross-entropy loss.
    """

    # Lower bound applied to probabilities before taking the logarithm, so a
    # probability that underflowed to 0 cannot yield ``0 * -inf = nan``.
    _LOG_EPS = 1e-12

    def __init__(self, inputs_count=28 * 28, hidden_count=300, output_count=10, l_rate=0.01):
        """Create the network with randomly initialised weights.

        Args:
            inputs_count: Number of input features.
            hidden_count: Number of hidden units.
            output_count: Number of output classes.
            l_rate: Step size used for the weight updates.
        """
        self.input = inputs_count
        self.h = hidden_count
        self.output = output_count
        self.l_rate = l_rate

        # Zero-mean normal weights; the standard deviation is the inverse
        # square root of the number of units the matrix feeds into.
        self.input_matrix = np.random.normal(0.0, pow(self.h, -0.5), (self.input, self.h))
        self.output_matrix = np.random.normal(0.0, pow(self.output, -0.5), (self.h, self.output))

        # Lambdas keep the lookup of the module-level helpers lazy (resolved
        # at call time) and let callers swap the activation and its derivative.
        self.hidden_activation = lambda x: relu(x)
        self.hidden_derivative = lambda x: relu_derivative(x)
        self.out_activation = lambda x: special.softmax(x, axis=-1)

    def train(self, inputs, targets):
        """Run one forward/backward pass on a single sample and update weights.

        Args:
            inputs: Array with ``inputs_count`` values in any shape; it is
                flattened to a single row.
            targets: Target vector with ``output_count`` values (one-hot in
                the training loop of this file).

        Returns:
            The cross-entropy loss of the sample, computed from the outputs
            produced before the weight update.
        """
        train_inputs = inputs.reshape(1, -1)
        train_targets = targets.reshape(1, -1)

        hidden_input = np.dot(train_inputs, self.input_matrix)
        hidden_output = self.hidden_activation(hidden_input)

        out_input = np.dot(hidden_output, self.output_matrix)
        out_output = self.out_activation(out_input)

        # For softmax outputs with cross-entropy loss the descent direction
        # with respect to the logits is (targets - probabilities). No extra
        # out * (1 - out) factor belongs here: that is the sigmoid derivative
        # and it makes the update vanish when the output saturates.
        out_errors = train_targets - out_output
        d_out = np.dot(hidden_output.T, out_errors)

        # Back-propagate through the *pre-update* output weights. The ReLU
        # derivative is taken on the activated values, which is equivalent
        # because relu(z) > 0 exactly when z > 0.
        hidden_errors = np.dot(out_errors, self.output_matrix.T) * self.hidden_derivative(hidden_output)
        d_hidden = np.dot(train_inputs.T, hidden_errors)

        self.output_matrix += self.l_rate * d_out
        self.input_matrix += self.l_rate * d_hidden

        # Clip before the log so zero probabilities on non-target classes do
        # not turn the loss into nan.
        safe_output = np.clip(out_output, self._LOG_EPS, None)
        error = -(train_targets * np.log(safe_output))
        return error.reshape(-1).sum()

    def evaluate(self, inputs):
        """Return the class probabilities for a single sample.

        Args:
            inputs: Array with ``inputs_count`` values in any shape; it is
                flattened to a single row.

        Returns:
            Array of shape ``(1, output_count)`` with the softmax outputs.
        """
        evaluate_inputs = inputs.reshape(1, -1)

        hidden_input = np.dot(evaluate_inputs, self.input_matrix)
        hidden_output = self.hidden_activation(hidden_input)

        out_input = np.dot(hidden_output, self.output_matrix)
        out_output = self.out_activation(out_input)

        return out_output


# Train a model built with the default layer sizes and learning rate,
# one sample at a time, for EPOCHS passes over the training images.
model = MNIST()
# Divide the pixel values by 255 before feeding them to the network.
normalized_train_images = train_images / 255
start_training_time = time.perf_counter()
for e in range(EPOCHS):
    # Per-sample losses returned by model.train during this epoch.
    train_errors = []
    for image, label in zip(normalized_train_images, train_labels):
        # One-hot encode the label as a length-10 target vector.
        targets = np.zeros(10)
        targets[label] = 1
        err = model.train(image, targets)
        train_errors.append(err)
    train_errors = np.array(train_errors)
    # Mean loss over all samples processed in this epoch.
    epoch_error = train_errors.sum() / len(train_errors)
    print("Epoch {}: error = {}".format(e, epoch_error))
end_training_time = time.perf_counter()

# Elapsed time of the whole training loop, as measured by time.perf_counter().
print('Training time: {}'.format(end_training_time - start_training_time))

# Evaluate the trained model on the test images: accuracy and mean
# cross-entropy loss, one sample at a time.
score = []
normalized_test_images = test_images / 255

test_errors = []
for image, label in zip(normalized_test_images, test_labels):
    result = model.evaluate(image).reshape(-1)
    # One-hot encode the label as a length-10 target vector.
    targets = np.zeros(10)
    targets[label] = 1
    # Clip before the log: a probability that underflowed to 0 on a
    # non-target class would otherwise give 0 * -inf = nan and poison the
    # reported test error.
    test_errors.append(-(targets * np.log(np.clip(result, 1e-12, None))))
    # 1 when the most probable class matches the label, otherwise 0.
    score.append(int(label == np.argmax(result)))
test_errors = np.array(test_errors)
test_error = test_errors.sum() / len(test_errors)
score = np.array(score)
print("accuracy = {}, test_error = {}".format(score.sum() / len(score), test_error))


# Captured notebook cell output (the run ended with KeyboardInterrupt):
# EPOCH 0
# EPOCH 1
# EPOCH 2
# EPOCH 3
# EPOCH 4
# EPOCH 5
# EPOCH 6
# EPOCH 7
# EPOCH 8
# EPOCH 9
# EPOCH 10
# EPOCH 11
# KeyboardInterrupt: