In [1]:
import gzip
import numpy as np

from datetime import datetime
from scipy.special import softmax as sf

In [2]:
def load_images(file):
    """Read a gzip-compressed IDX image file into a uint8 array.

    The file starts with four big-endian 32-bit integers (magic number,
    image count, row count, column count) followed by one unsigned byte
    per pixel.

    Args:
        file: Path or file object accepted by ``gzip.open``.

    Returns:
        Read-only ``np.uint8`` array of shape
        ``(image_count, row_count, column_count)``.

    Raises:
        ValueError: If the header is truncated, the magic number is not the
            one used for unsigned-byte, 3-dimensional IDX data, or the
            payload size disagrees with the dimensions in the header.
    """
    header_size = 16
    images_magic = 2051  # 0x00000803: unsigned byte data, 3 dimensions

    with gzip.open(file, 'rb') as f:
        header = f.read(header_size)
        image_data = f.read()

    if len(header) != header_size:
        raise ValueError(f"truncated IDX image header: got {len(header)} bytes, expected {header_size}")

    magic, image_count, row_count, column_count = (
        int.from_bytes(header[offset:offset + 4], 'big') for offset in range(0, header_size, 4)
    )

    # Reject label files (or anything else) early instead of returning garbage pixels.
    if magic != images_magic:
        raise ValueError(f"unexpected magic number {magic} for an IDX image file, expected {images_magic}")

    expected_size = image_count * row_count * column_count
    if len(image_data) != expected_size:
        raise ValueError(f"IDX image payload has {len(image_data)} bytes, header declares {expected_size}")

    return np.frombuffer(image_data, dtype=np.uint8).reshape((image_count, row_count, column_count))

def load_labels(file):
    """Read a gzip-compressed IDX label file into a uint8 vector.

    The file starts with two big-endian 32-bit integers (magic number and
    label count) followed by one unsigned byte per label.

    Args:
        file: Path or file object accepted by ``gzip.open``.

    Returns:
        Read-only 1-D ``np.uint8`` array with one entry per label.

    Raises:
        ValueError: If the header is truncated, the magic number is not the
            one used for unsigned-byte, 1-dimensional IDX data, or the
            number of label bytes disagrees with the count in the header.
    """
    header_size = 8
    labels_magic = 2049  # 0x00000801: unsigned byte data, 1 dimension

    with gzip.open(file, 'rb') as f:
        header = f.read(header_size)
        label_data = f.read()

    if len(header) != header_size:
        raise ValueError(f"truncated IDX label header: got {len(header)} bytes, expected {header_size}")

    magic = int.from_bytes(header[:4], 'big')
    label_count = int.from_bytes(header[4:], 'big')

    # Reject image files (or anything else) early instead of returning pixel bytes as labels.
    if magic != labels_magic:
        raise ValueError(f"unexpected magic number {magic} for an IDX label file, expected {labels_magic}")

    if len(label_data) != label_count:
        raise ValueError(f"IDX label payload has {len(label_data)} bytes, header declares {label_count}")

    return np.frombuffer(label_data, dtype=np.uint8)

def one_hot(a, num_classes):
    """One-hot encode an array of integer class indices.

    Args:
        a: NumPy integer array of class indices in ``[0, num_classes)``;
            any shape, it is flattened before encoding.
        num_classes: Width of each encoded row.

    Returns:
        Float array of shape ``(a.size, num_classes)`` with a single 1.0 per
        row. The result is always 2-D, including for a single label, so that
        row-wise reductions such as ``argmax(axis=1)`` keep working.
    """
    # Row i of the identity matrix is the one-hot vector for class i.
    # No squeeze here: it would collapse a one-sample batch to a 1-D vector.
    return np.eye(num_classes)[a.reshape(-1)]

In [3]:
# Number of label classes used for one-hot encoding the targets.
NUM_CLASSES = 10
# Width of the network's output layer: one unit per class.
OUTPUT_LAYER = NUM_CLASSES

In [4]:
# Load the gzip-compressed IDX image and label files for both splits.
x_train = load_images('train-images-idx3-ubyte.gz')
y_train = load_labels('train-labels-idx1-ubyte.gz')

x_test = load_images('t10k-images-idx3-ubyte.gz')
y_test = load_labels('t10k-labels-idx1-ubyte.gz')

print(f"shape train: x - {x_train.shape}, y - {y_train.shape}")
print(f"shape test:  x - {x_test.shape}, y - {y_test.shape}")

# Scale the uint8 pixel values into [0, 1]; the result is a float array.
x_train, x_test = x_train / 255.0, x_test / 255.0

print(f"train: min {x_train.min()}, max {x_train.max()}")
print(f"test:  min {x_test.min()}, max {x_test.max()}")

# Turn the integer labels into one-hot rows of width NUM_CLASSES.
y_train = one_hot(y_train, NUM_CLASSES)
y_test = one_hot(y_test, NUM_CLASSES)

# Flatten every image to one feature vector; -1 infers rows * columns from
# the data instead of hard-coding the image size.
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

print(f"shape train: x - {x_train.shape}, y - {y_train.shape}")
print(f"shape test:  x - {x_test.shape}, y - {y_test.shape}")

shape train: x - (60000, 28, 28), y - (60000,)
shape train: x - (10000, 28, 28), y - (10000,)
train: min 0.0, max 1.0
test:  min 0.0, max 1.0
shape train: x - (60000, 784), y - (60000, 10)
shape train: x - (10000, 784), y - (10000, 10)


In [5]:
class FCNN(object):
    def __init__(self, input_layer, hidden_layer, output_layer):
        super(FCNN, self).__init__()
        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.output_layer = output_layer

        self.w = [np.random.normal(0, np.sqrt(2 / input_layer), (input_layer, hidden_layer)),
                  np.random.normal(0, np.sqrt(2 / (input_layer + output_layer)), (hidden_layer, output_layer))]

        self.b = [np.full(hidden_layer, 0.05),
                  np.full(output_layer, 0.05)]

    @staticmethod
    def relu(x):
        return np.maximum(x, 0)

    @staticmethod
    def softmax(x):
        return sf(x, axis=1)

    @staticmethod
    def derivative_relu(x):
        return np.where(x > 0.0, 1, 0)

    @staticmethod
    def accuracy(y_true, y_pred):
        return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

    @staticmethod
    def cross_entropy_loss(y_true, y_pred):
        return np.mean(-np.sum(y_true * np.log(y_pred), axis=1))

    def __forward(self, x):
        x = np.matmul(x, self.w[0]) + self.b[0]
        self.t = x.copy()
        x = self.relu(x)
        self.h = x.copy()

        x = np.matmul(x, self.w[1]) + self.b[1]
        x = self.softmax(x)
        return x

    def __backward(self, x_train, y_pred, y_true):
        mc = 1 / x_train.shape[0]
        dz1 = y_pred - y_true

        dw1 = mc * np.matmul(self.h.T, dz1)
        db1 = mc * np.sum(dz1, axis=0)

        dz0 = np.matmul(dz1, self.w[1].T) * self.derivative_relu(self.t)

        dw0 = mc * np.matmul(x_train.T, dz0)
        db0 = mc * np.sum(dz0, axis=0)

        self.w[1] = self.w[1] - self.lr * dw1
        self.b[1] = self.b[1] - self.lr * db1

        self.w[0] = self.w[0] - self.lr * dw0
        self.b[0] = self.b[0] - self.lr * db0

    def train(self, x_train, y_train, validation_data=None, epochs=10, learning_rate=0.1, batch_size=32):
        self.lr = learning_rate
        all_time = datetime.now()
        for epoch in range(epochs):
            start_time = datetime.now()
            for i in range(x_train.shape[0] // batch_size + np.sign(x_train.shape[0] % batch_size)):
                st = i * batch_size
                ed = min((i + 1) * batch_size, y_train.shape[0])

                y_pred = self.__forward(x_train[st:ed])
                self.__backward(x_train[st:ed], y_pred, y_train[st:ed])

            rtime = (datetime.now() - start_time).total_seconds()
            y_pred = self.__forward(x_train)
            rloss = self.cross_entropy_loss(y_train, y_pred)
            rmetric = self.accuracy(y_train, y_pred)

            print(f"epoch: {epoch+1: >2}, time: {rtime:.3f} sec, train loss: {rloss:.3f}, train accuracy: {rmetric:.3f} ")

        print(f"all train time: {(datetime.now()-all_time).total_seconds():.3f} sec")
            
        y_pred = self.__forward(validation_data[0])
        rloss = self.cross_entropy_loss(validation_data[1], y_pred)
        rmetric = self.accuracy(validation_data[1], y_pred)

        print()
        print(f"test loss: {rloss:.3f}, test accuracy: {rmetric:.3f} ")

In [6]:
# Hyperparameters for this run.
HIDDEN_LAYER = 300
LR = 0.1
NUMBER_EPOCH = 20
BATCH_SIZE = 64
SEED = 42

# FCNN draws its initial weights from NumPy's global RNG; seed it so that
# re-running this cell reproduces the same initialisation and metrics.
np.random.seed(SEED)

fcnn = FCNN(input_layer=x_train.shape[1], hidden_layer=HIDDEN_LAYER, output_layer=OUTPUT_LAYER)
fcnn.train(x_train, y_train, validation_data=(x_test, y_test),
           epochs=NUMBER_EPOCH, learning_rate=LR, batch_size=BATCH_SIZE)

epoch:  1, time: 3.065 sec, train loss: 0.248, train accuracy: 0.926 
epoch:  2, time: 3.159 sec, train loss: 0.175, train accuracy: 0.949 
epoch:  3, time: 3.202 sec, train loss: 0.134, train accuracy: 0.961 
epoch:  4, time: 3.225 sec, train loss: 0.108, train accuracy: 0.969 
epoch:  5, time: 3.893 sec, train loss: 0.091, train accuracy: 0.974 
epoch:  6, time: 3.234 sec, train loss: 0.078, train accuracy: 0.978 
epoch:  7, time: 3.492 sec, train loss: 0.068, train accuracy: 0.981 
epoch:  8, time: 5.228 sec, train loss: 0.061, train accuracy: 0.983 
epoch:  9, time: 3.821 sec, train loss: 0.055, train accuracy: 0.985 
epoch: 10, time: 3.661 sec, train loss: 0.049, train accuracy: 0.987 
epoch: 11, time: 3.814 sec, train loss: 0.045, train accuracy: 0.988 
epoch: 12, time: 3.529 sec, train loss: 0.041, train accuracy: 0.989 
epoch: 13, time: 3.590 sec, train loss: 0.037, train accuracy: 0.990 
epoch: 14, time: 3.526 sec, train loss: 0.034, train accuracy: 0.991 
epoch: 15, time: 3.7