In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np


def __Conv(data, n=10):
    """Split one flat ``[label, value, value, ...]`` row into a training pair.

    Args:
        data: 1-D array whose first entry is the class label and whose
            remaining entries are the raw input values.
        n: Length of the one-hot target vector.

    Returns:
        Tuple ``(x, y)`` of float32 arrays: the input values divided by 255
        and the one-hot encoding of the label.
    """
    label, raw_values = data[0], data[1:]
    # Comparing the index range against the label yields the one-hot mask.
    one_hot = (np.arange(n) == label).astype(np.float32)
    scaled = raw_values.astype(np.float32) / 255
    return (scaled, one_hot)

def Conv(data):
    """Apply ``__Conv`` to every row of ``data`` and return the pairs as a list."""
    return [__Conv(row) for row in data]

def load_data():
    """Load MNIST through Keras and convert both splits with ``Conv``.

    Every image is flattened to 784 values and prefixed with its label so that
    each row has the ``[label, value, ...]`` layout ``Conv`` consumes.

    Returns:
        Tuple ``(train_data, test_data)`` holding the ``Conv`` output for the
        training split and for the test split.
    """
    def to_samples(images, labels):
        # Column 0 carries the label, columns 1..784 the flattened image.
        rows = np.concatenate(
            (labels.reshape(-1, 1), images.reshape(-1, 784)), axis=1
        )
        return Conv(rows)

    (train_images, train_labels), (test_images, test_labels) = (
        keras.datasets.mnist.load_data()
    )
    return (
        to_samples(train_images, train_labels),
        to_samples(test_images, test_labels),
    )

# Build the train/test sample lists once; later cells read these module-level names.
train_data, test_data = load_data()


In [5]:
class Sigmoid:
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise."""

    def __call__(self, x):
        """Return the sigmoid of ``x``.

        The exponential is only ever taken of a non-positive number, so very
        large ``|x|`` cannot overflow. (Evaluating ``exp(-x)`` directly
        overflows to ``inf`` and emits a RuntimeWarning for very negative
        ``x``.)

        Args:
            x: Scalar or array-like of real numbers.

        Returns:
            Sigmoid values with the same shape as ``x``; a scalar input gives
            a NumPy scalar.
        """
        x = np.asarray(x)
        decay = np.exp(-np.abs(x))  # always within (0, 1]
        out = np.where(x >= 0, 1. / (1. + decay), decay / (1. + decay))
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # arrays of any other rank untouched.
        return out[()]

    def deriv(self, x):
        """Return the derivative ``s * (1 - s)`` where ``s`` is the sigmoid of ``x``."""
        s = self(x)  # evaluate the sigmoid once instead of twice
        return s * (1 - s)


class ReLU:
    """Rectified linear unit: negative entries become zero, the rest pass through."""

    def __call__(self, x):
        """Return the element-wise maximum of 0 and ``x``."""
        rectified = np.maximum(0, x)
        return rectified

    def deriv(self, x):
        """Return a float32 array holding 1 where ``x > 0`` and 0 elsewhere."""
        is_positive = x > 0
        return is_positive.astype(np.float32)


class LeakyReLU:
    """Leaky rectified linear unit: identity for ``x >= 0``, ``alpha * x`` below zero.

    Neither method modifies its argument. This matters because the caller
    keeps the pre-activation array around and later passes it to ``deriv``;
    writing into it would corrupt the gradient computation.
    """

    def __init__(self, alpha=0.01):
        """Store the slope used for negative inputs.

        Args:
            alpha: Slope applied to negative inputs (default 0.01).
        """
        self.alpha = alpha

    def __call__(self, x):
        """Return the activation of ``x`` as a new float32 array."""
        x = np.asarray(x)
        # np.where builds a fresh array, so the input is left untouched.
        return np.where(x < 0, x * self.alpha, x).astype(np.float32)

    def deriv(self, x):
        """Return the element-wise slope as a new float32 array.

        The slope is 1 where ``x > 0`` and ``alpha`` elsewhere; at exactly 0
        the left-hand slope ``alpha`` is used.
        """
        x = np.asarray(x)
        return np.where(x > 0, 1., self.alpha).astype(np.float32)


class MSE:
    """Squared-error cost between a network output and its target."""

    def __call__(self, res, y):
        """Add up the squared differences of paired entries of ``res`` and ``y``.

        The pairing walks both arguments along their first axis with ``zip``,
        so the shorter argument determines how many entries are used.
        """
        def squared_gap(pair):
            predicted, target = pair
            gap = predicted - target
            return gap * gap

        return sum(map(squared_gap, zip(res, y)))

    def deriv(self, res, y, z, activation_f):
        """Return ``(res - y)`` scaled element-wise by ``activation_f.deriv(z)``."""
        error = res - y
        slope = activation_f.deriv(z)
        return error * slope


class CrossEntropy:
    """Binary cross-entropy cost summed over the output units."""

    def __call__(self, res, y):
        """Return ``sum(-y * log(res) - (1 - y) * log(1 - res))``.

        Args:
            res: Network outputs.
            y: Target values, same shape as ``res``.

        Returns:
            The summed cost. Terms that evaluate to NaN (``0 * log(0)``) are
            counted as 0 through ``np.nan_to_num``.
        """
        res, y = np.asarray(res), np.asarray(y)
        # log(0) and 0 * inf are expected at saturated outputs; silence the
        # warnings here because nan_to_num cleans the values up right after.
        with np.errstate(divide="ignore", invalid="ignore"):
            per_output = -y * np.log(res) - (1 - y) * np.log(1 - res)
        return np.sum(np.nan_to_num(per_output))

    def deriv(self, res, y, z, activation_f):
        """Return ``res - y``.

        ``z`` and ``activation_f`` are accepted so the signature matches
        ``MSE.deriv``; they are not used.
        NOTE(review): ``res - y`` is the gradient with respect to the
        pre-activation only when the output unit is a sigmoid; confirm the
        pairing with other activations is intended.
        """
        return res - y


class Network:
    """Fully connected feed-forward network trained with mini-batch gradient descent.

    The same activation object is applied after every layer, the output layer
    included. Samples are processed one at a time as column vectors.
    """

    def __init__(self, shape, activation_f=None, cost_f=None):
        """Create the layers and draw their initial parameters.

        Args:
            shape: Sequence of layer sizes, input size first, output size last.
            activation_f: Object called as ``activation_f(z)`` that also offers
                ``deriv(z)``. ``None`` (the default) selects a new ``ReLU()``.
            cost_f: Object offering ``deriv(res, y, z, activation_f)``. ``None``
                (the default) selects a new ``CrossEntropy()``.
        """
        # Private, fixed-seed generator: the initial parameters are
        # reproducible, and constructing a network does not reset NumPy's
        # global random state for the rest of the session.
        rng = np.random.RandomState(2048)
        self.shape = shape
        layer_dims = list(zip(shape[:-1], shape[1:]))
        # All weight matrices are drawn before any bias vector.
        # w[k] has shape (fan_out, fan_in); b[k] has shape (fan_out, 1).
        self.w = [self._init_uniform(rng, fan_in, fan_out, fan_in)
                  for fan_in, fan_out in layer_dims]
        self.b = [self._init_uniform(rng, fan_in, fan_out, 1)
                  for fan_in, fan_out in layer_dims]
        # Defaults are created per instance rather than once at definition time.
        self.activation_f = ReLU() if activation_f is None else activation_f
        self.cost_f = CrossEntropy() if cost_f is None else cost_f

    @staticmethod
    def _init_uniform(rng, fan_in, rows, cols):
        """Draw a (rows, cols) array uniformly from [-sqrt(6/fan_in), sqrt(6/fan_in))."""
        limit = (6 / fan_in) ** 0.5
        return rng.uniform(-limit, limit, rows * cols).reshape(rows, cols)

    def forward(self, x):
        """Run one sample through the network and return the flat output vector."""
        ret = x.reshape(-1, 1)
        for w, b in zip(self.w, self.b):
            ret = self.activation_f(np.dot(w, ret) + b)
        return ret.reshape(-1)

    def backward(self, _x, _y):
        """Back-propagate one sample.

        Args:
            _x: Input sample; reshaped to a column vector.
            _y: Target vector; reshaped to a column vector.

        Returns:
            Tuple ``(dw, db)`` of lists shaped like ``self.w`` and ``self.b``
            holding this sample's gradient for every layer.
        """
        dw = [np.zeros(w.shape) for w in self.w]
        db = [np.zeros(b.shape) for b in self.b]
        x, y = _x.reshape(-1, 1), _y.reshape(-1, 1)
        # a[k] is the input to layer k (a[0] is the sample itself);
        # z[k] is layer k's pre-activation.
        a, z = [x], []

        for w, b in zip(self.w, self.b):
            x = np.dot(w, x) + b
            z.append(x)
            x = self.activation_f(x)
            a.append(x)

        # Output layer: the cost object supplies the error term directly.
        dz = self.cost_f.deriv(a[-1], y, z[-1], self.activation_f)
        dw[-1] = np.dot(dz, a[-2].transpose())
        db[-1] = dz

        # Hidden layers, walking from the last hidden layer back to the first.
        for i in range(2, len(self.shape)):
            dz = np.dot(self.w[-(i - 1)].transpose(), dz) * self.activation_f.deriv(z[-i])
            dw[-i] = np.dot(dz, a[-(i + 1)].transpose())
            db[-i] = dz

        return (dw, db)

    def update(self, batch, lr, lmbda, n):
        """Apply one gradient step computed from ``batch``.

        Args:
            batch: Sequence of ``(x, y)`` samples. An empty batch is a no-op.
            lr: Learning rate.
            lmbda: Weight-decay coefficient; weights are scaled by
                ``1 - lr * lmbda / n`` before the gradient step. Biases are
                not decayed.
            n: Training-set size used to scale the weight decay.
        """
        if len(batch) == 0:
            # Nothing to average over; also avoids dividing by zero below.
            return

        dw = [np.zeros(w.shape) for w in self.w]
        db = [np.zeros(b.shape) for b in self.b]

        for x, y in batch:
            _dw, _db = self.backward(x, y)
            # Accumulate in place instead of rebuilding the lists per sample.
            for total, grad in zip(dw, _dw):
                total += grad
            for total, grad in zip(db, _db):
                total += grad

        step = lr / len(batch)
        decay = 1 - lr * (lmbda / n)
        self.w = [decay * w - step * _w for w, _w in zip(self.w, dw)]
        self.b = [b - step * _b for b, _b in zip(self.b, db)]

    def SGD(self, epochs, batch_size, lr, lmbda, train_data, data_per_epoch, lr_scheduler=None, test_data=None):
        """Train with mini-batch gradient descent and print progress after each epoch.

        Args:
            epochs: Number of epochs to run.
            batch_size: Number of samples per mini-batch.
            lr: Initial learning rate.
            lmbda: Weight-decay coefficient passed on to ``update``.
            train_data: Sequence of ``(x, y)`` samples. It is copied
                (shallowly for lists) before shuffling, so the caller's
                ordering is left untouched.
            data_per_epoch: Batches start at offsets ``0, batch_size, ...``
                below this value; offsets beyond the end of the data are
                dropped so no empty batch is produced.
            lr_scheduler: Optional callable ``(lr, epoch) -> lr`` applied at
                the end of every epoch.
            test_data: Optional sequence of ``(x, y)`` samples; when given and
                non-empty, the number of correct predictions is printed.
        """
        if isinstance(train_data, np.ndarray):
            samples = train_data.copy()
        else:
            samples = list(train_data)
        usable = min(data_per_epoch, len(samples))
        # Explicit None/length check so NumPy arrays work as test_data too.
        has_test = test_data is not None and len(test_data) > 0

        for epoch in range(1, epochs + 1):
            # Seeded by the epoch number, so every run sees the same orderings.
            np.random.RandomState(epoch).shuffle(samples)
            for start in range(0, usable, batch_size):
                self.update(samples[start:start + batch_size], lr, lmbda, len(samples))
            if has_test:
                print(f"Epoch : {epoch}, Evaluate : {self.evaluate(test_data)} / {len(test_data)}")
            else:
                print(f"Epoch : {epoch}")
            if lr_scheduler:
                lr = lr_scheduler(lr, epoch)

    def evaluate(self, test_data):
        """Return how many samples have ``argmax(forward(x)) == argmax(y)``."""
        ret = sum(int(np.argmax(self.forward(x)) == np.argmax(y)) for x, y in test_data)
        return ret

    def Calc(self, x):
        """Return the index of the largest output for sample ``x``."""
        y = self.forward(x)
        return np.argmax(y)


In [4]:
def __DataAugmentation(x, d):
    """Return a copy of a flattened 28x28 image shifted by one pixel.

    Args:
        x: Array with 784 entries holding a 28x28 image.
        d: Direction index:
            0 moves the content one column toward higher column indices,
            1 moves it one row toward lower row indices,
            2 moves it one column toward lower column indices,
            3 moves it one row toward higher row indices.

    Returns:
        New flat array with 784 entries and the dtype of ``x``. The vacated
        row or column is zero and the content pushed past the edge is dropped.
    """
    row_step = (0, -1, 0, 1)[d]
    col_step = (1, 0, -1, 0)[d]
    src = x.reshape(28, 28)
    # zeros_like keeps the input dtype, so shifted float32 samples stay
    # float32 instead of silently becoming float64.
    shifted = np.zeros_like(src)

    # Source and destination windows are the same size, offset by the step.
    src_rows = slice(max(0, -row_step), 28 - max(0, row_step))
    dst_rows = slice(max(0, row_step), 28 - max(0, -row_step))
    src_cols = slice(max(0, -col_step), 28 - max(0, col_step))
    dst_cols = slice(max(0, col_step), 28 - max(0, -col_step))

    shifted[dst_rows, dst_cols] = src[src_rows, src_cols]
    return shifted.reshape(-1)

def DataAugmentation(data):
    """Append four one-pixel-shifted copies of every sample to ``data``.

    Args:
        data: List of ``(x, y)`` pairs where ``x`` is a flattened 28x28 image.

    Returns:
        The same list object, extended IN PLACE: the original samples come
        first, followed by four shifted copies per original sample (directions
        0 to 3, in sample order). Each copy shares the label object of its
        source sample. Calling this again on the result augments the
        already-augmented samples too.
    """
    n = len(data)
    extra = [
        (__DataAugmentation(data[i][0], d), data[i][1])
        for i in range(n)
        for d in range(4)
    ]
    data.extend(extra)
    return data

# NOTE: DataAugmentation appends to the list it is given and the result is bound
# back to `train_data`, so re-running this cell augments the already augmented
# samples again (the length grows five-fold on every run).
train_data = DataAugmentation(train_data)
print(len(train_data))


300000


In [7]:
def scheduler(lr, epoch):
    """Halve the learning rate whenever ``epoch + 1`` is a multiple of 30."""
    if (epoch + 1) % 30:
        return lr * 1.0
    return lr * 0.5

# 784 inputs, two hidden layers of 28 units, 10 outputs.
N = Network([784, 28, 28, 10])
N.SGD(
    epochs=50,
    batch_size=10,
    lr=0.03,
    lmbda=0.1,
    train_data=train_data,
    data_per_epoch=50000,
    lr_scheduler=scheduler,
    test_data=test_data,
)


Epoch : 1, Evaluate : 9278 / 10000
Epoch : 2, Evaluate : 9449 / 10000
Epoch : 3, Evaluate : 9508 / 10000
Epoch : 4, Evaluate : 9559 / 10000
Epoch : 5, Evaluate : 9562 / 10000
Epoch : 6, Evaluate : 9576 / 10000
Epoch : 7, Evaluate : 9608 / 10000
Epoch : 8, Evaluate : 9632 / 10000
Epoch : 9, Evaluate : 9641 / 10000
Epoch : 10, Evaluate : 9621 / 10000
Epoch : 11, Evaluate : 9633 / 10000
Epoch : 12, Evaluate : 9638 / 10000
Epoch : 13, Evaluate : 9651 / 10000
Epoch : 14, Evaluate : 9640 / 10000
Epoch : 15, Evaluate : 9671 / 10000
Epoch : 16, Evaluate : 9663 / 10000
Epoch : 17, Evaluate : 9661 / 10000
Epoch : 18, Evaluate : 9668 / 10000
Epoch : 19, Evaluate : 9680 / 10000
Epoch : 20, Evaluate : 9683 / 10000
Epoch : 21, Evaluate : 9680 / 10000
Epoch : 22, Evaluate : 9676 / 10000
Epoch : 23, Evaluate : 9699 / 10000
Epoch : 24, Evaluate : 9689 / 10000
Epoch : 25, Evaluate : 9698 / 10000
Epoch : 26, Evaluate : 9703 / 10000
Epoch : 27, Evaluate : 9698 / 10000
Epoch : 28, Evaluate : 9704 / 10000
E