In [None]:
import random

import numpy as np


class Sigmoid:
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise."""

    def __call__(self, x):
        """Return the sigmoid of ``x`` (a scalar or a NumPy array)."""
        return 1. / (1. + np.exp(-x))

    def deriv(self, x):
        """Return the derivative of the sigmoid evaluated at ``x``.

        Uses the identity ``s'(x) = s(x) * (1 - s(x))``. The sigmoid is
        evaluated once and reused, instead of being computed twice.
        """
        s = self(x)
        return s * (1 - s)


class MSE:
    """Squared-error cost between a prediction and a target."""

    def __call__(self, res, y):
        """Return the sum of squared differences over paired elements.

        ``res`` and ``y`` are iterated in lockstep with ``zip``, so any
        trailing elements of the longer input are ignored.
        """
        total = 0
        for predicted, target in zip(res, y):
            diff = predicted - target
            total = total + diff * diff
        return total

    def deriv(self, res, y):
        """Return ``res - y``, the error term used for back-propagation.

        Note: this is the gradient of *half* the squared error; the factor
        of 2 from differentiating ``__call__`` is not included.
        """
        error = res - y
        return error


class Network:
    """Fully connected feed-forward network trained with mini-batch SGD.

    Attributes set by ``__init__``:
        shape: sequence of layer sizes, input layer first.
        w: list of weight matrices; ``w[k]`` has shape ``(shape[k + 1], shape[k])``.
        b: list of bias column vectors; ``b[k]`` has shape ``(shape[k + 1], 1)``.
        activation_f: object providing ``__call__(z)`` and ``deriv(z)``.
        cost_f: object providing ``__call__(res, y)`` and ``deriv(res, y)``.
    """

    def __init__(self, shape, activation_f=None, cost_f=None, seed=2048):
        """Create a network with randomly initialised weights and biases.

        Args:
            shape: layer sizes, e.g. ``[784, 28, 28, 10]``.
            activation_f: activation object; defaults to a new ``Sigmoid()``.
            cost_f: cost object; defaults to a new ``MSE()``.
            seed: value passed to ``np.random.seed`` before drawing the
                parameters (default 2048). Pass ``None`` to leave NumPy's
                global random state untouched.
        """
        if seed is not None:
            np.random.seed(seed)
        self.shape = shape
        self.w = [np.random.randn(y, x) for x, y in zip(shape[:-1], shape[1:])]
        self.b = [np.random.randn(y, 1) for y in shape[1:]]
        # Defaults are resolved here rather than in the signature so they are
        # not instantiated once at class-definition time and shared.
        self.activation_f = Sigmoid() if activation_f is None else activation_f
        self.cost_f = MSE() if cost_f is None else cost_f

    def forward(self, x):
        """Propagate one input through every layer and return a flat output vector."""
        ret = x.reshape(-1, 1)
        for w, b in zip(self.w, self.b):
            ret = self.activation_f(np.dot(w, ret) + b)
        return ret.reshape(-1)

    def backward(self, _x, _y):
        """Back-propagate a single sample.

        Args:
            _x: input vector (reshaped to a column internally).
            _y: target vector (reshaped to a column internally).

        Returns:
            ``(dw, db)``: lists of gradients with the same shapes as
            ``self.w`` and ``self.b``.
        """
        dw = [None] * len(self.w)
        db = [None] * len(self.b)
        x, y = _x.reshape(-1, 1), _y.reshape(-1, 1)
        # a[k] is the activation entering layer k (a[0] is the input);
        # z[k] is the pre-activation value produced by layer k.
        a, z = [x], []

        for w, b in zip(self.w, self.b):
            x = np.dot(w, x) + b
            z.append(x)
            x = self.activation_f(x)
            a.append(x)

        # Output layer error term.
        dz = self.cost_f.deriv(a[-1], y) * self.activation_f.deriv(z[-1])
        dw[-1] = np.dot(dz, a[-2].transpose())
        db[-1] = dz

        # Walk the hidden layers from last to first; negative index -i
        # addresses the i-th layer counted from the output.
        for i in range(2, len(self.shape)):
            dz = np.dot(self.w[-(i - 1)].transpose(), dz) * self.activation_f.deriv(z[-i])
            dw[-i] = np.dot(dz, a[-(i + 1)].transpose())
            db[-i] = dz

        return (dw, db)

    def update(self, batch, lr):
        """Apply one gradient-descent step using the mean gradient of ``batch``.

        Args:
            batch: sequence of ``(x, y)`` pairs.
            lr: learning rate.

        An empty batch is a no-op (previously it divided by zero).
        """
        if len(batch) == 0:
            return

        dw = [np.zeros(w.shape) for w in self.w]
        db = [np.zeros(b.shape) for b in self.b]

        for x, y in batch:
            _dw, _db = self.backward(x, y)
            for acc, grad in zip(dw, _dw):
                acc += grad
            for acc, grad in zip(db, _db):
                acc += grad

        step = lr / len(batch)
        self.w = [w - step * _w for w, _w in zip(self.w, dw)]
        self.b = [b - step * _b for b, _b in zip(self.b, db)]

    def SGD(self, epochs, batch_size, lr, train_data, test_data=None):
        """Train with mini-batch stochastic gradient descent.

        Args:
            epochs: number of passes over the training data.
            batch_size: number of samples per mini-batch.
            lr: learning rate passed to ``update``.
            train_data: sequence of ``(x, y)`` pairs. It is copied once and
                the copy is reshuffled each epoch, so the caller's sequence
                is not reordered.
            test_data: optional sequence of ``(x, y)`` pairs; when non-empty,
                the number of correct predictions is printed after every epoch.
        """
        samples = list(train_data)
        for epoch in range(1, epochs + 1):
            # Seeding the shuffler with the epoch number keeps runs reproducible.
            random.Random(epoch).shuffle(samples)
            batchs = [samples[i:i + batch_size] for i in range(0, len(samples), batch_size)]
            for batch in batchs:
                self.update(batch, lr)
            if test_data:
                print(f"Epoch : {epoch}, Evaluate : {self.evaluate(test_data)} / {len(test_data)}")
            else:
                print(f"Epoch : {epoch}")

    def evaluate(self, test_data):
        """Return how many samples have the arg-max output equal to the arg-max of ``y``."""
        ret = sum(int(np.argmax(self.forward(x)) == np.argmax(y)) for x, y in test_data)
        return ret


In [None]:
def Conv(data, n=10):
    """Turn one raw row into an ``(x, y)`` sample.

    ``data[0]`` is used as the class label and ``data[1:]`` as the raw
    feature values.

    Args:
        data: 1-D NumPy array for a single row.
        n: number of classes, i.e. the length of the one-hot target.

    Returns:
        ``(x, y)`` where ``x`` is ``data[1:]`` as float32 divided by 255 and
        ``y`` is a float32 vector of length ``n`` holding 1 at the position
        equal to the label and 0 elsewhere (all zeros if no position matches).
    """
    label = data[0]
    features = data[1:].astype(np.float32) / 255
    one_hot = (np.arange(n) == label).astype(np.float32)
    return (features, one_hot)

def load_data(path, n_train=9000):
    """Read a comma-separated integer file and split it into train/test samples.

    Each row is converted to an ``(x, y)`` pair with ``Conv``.

    Args:
        path: location of the CSV file.
        n_train: number of leading rows used for training; the remaining
            rows become the test set (default 9000).

    Returns:
        ``(train_data, test_data)``: two lists of ``(x, y)`` pairs.
    """
    # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
    # ``ndmin=2`` keeps a single-row file two-dimensional so rows iterate correctly.
    data = np.loadtxt(path, delimiter=',', dtype=int, ndmin=2)
    train_data = [Conv(row) for row in data[:n_train]]
    test_data = [Conv(row) for row in data[n_train:]]
    return (train_data, test_data)

# Load the CSV and split it into training and held-out samples via load_data.
# NOTE(review): this absolute path looks like a Colab sample-data location —
# confirm or make it configurable before running elsewhere.
train_data, test_data = load_data('/content/sample_data/mnist_test.csv')


In [None]:
# Build a network with layer sizes 784 -> 28 -> 28 -> 10.
N = Network([784, 28, 28, 10])
# Train for 50 epochs with mini-batches of 10 and learning rate 1.0; the number
# of correct predictions on test_data is printed after every epoch.
N.SGD(50, 10, 1.0, train_data, test_data = test_data)


Epoch : 1, Evaluate : 632 / 1000
Epoch : 2, Evaluate : 733 / 1000
Epoch : 3, Evaluate : 754 / 1000
Epoch : 4, Evaluate : 768 / 1000
Epoch : 5, Evaluate : 776 / 1000
Epoch : 6, Evaluate : 790 / 1000
Epoch : 7, Evaluate : 788 / 1000
Epoch : 8, Evaluate : 798 / 1000
Epoch : 9, Evaluate : 842 / 1000
Epoch : 10, Evaluate : 874 / 1000
Epoch : 11, Evaluate : 879 / 1000
Epoch : 12, Evaluate : 881 / 1000
Epoch : 13, Evaluate : 884 / 1000
Epoch : 14, Evaluate : 886 / 1000
Epoch : 15, Evaluate : 885 / 1000
Epoch : 16, Evaluate : 892 / 1000
Epoch : 17, Evaluate : 887 / 1000
Epoch : 18, Evaluate : 885 / 1000
Epoch : 19, Evaluate : 893 / 1000
Epoch : 20, Evaluate : 887 / 1000
Epoch : 21, Evaluate : 893 / 1000
Epoch : 22, Evaluate : 886 / 1000
Epoch : 23, Evaluate : 895 / 1000
Epoch : 24, Evaluate : 885 / 1000
Epoch : 25, Evaluate : 889 / 1000
Epoch : 26, Evaluate : 894 / 1000
Epoch : 27, Evaluate : 893 / 1000
Epoch : 28, Evaluate : 899 / 1000
Epoch : 29, Evaluate : 897 / 1000
Epoch : 30, Evaluate : 