In [1]:
import numpy as np
import matplotlib.pyplot as plt
import struct

In [2]:
def read_idx(filename):
    """Load an IDX-format file (the container used by MNIST) into an array.

    The file starts with a 4-byte magic number: two zero bytes, one byte
    holding the element-type code and one byte holding the number of
    dimensions. It is followed by one big-endian uint32 per dimension and
    then by the raw elements.

    Parameters
    ----------
    filename : str or path-like
        Path of the IDX file to read.

    Returns
    -------
    numpy.ndarray
        Array shaped as described by the header. It is built with
        ``np.frombuffer`` over immutable bytes, so it is read-only; multi-byte
        element types keep the big-endian byte order of the file.

    Raises
    ------
    ValueError
        If the magic number does not start with two zero bytes, if the
        element-type code is unknown, or if the payload does not match the
        shape announced in the header.
    struct.error
        If the file is too short to contain the header.
    """
    # Element-type codes defined by the IDX format. Previously the code was
    # parsed but ignored and everything was decoded as uint8.
    idx_dtypes = {
        0x08: '>u1',  # unsigned byte
        0x09: '>i1',  # signed byte
        0x0B: '>i2',  # short
        0x0C: '>i4',  # int
        0x0D: '>f4',  # float
        0x0E: '>f8',  # double
    }
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if zero != 0:
            raise ValueError(
                'not an IDX file: magic number must start with two zero bytes'
            )
        if data_type not in idx_dtypes:
            raise ValueError(
                'unsupported IDX element type code: 0x%02X' % data_type
            )
        # One big-endian uint32 per dimension, read in a single unpack.
        shape = struct.unpack('>%dI' % dims, f.read(4 * dims))
        payload = f.read()
    return np.frombuffer(payload, dtype=idx_dtypes[data_type]).reshape(shape)

In [3]:
# Read the image array stored in the IDX file under ./data.
data = read_idx(filename='./data/train-images.idx3-ubyte')

In [4]:
# Flatten every image into one row vector. The number of rows is taken from
# the array itself instead of being hard-coded to 60000, so the cell also
# works for files with a different number of images and is safe to re-run.
data = data.reshape(data.shape[0], -1)
# Downstream layers are built for 28*28 inputs, so keep that check explicit.
assert data.shape[1] == 28 * 28, 'expected 28*28 values per image'

In [5]:
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The overall maximum of ``x`` is subtracted before exponentiating. This
    does not change the mathematical result but keeps ``np.exp`` from
    overflowing on large inputs.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normaliser = exponentials.sum()
    return exponentials / normaliser

In [6]:
class Layer:
    """Fully connected layer computing ``activation(b + W.dot(input))``."""

    def __init__(self, input_size, size, activation_function, W=None, b=None):
        """Create a layer with ``size`` units fed by ``input_size`` values.

        Parameters
        ----------
        input_size : int
            Length of the vectors passed to ``predict``.
        size : int
            Number of units, i.e. the length of the layer's output.
        activation_function : callable
            Applied to the pre-activation vector ``b + W.dot(input)``.
        W : array-like of shape (size, input_size), optional
            Weight matrix. When omitted, weights are drawn uniformly from
            [0, 1) with ``np.random.rand``.
        b : array-like of shape (size,), optional
            Bias vector. When omitted, biases are drawn uniformly from
            [0, 1) with ``np.random.rand``.

        Raises
        ------
        ValueError
            If a supplied ``W`` or ``b`` does not have the expected shape.
        """
        # Number of units; other code reads this to size neighbouring layers.
        self.size = size

        # Previously W and b were accepted but ignored; honour them now.
        # The random draws keep their original order (W first, then b) so
        # seeded runs that rely on the defaults are unaffected.
        if W is None:
            W = np.random.rand(size, input_size)
        else:
            W = np.asarray(W, dtype=float)
            if W.shape != (size, input_size):
                raise ValueError(
                    'W must have shape (%d, %d), got %r'
                    % (size, input_size, W.shape)
                )
        if b is None:
            b = np.random.rand(size)
        else:
            b = np.asarray(b, dtype=float)
            if b.shape != (size,):
                raise ValueError(
                    'b must have shape (%d,), got %r' % (size, b.shape)
                )

        # Weight matrix, shape (size, input_size).
        self.W = W
        # Bias vector, shape (size,).
        self.b = b
        # Activation applied to the pre-activation vector.
        self.func = activation_function

    def predict(self, input):
        """Return the layer's activation for one input vector.

        ``input`` is expected to be a vector of length ``input_size``; the
        result is ``activation_function(b + W.dot(input))``.
        """
        return self.func(self.b + self.W.dot(input))

In [7]:
class MLP:
    """Multi-layer perceptron assembled from ``Layer`` objects.

    The network is the chain: a tanh input layer mapping ``input_size`` values
    to ``hidden_layers[0].size`` values, then the caller-supplied hidden
    layers in order, then a softmax output layer mapping
    ``hidden_layers[-1].size`` values to ``output_size`` values.
    """

    def __init__(self, input_size, output_size, *args):
        """Build the network around the given hidden layers.

        Parameters
        ----------
        input_size : int
            Length of the vectors passed to ``predict``.
        output_size : int
            Length of the vector returned by ``predict``.
        *args : Layer
            Hidden layers, in order. At least one is required. Because the
            generated input layer already outputs ``args[0].size`` values,
            the first hidden layer must accept that many inputs, and each
            later one must accept the previous layer's ``size``.

        Raises
        ------
        ValueError
            If no hidden layer is given, or if a hidden layer's weight
            matrix does not accept the width produced by the layer before it.
        """
        # Previously a missing hidden layer surfaced as an AttributeError on
        # self.hidden_layers; fail with an explicit message instead.
        if not args:
            raise ValueError('MLP requires at least one hidden layer')

        self.input_size = input_size
        self.output_size = output_size
        # Tuple of the caller-supplied hidden layers.
        self.hidden_layers = args

        # Check the chain up front so a width mismatch is reported here
        # rather than as a dot-product shape error inside predict(). Only
        # layers exposing a 2-D ``W`` (rows = units, columns = inputs) are
        # checked.
        expected_inputs = args[0].size
        for position, layer in enumerate(args):
            weight_shape = np.shape(getattr(layer, 'W', None))
            if len(weight_shape) == 2 and weight_shape[1] != expected_inputs:
                raise ValueError(
                    'hidden layer %d accepts %d inputs but receives %d'
                    % (position, weight_shape[1], expected_inputs)
                )
            expected_inputs = layer.size

        self.input_layer = Layer(input_size, self.hidden_layers[0].size, np.tanh)
        self.output_layer = Layer(self.hidden_layers[-1].size, self.output_size, softmax)
        # Every layer in evaluation order.
        self.layers = (self.input_layer,) + self.hidden_layers + (self.output_layer,)

    def predict(self, input):
        """Feed ``input`` through every layer in order and return the result."""
        z = input
        for f in self.layers:
            z = f.predict(z)
        return z

    def grad(self, beta=None):
        """Placeholder for the gradient computation.

        TODO: not implemented yet; currently does nothing and returns None.
        The missing ``self`` parameter was added so the method can be called
        on an instance with an argument.
        """
        return None

In [8]:
# Hidden layer with tanh activation: 28*28 inputs feeding 28*28 units.
hl = Layer(input_size=28 * 28, size=28 * 28, activation_function=np.tanh)

In [9]:
# Assemble the network: 28*28 inputs, 10 outputs, and `hl` as its only
# caller-supplied hidden layer.
mlp = MLP(28*28, 10, hl)

In [10]:
# Forward pass on row 55 of `data`. As the last expression in the cell, the
# returned array is shown as the cell output. No training step appears in
# this notebook, so the values come from the layers' initial parameters.
mlp.predict(data[55])

array([3.27781157e-07, 4.74525349e-13, 7.04665022e-06, 5.58884642e-16,
       4.01730652e-05, 9.94561140e-07, 9.99949589e-01, 1.86857480e-06,
       1.44418406e-10, 5.38028116e-11])