In [88]:
import pandas as pd
import numpy as np

In [125]:
class Layer:
    def __init__(self):
        self.input = None
        self.output = None
        
    # computes the output Y of a layer for a given input X
    def forward_propagation(self, input):
        raise NotImplementedError
        
    # computes the dE/dX for a given dE/dY (and update parameters if any)
    def backward_propagation(self, output_error, learning_rate):
        raise NotIMplementedError

In [126]:
# inherit from base class Layer
class FCLayer(Layer):
    # input_size = number of input neurons
    # output_size = number of output neurons
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5
    
    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output
    
    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dBias = output_error
        
        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error

In [127]:
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime
        
    # returns the activated input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output
    
    # returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there is no "learnable" parameters
    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error

In [128]:
# activation function and its derivative
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1 - np.tanh(x)**2

In [129]:
# loss function and its derivative
def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size

In [186]:
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None
        
    # add layer to network
    def add(self, layer):
        self.layers.append(layer)
        
    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime
        
    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        samples = len(input_data)
        result = []
    
        # run network over all samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)
        
        return result
    
    # train the network
    def fit(self, x_train, y_train, epochs, learning_rate):
        # sample dimension first
        samples = len(x_train)
        
        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)
                    
                # compute loss (for display purpose only)
                print(y_train[j], output)
                err += self.loss(y_train[j], output)
                
                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)
            
            # calculate average error on all samples
            err /= samples
            if i%100==0:
                print('epoch %d/%d  error=%f' % (i+1, epochs, err))

In [185]:
# training data
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network
net = Network()
net.add(FCLayer(2, 3))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(tanh, tanh_prime))

# train
net.use(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# test
out = net.predict(x_train)
print(out)

epoch 1/1000  error=0.927312
epoch 101/1000  error=0.242375
epoch 201/1000  error=0.063630
epoch 301/1000  error=0.003204
epoch 401/1000  error=0.001360
epoch 501/1000  error=0.000831
epoch 601/1000  error=0.000589
epoch 701/1000  error=0.000452
epoch 801/1000  error=0.000364
epoch 901/1000  error=0.000304
[array([[0.00075019]]), array([[0.97994764]]), array([[0.97474474]]), array([[-0.0014362]])]


In [173]:
train = np.array(pd.read_csv("train.csv")).astype('float32')
test = np.array(pd.read_csv("test.csv")).astype('float32')

In [145]:
train.shape, test.shape

((42000, 785), (28000, 784))

In [147]:
def to_categorical(data):
    col = np.max(data)+1
    rows = len(data)
    result = np.zeros((rows,col))
    for i in range(data.shape[0]):
        result[i,data[i]] = 1
    return result

In [167]:
X_train, y_train = train[:,1:]/255, to_categorical(train[:,0].astype(int))
X_test = test/255
X_train = X_train.reshape(X_train.shape[0], 1, 28*28)
X_test = X_test.reshape(X_test.shape[0], 1, 28*28)

In [192]:
to_categorical(train[:,0])

TypeError: 'numpy.float64' object cannot be interpreted as an integer

In [180]:
mnist_net = Network()
mnist_net.add(FCLayer(28*28, 100))
mnist_net.add(ActivationLayer(tanh, tanh_prime))
mnist_net.add(FCLayer(100, 50))
mnist_net.add(ActivationLayer(tanh, tanh_prime))
mnist_net.add(FCLayer(50, 10))
mnist_net.add(ActivationLayer(tanh, tanh_prime))

In [187]:
mnist_net.use(mse, mse_prime)
mnist_net.fit(X_train[0:1000], y_train[0], epochs=35, learning_rate=0.1)

[0] [[-0.9999694  -0.78265669 -0.89008461 -0.99999861 -0.76528957 -0.99954556
  -0.7860186  -0.95651945 -0.57048489  0.99993393]]


IndexError: index 1 is out of bounds for axis 0 with size 1