In [19]:
import numpy as np
import random
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle

def sigmoid(x, inverse=False):
    ''' Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    x       -- scalar or array-like of real numbers.
    inverse -- despite the name, this selects the *derivative* of the
               sigmoid, s(x) * (1 - s(x)), not its functional inverse.

    Returns the sigmoid of x, or its derivative when inverse is true.
    '''
    # Evaluate in log space: log(1 + exp(-x)) == logaddexp(0, -x).  The naive
    # form overflows in exp(-x) for large negative x (RuntimeWarning, or an
    # error under a strict np.errstate) even though the limit is simply 0.
    # Multiplying by -1.0 (instead of unary minus) also keeps unsigned integer
    # input from wrapping around.
    result = np.exp(-np.logaddexp(0, np.multiply(x, -1.0)))

    if inverse:
        return result * (1 - result)
    return result

class FeedForwardNetwork:
    ''' Fully connected feed-forward network with sigmoid units.

    Samples are stored column-wise: an input batch has shape
    (n_inputs, n_samples) and every layer activation has shape
    (layer_size, n_samples).  Training is plain gradient descent on the
    deltas produced by _backpropagate, with an L2 penalty (weight decay)
    applied to both weights and biases.
    '''

    def __init__(self, arch=np.array([784, 160, 60, 10]), lr=0.001, batch_size=200,
                 weight_decay=0.2):
        ''' Build a network and draw its initial parameters.

        arch         -- sequence of layer sizes, input layer first.
        lr           -- learning rate used by the update steps.
        batch_size   -- stored on the instance; the class itself never reads
                        it, batching is left to the caller.
        weight_decay -- L2 regularization strength (previously fixed at 0.2).
        '''
        self._arch = arch
        self.batch_size = batch_size
        self._lr = lr
        self._num_layers = len(self._arch)
        self._init_w()
        self._init_b()
        self.weight_decay = weight_decay

    def _init_w(self):
        ''' Initialize weights using gaussian distribution '''
        # One (fan_out, fan_in) matrix per pair of consecutive layers.
        self._w = [np.random.randn(j, i) for i, j in zip(self._arch[:-1], self._arch[1:])]

    def _init_b(self):
        ''' Initialize biases using gaussian distribution '''
        # One column vector per non-input layer.
        self._b = [np.random.randn(i, 1) for i in self._arch[1:]]

    def _calculate_z(self, x, n):
        ''' Calculate raw output value z at layer n '''
        return np.dot(self._w[n], x) + self._b[n]

    def _propagate(self, x, return_label=False):
        ''' Calculate the activation and output values at each layer given input x

        The results are cached on the instance for the backward pass:
        self._a holds the activations and self._o the raw (pre-activation)
        values; index 0 of both lists is the input x itself.

        Returns the output-layer activation when return_label is true,
        otherwise the pair (self._a, self._o).
        '''
        self._a = [x]
        self._o = [x]
        activation = x

        # Every layer, hidden or output, uses the sigmoid.
        for layer in range(self._num_layers - 1):
            z = self._calculate_z(activation, layer)
            self._o.append(z)
            activation = sigmoid(z)
            self._a.append(activation)

        if return_label:
            return activation
        return self._a, self._o

    def _backpropagate(self, y):
        ''' Propagate error signal backward from output layer

        Must be called after _propagate, whose cached self._a / self._o it
        reads.  y holds the targets in the same layout as the output
        activation.  The deltas are stored in self._d, ordered from the first
        hidden layer to the output layer, and also returned.
        '''
        # Output layer: (a - y) * sigmoid'(z), i.e. the delta of a squared-error
        # cost.  Only sigmoid outputs are implemented; the former softmax
        # branch referenced an undefined function and could never run.
        output_delta = np.multiply(
            self._a[-1] - y,
            sigmoid(self._o[-1], inverse=True)
        )
        deltas = [output_delta]

        # Hidden layers, walking backwards: push the next layer's delta through
        # the transposed weights and scale by the local sigmoid derivative.
        for l in range(self._num_layers - 2):
            back_weights = self._w[-(l + 1)].T
            hidden_delta = np.multiply(
                np.dot(back_weights, deltas[l]),
                sigmoid(self._o[-(l + 2)], inverse=True)
            )
            deltas.append(hidden_delta)

        # Deltas were collected output-first; flip them to layer order.
        self._d = deltas[::-1]
        return self._d

    def _adjust_weights(self):
        ''' Adjust weights according to gradients self._d '''
        learn_rate = self._lr
        decay = learn_rate * self.weight_decay

        # zip stops at the shortest list, so each weight matrix is paired with
        # the activation feeding it (self._a has one extra, final entry).
        # The gradient is summed, not averaged, over the samples in the batch.
        self._w = [
            weight - learn_rate * np.dot(delta, activation.T) - decay * weight
            for weight, delta, activation in zip(self._w, self._d, self._a)
        ]
        return self._w

    def _adjust_biases(self):
        ''' Adjust biases according to gradients self._d '''
        learn_rate = self._lr
        decay = learn_rate * self.weight_decay

        # Sum each unit's delta over the batch axis, then restore column shape.
        self._b = [
            bias - learn_rate * np.sum(delta, axis=1).reshape(bias.shape) - decay * bias
            for bias, delta in zip(self._b, self._d)
        ]
        return self._b

    @staticmethod
    def _prepare_y(y, num_classes=10):
        ''' One-hot encode class labels.

        y           -- sequence of class indices; float-valued labels such as
                       3.0 are accepted and truncated to integers.
        num_classes -- length of each one-hot vector.

        Returns an array of shape (len(y), num_classes, 1), one column vector
        per label.
        '''
        # Cast first: numpy refuses float indices, and label arrays are often
        # stored as floats.
        labels = np.asarray(y).astype(int).ravel()
        one_hot = np.zeros((labels.shape[0], num_classes, 1))
        one_hot[np.arange(labels.shape[0]), labels, 0] = 1
        return one_hot

    def test_network(self, test_input, test_labels):
        ''' Return the fraction of samples classified correctly.

        test_input  -- batch of shape (n_inputs, n_samples).
        test_labels -- class index per sample, at least n_samples long.

        The predicted class is the index of the largest output activation.
        Returns 0.0 for an empty batch.  Runs a forward pass, so the cached
        self._a / self._o are overwritten.
        '''
        output_labels = self._propagate(test_input, return_label=True).T
        num_images = output_labels.shape[0]
        if num_images == 0:
            return 0.0

        labels = np.asarray(test_labels).ravel()
        if labels.shape[0] < num_images:
            raise IndexError('expected %d labels, got %d' % (num_images, labels.shape[0]))

        predictions = np.argmax(output_labels, axis=1)
        correct = np.count_nonzero(predictions == labels[:num_images])
        return float(correct) / float(num_images)



if __name__ == "__main__":
    # Train a FeedForwardNetwork on MNIST with mini-batch gradient descent and
    # report the test accuracy after every epoch.
    n_train = 60000
    n_test = 10000
    k_folds = 12
    fold_size = n_train // k_folds  # not used below
    epochs = 250
    alpha = 0.005
    batch_size = 200
    seed = 0

    # Seed every generator involved *before* the network draws its weights.
    # (The random module has no random_seed(); random.seed() is the API.)
    random.seed(seed)
    np.random.seed(seed)
    # A RandomState instance advances between calls, so each epoch gets a new
    # permutation; a fixed integer would replay the same one every time.
    shuffle_rng = np.random.RandomState(seed)

    net = FeedForwardNetwork(arch=[784,160,60,10], lr=alpha, batch_size=batch_size)
    # NOTE(review): fetch_mldata was removed from scikit-learn in 0.22; on newer
    # versions this has to become fetch_openml (with the import updated to match).
    mnist = fetch_mldata('MNIST original')

    # Assumes the data set is ordered with the training samples first -- TODO confirm.
    # Slices give exactly n_train and n_test samples; the old
    # range(n_train+1, ...) skipped sample n_train and yielded only 9999 items.
    # Labels are cast to int so they can be used as array indices.
    X_train = mnist.data[:n_train]
    y_train = mnist.target[:n_train].astype(int)
    X_test = mnist.data[n_train:n_train + n_test]
    y_test = mnist.target[n_train:n_train + n_test].astype(int)

    for e in range(epochs):
        X_train, y_train = shuffle(X_train, y_train, random_state=shuffle_rng)

        # Floor division: range() needs an int on Python 3.
        for i in range(n_train // batch_size):
            # NOTE(review): the loop body was missing in the original; this is
            # the straightforward training step built from the class's methods.
            start = i * batch_size
            stop = start + batch_size

            # The network expects samples as columns: (784, batch).
            x_batch = X_train[start:stop].T
            # _prepare_y yields (batch, 10, 1); flatten to (10, batch) so the
            # targets line up with the output activation.
            y_batch = net._prepare_y(y_train[start:stop])
            y_batch = y_batch.reshape(y_batch.shape[0], -1).T

            net._propagate(x_batch)
            net._backpropagate(y_batch)
            # Both updates read the deltas/activations cached by the two calls above.
            net._adjust_weights()
            net._adjust_biases()

        print("epoch %d: test accuracy %.4f" % (e + 1, net.test_network(X_test.T, y_test)))
            
    
    


[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0  51 159 253 159  50   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0  48 238 252 252 252 237   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0  54 227 253 252 239 233 252  57   6   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0  10  60 224 252 253 252 202  84 252
 253 122   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0 163 252 252 252 253 252 252  96 189 253 167   