# Improved version of simNet

In [1]:
import random
import numpy as np

In [2]:
class CrossEntropyCost:
    @staticmethod
    def output_layer_error(a, y):
        """
        returns the error at the output layer for the cross entropy cost fxn
        """
        return a-y
    
    @staticmethod
    def fn(a, y):
        """
        return the cost associated with a particular prediction
        """
        return np.sum(np.nan_to_num((-y*np.log(a)) - (1-y)*np.log(1-a)))

In [3]:
class NN:
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.weights = [np.random.randn(current, previous)/np.sqrt(previous) 
                        for previous, current in zip(self.sizes[:-1], self.sizes[1:])]
        self.biases = [np.random.randn(current, 1) 
                       for current in self.sizes[1:]]
        
    def forward_pass(self, x):
        for weight, bias in zip(self.weights, self.biases):
            z = np.dot(weight, x) + bias
            x = self.sigmoid(z)
        return x
    
    def SGD(self, training_data, epochs, bs, lr, 
            reg_param=0.0, validation_data=None):
        training_data = list(training_data)
        n = len(training_data)
        
        for i in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[index:index+bs] 
                            for index in range(0, n, bs)]
            for mini_batch in mini_batches:
                self.run_mini_batch(mini_batch, lr, reg_param, n)
                
            if validation_data:
                validation_data = list(validation_data)
                n_val = len(validation_data)
                
                num_correct = self.evaluate(validation_data)
                print(f'Epoch {i+1}: {num_correct} / {n_val}')
            else:
                print(f'Epoch {i+1} complete')
                
    def run_mini_batch(self, mini_batch, lr, reg_param, n):
        for x, y in mini_batch:
            grad_biases, grad_weights = self.backprop(x, y)
            
            # update rule for weights in SGD using regularization
            self.weights = [(1 - (lr*reg_param)/n)*old_weight - (lr/len(mini_batch))*gradient
                            for old_weight, gradient in zip(self.weights, grad_weights)]
            self.biases = [old_bias - (lr/len(mini_batch))*gradient
                           for old_bias, gradient in zip(self.biases, grad_biases)]
            
    def backprop(self, x, y):
        grad_weights = [np.zeros_like(weight) for weight in self.weights]
        grad_biases = [np.zeros_like(bias) for bias in self.biases]
        
        activations = [x]
        zs = []
        
        # forward pass
        for weight, bias in zip(self.weights, self.biases):
            z = np.dot(weight, x) + bias
            zs.append(z)
            x = self.sigmoid(z)
            activations.append(x)
            
        # backward pass
        error = CrossEntropyCost.output_layer_error(activations[-1], y) # error at the output layer
        grad_biases[-1] = error
        grad_weights[-1] = np.dot(error, activations[-2].T)
        
        for i in range(2, self.num_layers):
            error = np.dot(self.weights[-i+1].T, error) * self.sigmoid_prime(zs[-i])
            grad_biases[-i] = error
            grad_weights[-i] = np.dot(error, activations[-i-1].T)
        
        return grad_biases, grad_weights
        
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def sigmoid_prime(self, z):
        return self.sigmoid(z) * (1 - self.sigmoid(z))
    
    def evaluate(self, data):
        test_results = [(np.argmax(self.forward_pass(x)), y)
                        for x, y in data]
        return sum(int(x == y) for x, y in test_results)

## Data preparation

Neural network is tested on the MNIST dataset

In [4]:
import mnist_loader

In [5]:
train, val, test = mnist_loader.load_data_wrapper()

## Create Network

In [6]:
net = NN([784, 30, 10])

In [7]:
net.SGD(train, epochs=30, bs=10, lr=0.1, 
        reg_param=5.0, validation_data=val)

NameError: name 'grad_bias' is not defined