# Intuitive Neural Networks by Hand

This notebook shows how to build a neural network by hand.  The motivation for creating _yet another_ neural network from scratch post is that I believe most get lost in the calculus and lose the concepts.  Additionally, I believe the demonstation code is often designed to be pragmatic, not illustrative.

Below, we create classes to represent our neural network and then use it to fit to a simple example from the Iris dataset.

* NOTE: Code is still under active development and bugs likely exist.

# Concepts

Neural networks are essentially two things:
1. A collection of layers with weights
1. A forwards-backwards algorithm (of sorts) for optimizing those weights.

The forward-backward algorithm is a common paradigm outside of neural networks and is helpful to learn about. By analogy, the forward step simply computes the current outcome probabilities.  The backwards step updates the weights used in making predictions to those that seem best at the current iteration.  This process continues (hopefully) to convergence.

At prediction time, one is essentially re-computing the forward step of the neural network.

<img src="static/neural-network.png" alt="drawing" width="500"/>

In [46]:
import numpy as np
from sklearn import datasets
from sklearn.metrics import log_loss
from sklearn import preprocessing

EPSILON = 0.8


def binary_loss_function(predictions, Y):
    """Loss function for a binary classifier"""
    return log_loss(Y, (predictions[0] > 0.5).astype(int))


def relu_activation(x):
    """Vectorized relu activation function
    :return: 0 if x is less than 0, x otherwise.
    """
    if x > 0:
        return x
    else:
        return 0

    
def sigmoid_activation(x):
    """Vectorized sigmoid activation function
    :return: sigmoid of x
    """
    return 1. / (1 + np.exp(-x))

  
class Model:
    def __init__(self, data, Y, model_structure, cost_function, learning_rate):
        self.data = data
        self.Y = Y
        self.model_structure = model_structure
        self.cost_function = cost_function
        self.learning_rate = learning_rate
        self.iterations = None
        
    def train(self, learning_rate=0.1, num_iterations=5000):
        self.iterations = []
        prior_iteration = None
        for iteration in range(num_iterations):
            model_iteration = ModelIteration(self, self.data, self.Y, learning_rate, prior_iteration)
            self.iterations.append(model_iteration)
            
            iteration_output = model_iteration.feed_forward(self.data)
            model_iteration.propegate_backward() # update weights
    
            prior_iteration = model_iteration
            
            if iteration % 1000 == 0:
                print("Completed iteration {}.  Loss: {}".format(iteration, self.evaluate(self.data, self.Y)))
                
        return self.evaluate(self.data, self.Y)
            
    def predict(self, data=None):
        self.assert_trained()
        if data is None:
            data = self.data
        return self.iterations[-1].predict(data)
    
    def evaluate(self, data=None, Y=None):
        self.assert_trained()
        if data is None:
            data = self.data
            Y = self.Y
        return self.iterations[-1].evaluate(data, Y)
    
    def assert_trained(self):
        if self.iterations is None:
            raise Exception("Must train before running `predict`.")
        
    
class ModelIteration:
    def __init__(self, model, data, Y, learning_rate, prior_iteration=None):
        self.model = model  # locator pattern
        # don't store data again, just wasteful
        self.learning_rate = learning_rate
        self.prior_iteration = prior_iteration
        self.layers = []
        for layer_number, layer in enumerate(self.model.model_structure):
            if self.prior_iteration is None: # first iteration, must initialize weights
                if 0 == layer_number:
                    prior_layer_size = data.shape[1]
                else:
                    prior_layer_size = self.layers[-1].size
                weights = np.random.randn(layer["size"], prior_layer_size)
                betas = np.zeros((layer["size"], 1))
            else:
                weights = self.prior_iteration.layers[layer_number].weights # backprop output
                betas = self.prior_iteration.layers[layer_number].betas            
           
            layer = Layer(self, layer["size"], layer["activation"], weights, betas)
            self.layers.append(layer)
        
    def feed_forward(self, data):
        prior_output = self.model.data.T
        for layer in self.layers:
            output = layer.apply_weights(prior_output)
            #print(output)
            prior_output = output
        return output
    
    def predict(self, data):
         return self.feed_forward(data)
        
    def evaluate(self, data=None, Y=None):
        if data is None:
            data = self.model.data
            Y = self.model.Y
        predictions = self.predict(data)
        return self.model.cost_function(predictions, Y)

    def propegate_backward(self):
        # iterate backwards
        for layer in self.layers[::-1]:
            layer.update_weights(self.learning_rate)

class Layer:
    def __init__(self, model_iteration, size, activation_function, weights, betas):
        self.model_iteration = model_iteration  # locator pattern
        self.size = size
        self.activation_function = activation_function
        self.weights = weights
        self.betas = betas
        self.derivatives = None

    def apply_weights(self, layer_input):
        Z = np.dot(self.weights, layer_input) + self.betas
        output = self.activation_function(Z)
        return output
    
    def update_weights(self, learning_rate):
        if self.derivatives is None:
            self.calculate_derivatives()
        self.weights = self.weights - learning_rate * self.derivatives
    
    def calculate_derivatives(self):
        # add epsilon and substract epsilon from weights and re-run....
        original_weights = self.weights
        
        flat_weights = original_weights.reshape(np.size(original_weights))
        derivatives = []
        for index, weight in enumerate(flat_weights):
            epsilon_vector = np.zeros(flat_weights.shape[0])
            epsilon_vector[index] = EPSILON
            
            self.weights = (flat_weights - epsilon_vector).reshape(*original_weights.shape)
            cost1 = self.model_iteration.evaluate()
            self.weights = (flat_weights + epsilon_vector).reshape(*original_weights.shape)
            cost2 = self.model_iteration.evaluate()
        
            derivative = (cost2 - cost1) / (2 * EPSILON)
            self.weights = original_weights
            derivatives.append(derivative)
            
        self.derivatives = np.array(derivatives).reshape(*original_weights.shape)
#         print("Ending Calculate Derivatives")
#         print("Costs 1 and 2: {:.2f} & {:.2f}".format(cost1, cost2))
#         print(self.derivatives)
        return self.derivatives
       
        


In [47]:
iris = datasets.load_iris()
iris_x = preprocessing.scale(iris["data"])
iris_y = iris["target"]
Y2 = (iris_y == 0).astype(int)
#Y2 = (np.random.randn(len(iris_y)) > 0.3).astype(int)

structure = [{"size": 3, "activation": sigmoid_activation}, 
             {"size": 1, "activation": sigmoid_activation}]


model = Model(iris_x, Y2, structure, binary_loss_function, 0.01)
model.train()
# x = model.predict(iris_x)
# model.evaluate(x, Y2)

Completed iteration 0.  Loss: 11.512925464970227
Completed iteration 1000.  Loss: 9.992007221626415e-16
Completed iteration 2000.  Loss: 9.992007221626415e-16
Completed iteration 3000.  Loss: 9.992007221626415e-16
Completed iteration 4000.  Loss: 9.992007221626415e-16


9.992007221626415e-16

In [42]:
iris = datasets.load_iris()
iris_x2 = preprocessing.scale(iris["data"])
iris_x2.loc[iris['Name']=='virginica','species']=0
iris_x2.loc[iris['Name']=='versicolor','species']=1
iris_x2.loc[iris['Name']=='setosa','species'] = 2
iris_x2 = iris[iris['species']!=2]
#Create Input and Output columns
X2 = iris[['PetalLength', 'PetalWidth']].values.T
Y2 = iris[['species']].values.T
Y2 = Y2.astype('uint8')

AttributeError: 'numpy.ndarray' object has no attribute 'loc'