# Classes and functionality for a Feed Forward Neural Network

In [12]:
import numpy as np

Activation functions and their derivatives

In [13]:
class Activation:
    """Look up an activation function and its derivative by name.

    Supported names are 'tanh', 'sigmoid' and 'relu'.

    The derivatives are written in terms of the activation's *output*
    ``f(z)`` rather than the pre-activation ``z``: pass the value that the
    function from ``get_function`` returned. For relu the two are
    interchangeable because ``relu(z) > 0`` exactly when ``z > 0``.
    """

    def __init__(self,activation):
        # Name of the activation; validated lazily when a function is requested.
        self.activation = activation

    def __str__(self):
        # Lets the object be formatted with %s / str() as its plain name.
        return str(self.activation)

    def _lookup(self, table):
        """Return ``table[self.activation]`` or raise ValueError if unsupported."""
        try:
            return table[self.activation]
        except (KeyError, TypeError):
            # TypeError covers unhashable names (e.g. a list passed by mistake).
            raise ValueError(
                "Unsupported activation %r; expected one of %s"
                % (self.activation, sorted(table))
            )

    def get_function(self):
        """Return the element-wise activation function ``f(z)``.

        Raises:
            ValueError: if the activation name is not supported.
        """
        def tanh(z):
            return np.tanh(z)

        def sigmoid(z):
            return 1 / (1 + np.exp(-z))

        def relu(z):
            return np.maximum(z, 0)

        return self._lookup({'tanh': tanh, 'sigmoid': sigmoid, 'relu': relu})

    def get_derivative(self):
        """Return the derivative of the activation as a function of its output.

        Raises:
            ValueError: if the activation name is not supported.
        """
        def tanh_derivative(output):
            return 1.0 - output**2

        def sigmoid_derivative(output):
            return output*(1-output)

        def relu_derivative(output):
            # Build the result from the mask so every entry is defined;
            # NaN inputs compare False and therefore map to 0.
            output = np.asarray(output)
            return (output > 0).astype(output.dtype)

        return self._lookup({
            'tanh': tanh_derivative,
            'sigmoid': sigmoid_derivative,
            'relu': relu_derivative,
        })
        

Classes for layers

In [14]:
class Layer:
    """A fully connected layer computing ``activation(X.dot(weights) + biases)``.

    Args:
        input_dim: number of input features (rows of the weight matrix).
        units: number of units in the layer (columns of the weight matrix).
        activation: object exposing ``get_function()`` that returns the
            element-wise activation callable.
        seed: value passed to ``np.random.seed`` before the weights are drawn.
            The default (0) keeps initialisation reproducible, but note that
            it reseeds NumPy's *global* generator, and that every layer built
            with the same seed draws from the start of the same random
            stream. Pass ``None`` to leave the global generator untouched.
    """

    def __init__(self, input_dim, units, activation, seed=0):
        self.units = units
        self.activation = activation
        self.input_dim = input_dim
        if seed is not None:
            np.random.seed(seed)
        # Standard-normal weights scaled by 1/sqrt(input_dim); biases start at zero.
        self.weights = np.random.randn(self.input_dim, self.units)/ np.sqrt(self.input_dim)
        self.biases = np.zeros((1,self.units))

    def compute(self,X):
        """Run the layer on ``X`` and return the activated output.

        A copy of the input and the resulting output are stored on the
        instance as ``self.input`` and ``self.output``.
        """
        self.input = X.copy()
        self.output = self.activation.get_function()(X.dot(self.weights)+self.biases)
        return self.output

    def print_info(self):
        """Print the number of units and the activation's name."""
        # The activation object stores its name in `.activation`; fall back to
        # the object itself when it has no such attribute.
        activation_name = getattr(self.activation, 'activation', self.activation)
        print("Layer with %d units and %s activation" %(self.units,activation_name))

In [44]:
class Model2Layers:
    """Feed-forward network with one hidden layer and one output layer.

    Training is full-batch gradient descent on the squared error between the
    network output and the targets, with an L2 penalty (``reg_lambda``) on
    the weight matrices; biases are not regularised.

    Args:
        input_dim: number of input features.
        output_dim: number of output units.
        hidden_dim: number of hidden units.
        learning_rate: step size applied to every gradient update.
        reg_lambda: L2 regularisation strength for the weights.
        activation_layer_1: activation name for the hidden layer.
        activation_layer_2: activation name for the output layer.
    """

    def __init__(self,input_dim, output_dim, hidden_dim,  learning_rate=0.001, reg_lambda = 0.0005, 
                 activation_layer_1="sigmoid", activation_layer_2 = "sigmoid"):
        self.synapse_0 = Layer(input_dim,hidden_dim, Activation(activation_layer_1))
        self.synapse_1 = Layer(hidden_dim,output_dim, Activation(activation_layer_2))

        self.learning_rate = learning_rate

        # Ordered list used by feed_forward.
        self.layers = [self.synapse_0, self.synapse_1]

        self.reg_lambda = reg_lambda

    def feed_forward(self,X):
        """Pass ``X`` through every layer in order and return the final output."""
        output = X.copy()
        for l in self.layers:
            output = l.compute(output)
        return output

    def predict(self,x):
        """Return the predicted class for the FIRST row of ``x``.

        With a single output unit the output is thresholded at 0.5 and 0 or 1
        is returned; otherwise the index of the largest output is returned.
        """
        out = self.feed_forward(x)
        if out.shape[1] == 1:
            return 1 if out[0][0]>0.5 else 0
        return np.argmax(out,axis= 1)[0]

    def compute_loss(self,output,y):
        """Return the mean squared error between ``output`` and ``y``."""
        error = output - y
        return np.mean(error**2)

    def compute_accuracy(self,output,y):
        """Return the fraction of rows whose predicted class matches ``y``.

        With a single output column both ``output`` and ``y`` are thresholded
        at 0.5 (the same rule ``predict`` uses); an argmax over one column
        would always be 0 and report 100% accuracy. With several columns the
        argmax of each row is compared. Assumes ``y`` is 2-D with one row per
        sample, like ``output``.
        """
        num_samples = y.shape[0]

        if output.shape[1] == 1:
            predicted = output[:, 0] > 0.5
            expected = y[:, 0] > 0.5
        else:
            predicted = np.argmax(output, axis= 1)
            expected = np.argmax(y, axis= 1)

        return float(np.sum(predicted == expected))/num_samples

    def back_propagation(self, X, y, no_iterations = 60000, print_every = 10000):
        """Train the network on ``(X, y)`` with full-batch gradient descent.

        Args:
            X: input samples, one row per sample.
            y: targets with the same shape as the network output.
            no_iterations: number of gradient steps.
            print_every: log loss and accuracy every this many iterations
                (starting at iteration 0); 0 or None disables logging.

        Note: the gradients are summed over the batch (not averaged), while
        the logged loss is the mean squared error.
        """
        for j in range(no_iterations):

            # Forward pass; each layer caches its input and output.
            output = self.feed_forward(X)

            # Output-layer error.
            error_2 = output - y

            if print_every and (j % print_every) == 0:
                print("Error    after "+str(j)+" iterations:" + str(self.compute_loss(output,y)))
                print("Accuracy after "+str(j)+" iterations:" + str(self.compute_accuracy(output,y)))

            layer_2_delta = error_2*self.synapse_1.activation.get_derivative()(self.synapse_1.output)

            # Backpropagate the error to the hidden layer, using the output
            # weights as they were BEFORE this iteration's update.
            error_1 = layer_2_delta.dot(self.synapse_1.weights.T)

            layer_1_delta = error_1*self.synapse_0.activation.get_derivative()(self.synapse_0.output)

            # Weight gradients plus the L2 regularisation term.
            dW2 = self.synapse_1.input.T.dot(layer_2_delta)
            dW1 = self.synapse_0.input.T.dot(layer_1_delta)

            dW2 += self.reg_lambda * self.synapse_1.weights
            dW1 += self.reg_lambda * self.synapse_0.weights

            # Bias gradients: the deltas summed over the batch, kept as (1, units).
            db2 = np.sum(layer_2_delta, axis=0, keepdims=True)
            db1 = np.sum(layer_1_delta, axis=0, keepdims=True)

            self.synapse_1.weights -= self.learning_rate * dW2
            self.synapse_1.biases -= self.learning_rate * db2

            self.synapse_0.weights -= self.learning_rate * dW1
            self.synapse_0.biases -= self.learning_rate * db1
            