## Imports

In [1]:
import numpy as np

## Network Class

In [134]:
"""
Simple_NN
Class to create a simple neural network
    Activation Function: sigmoid
    Learning Algorithm: stochastic gradient descent with backpropagation
    Cost Function: Mean squared error
"""
class Simple_NN(object):
    """ 
    INITIALIZE THE NETWORK
    """
    def __init__(self, layers, activation_function="sigmoid"):
        """
        self.layers is a list of numbers where the ith number how many neurons are in
        the ith layer of the network.
        """
        self.layers = layers;
        self.num_layers = len(layers);
        
        """
        self.weights[Layer - 1, input_neuron, output_neuron] = 
                                            List of weight matrices for each layer.                      
        self.biases[Layer - 1, neuron] = 
                                            List of vectors with biases for each neuron  
        FOR EXAMPLE:
            self.weights[l, j, i] = weight going into the jth neuron of the lth layer
                                    from the ith neuron of the (l-1)st layer 
            self.biases[l, k] = bias on the kth nuron of the lth layer
        NOTE: layer 0 is the input layer, so self.weights[0] is the weights going into layer 1
        """
        self.weights = [];
        self.biases = [];
        self.Z = [];
        self.activations = [];
        # Create matrices with correct dimensions 
        for layer_num in range(1, self.num_layers):
            self.weights.append(np.random.randn(layers[layer_num], layers[layer_num - 1]));
            self.biases.append(np.random.randn(layers[layer_num]));
        """
        self.activation = string specifying what activation function the neurons will use. 
        The options are:
            sigmoid (default)
        """
        self.activation_function = activation_function;
        
    """ 
    ACTIVATION FUNCTION
    For this network, we use the sigmoid function to calculate neuron activation
    """      
    def activation(self, z):
        if (self.activation_function == "sigmoid"):
            return 1.0 / (1 + np.exp(-z));
    
    def activation_derivative(self, z):
        if (self.activation_function == "sigmoid"):
            return (1 - self.activation(z)) * self.activation(z);
        
    """
    TRAINING
    Train the network using stochastic gradient descent and backpropagation.
    Training data should be given in the following format:
        [x11, x12, ..., x1i, y1
         x21, x22, ..., x2i, y2
         ...
         xm1, xm1, ..., xmi, ym]
    Where each row corrsponds to a training example with i data points
    """
    def train(self, training_data, batch_size, num_epochs, learning_rate):
        for epoch in range(num_epochs):
            print("EPOCH: %d" % epoch);
            # Randomize the order of training examples
            np.random.shuffle(training_data);
            # Separate inputs from outputs
            inputs = training_data[:, :-1]
            outputs = training_data[:, -1];
            # For each epoch, loop through each batch to use as training data
            for batch in range(len(training_data))[::batch_size]:
                # For each batch, we calculate activations and use the backpropagation algorithm
                # to change the weights and biases using gradient descent
                self.Z = [];
                self.activations = [];
                # Create matrix out of all training inputs in the batch
                # If the first layer of the network has k neurons, and each training
                # example has i data points, then weights will be a kxi matrix 
                # so Wx_j = kx1 vector.
                # To apply W to all input vectors, we can multiply WX where
                # X is the ixm matrix containing all m training examples as columns
                X = inputs[batch : batch + batch_size];
                X = np.transpose(X);
                Y = outputs[batch : batch + batch_size];

                # *** DEBUGGING  ***
                print('Batch #%d' % batch);
                print('X');
                print(X);
                print('Y')
                print(Y);
                # FEEDFORWARD
                """
                self.Z[layer, training_example, neuron] = 
                                            List of vectors with weighted inputs to the neurons
                self.activations[layer, training_example, neuron] = 
                                            List of vectors with activations for each neuron
                """
                # Calclate outputs going forwards through the network
                for layer in range(self.num_layers - 1):
                    if layer == 0:
                        # Feed inputs to the network
                        prev_activations = X;
                    else:
                        prev_activations = self.activations[layer - 1];
                    # Bias matrix where each column is a copy of the bias vector is needed
                    # to add bias terms for each training example. 
                    one_vector = np.ones(batch_size);
                    bias_matrix = np.outer(self.biases[layer], one_vector);
                    self.Z.append(np.dot(self.weights[layer], prev_activations) + bias_matrix);
                    self.activations.append(self.activation(self.Z[layer]));
            
                    # Backpropagation
                    pass
                    # Gradient Descent
    
    """ 
    TODO: 
        test(testing_data)
    """
    def print_network(self):
        print("Weights: ")
        for layer in self.weights:
            print(layer)
        print("\nBiases:" )
        for layer in self.biases:
            print(layer)
        print("\nWeighted Inputs:")
        for layer in self.Z:
            print(layer)
        print("\nActivations:")
        for layer in self.activations:
            print(layer);

In [137]:
"""
TEST NETWORK CREATION
"""
test = Simple_NN([4, 6, 2]);
random_data = np.matrix('1, 2, 3, 4, 20; 11, 12, 13, 14, 30; 21, 22, 23, 24, 40')
batch_size = 2;
num_epochs = 3;
learning_rate = 0.5;
test.train(random_data, batch_size, num_epochs, learning_rate);

print("After training");
test.print_network();



EPOCH: 0
Batch #0
X
[[ 1 21]
 [ 2 22]
 [ 3 23]
 [ 4 24]]
Y
[[20]
 [40]]
Batch #2
X
[[11]
 [12]
 [13]
 [14]]
Y
[[30]]
EPOCH: 1
Batch #0
X
[[11 21]
 [12 22]
 [13 23]
 [14 24]]
Y
[[30]
 [40]]
Batch #2
X
[[1]
 [2]
 [3]
 [4]]
Y
[[20]]
EPOCH: 2
Batch #0
X
[[21  1]
 [22  2]
 [23  3]
 [24  4]]
Y
[[40]
 [20]]
Batch #2
X
[[11]
 [12]
 [13]
 [14]]
Y
[[30]]
After training
Weights: 
[[ 1.17078602 -0.17434719  0.89476906 -0.28776759]
 [-0.83589868 -0.45646129 -1.35406509  1.00579799]
 [-0.64313304  0.6982566  -0.86917868  0.41219644]
 [-1.38281037 -0.25042522  1.26309675 -0.01167252]
 [-0.87556566 -0.20768007  0.40072757  1.00282825]
 [ 0.66959062 -0.7590287   0.53315077  0.16071834]]
[[-0.56120841 -0.54132984 -1.28661186  0.19411639  0.41357819 -0.44713059]
 [-1.63467226 -0.55831178  0.18517031 -0.02010543 -0.96289863  0.17955346]]

Biases:
[ 0.96292082  0.10285858  0.15880407  0.31242535 -0.37943537  2.46271064]
[ 0.40598243 -0.60168612]

Weighted Inputs:
[[ 19.35265227  19.35265227]
 [-18.09123659