# Neural Network from Scratch

In [2]:
import numpy as np
from random import random

# STEPS

# STEP 1: save the activations and derivatives within the MLP class
# STEP 2: implement backpropagation
# STEP 3: implement gradient descent
# STEP 4: implement train method
# STEP 5: train our net with some dummy dataset
# STEP 6: make some predictions



class MLP:
    """Multilayer perceptron with sigmoid activations, trained sample by sample.

    The network keeps three parallel lists:

    * ``weights[i]``     -- matrix connecting layer ``i`` to layer ``i + 1``,
      shape ``(layers[i], layers[i + 1])``.
    * ``activations[i]`` -- output of layer ``i`` from the latest forward pass
      (``activations[0]`` is the input itself).
    * ``derivatives[i]`` -- update direction for ``weights[i]`` computed by the
      latest backward pass, same shape as ``weights[i]``.
    """

    def __init__(self, num_inputs=3, num_hidden=None, num_outputs=2):
        """Build the network and allocate its weights, activations and derivatives.

        Args:
            num_inputs: number of input values.
            num_hidden: sequence with the number of neurons of each hidden
                layer. ``None`` (the default) means ``[3, 5]``.
            num_outputs: number of output values.
        """
        # A list literal as default would be shared by every instance, so the
        # default is materialised per call and the argument is copied.
        if num_hidden is None:
            num_hidden = [3, 5]

        self.num_inputs = num_inputs
        self.num_hidden = list(num_hidden)
        self.num_outputs = num_outputs

        # number of neurons in every layer, input to output
        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]

        # One random weight matrix per pair of consecutive layers
        # (rows = current layer, columns = next layer), so a network with
        # N layers has N - 1 weight matrices.
        self.weights = [
            np.random.rand(rows, cols)
            for rows, cols in zip(layers[:-1], layers[1:])
        ]

        # STEP 1

        # one placeholder activation vector per layer
        self.activations = [np.zeros(size) for size in layers]

        # one placeholder derivative matrix per weight matrix
        self.derivatives = [np.zeros(w.shape) for w in self.weights]

    def forward_propagate(self, inputs):
        """Run one sample through the network and return the output activations.

        Every layer's activation is stored in ``self.activations`` so that
        ``back_propagate`` can use it afterwards.

        Args:
            inputs: one sample with ``num_inputs`` values (array or sequence).

        Returns:
            ndarray with the activations of the output layer.
        """
        # Convert up front: back_propagate calls .reshape on the stored
        # activations, which a plain list does not have.
        activations = np.asarray(inputs, dtype=float)
        self.activations[0] = activations  # the first activation is the input

        for i, w in enumerate(self.weights):
            # net input of the next layer: h_[i+1] = a_i . W_i
            net_inputs = np.dot(activations, w)

            # activation of the next layer: a_[i+1] = s(h_[i+1])
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations

        return activations

    # STEP 2

    def back_propagate(self, error, verbose=False):
        """Propagate an error signal backwards and fill ``self.derivatives``.

        Must be called after ``forward_propagate``, whose stored activations
        it reads. For each layer, from last to first:

            delta          = error * s'(h_[i+1])
            derivatives[i] = a_i (column) . delta (row)
            error          = delta . W_i^T      (error seen by layer i)

        where ``s'(h) = s(h) * (1 - s(h))`` and ``s(h_[i+1]) = a_[i+1]``.
        When ``error`` is ``target - output`` the stored matrices are the
        negative gradient of ``0.5 * sum((target - output) ** 2)``, which is
        why ``gradient_descent`` adds them to the weights.

        Args:
            error: error signal at the output layer (``target - output``).
            verbose: when True, print each derivative matrix as it is computed.

        Returns:
            The error signal propagated back to the input layer.
        """
        # walk the layers from right to left
        for i in reversed(range(len(self.derivatives))):
            activations = self.activations[i + 1]

            # (y - a_[i+1]) * s'(h_[i+1])
            delta = error * self._sigmoid_derivative(activations)

            # ndarray([0.1, 0.2]) --> ndarray([[0.1, 0.2]])  (row vector)
            delta_reshaped = delta.reshape(delta.shape[0], -1).T

            # a_i, the activations feeding this weight matrix
            current_activations = self.activations[i]

            # ndarray([0.1, 0.2]) --> ndarray([[0.1], [0.2]])  (column vector)
            current_activations_reshaped = current_activations.reshape(
                current_activations.shape[0], -1
            )

            # outer product: one entry per weight of W_i
            self.derivatives[i] = np.dot(current_activations_reshaped, delta_reshaped)

            # error signal for the previous layer: delta . W_i^T
            error = np.dot(delta, self.weights[i].T)

            if verbose:
                print("Derivatives for W{}: {}".format(i, self.derivatives[i]))

        return error

    # STEP 3

    def gradient_descent(self, learning_rate):
        """Update every weight matrix in place using the stored derivatives.

        Args:
            learning_rate: step size applied to the derivatives.
        """
        for weights, derivatives in zip(self.weights, self.derivatives):
            # In-place add on the ndarray held in self.weights. The derivatives
            # already point downhill (see back_propagate), hence "+=".
            weights += derivatives * learning_rate

    # STEP 4

    def train(self, inputs, targets, epochs, learning_rate):
        """Train with per-sample gradient descent and print the mean error per epoch.

        Args:
            inputs: training samples, one row of ``num_inputs`` values per
                sample. A flat 1-D sequence is interpreted as consecutive
                rows of ``num_inputs`` values (so a single sample may be
                passed without nesting); ``targets`` is then reshaped the same
                way into rows of ``num_outputs`` values.
            targets: expected outputs, paired with ``inputs`` by position.
            epochs: number of passes over the dataset.
            learning_rate: step size passed to ``gradient_descent``.

        Raises:
            ValueError: if flat ``inputs``/``targets`` cannot be split into
                rows of ``num_inputs``/``num_outputs`` values.
            ZeroDivisionError: if ``inputs`` holds no samples and ``epochs > 0``.
        """
        samples = np.asarray(inputs, dtype=float)
        if samples.ndim == 1:
            # Iterating a flat array would feed scalars to the network and fail
            # with a shape error inside back_propagate; rebuild the rows.
            samples = samples.reshape(-1, self.num_inputs)
            targets = np.asarray(targets, dtype=float).reshape(-1, self.num_outputs)

        for i in range(epochs):
            sum_error = 0

            for sample, target in zip(samples, targets):
                # forward pass
                output = self.forward_propagate(sample)

                # error at the output layer
                error = target - output

                # backward pass, then weight update
                self.back_propagate(error)
                self.gradient_descent(learning_rate)

                # accumulate the mean squared error of this sample
                sum_error += self.mse(target, output)

            # report the average error of the epoch
            print("Error: {} at epoch {}".format((sum_error / len(samples)), i))

    def mse(self, target, output):
        """Return the mean squared error between ``target`` and ``output``."""
        return np.average((target - output) ** 2)

    def _sigmoid_derivative(self, x):
        """Return the sigmoid derivative given ``x = s(h)``, i.e. ``x * (1 - x)``."""
        return x * (1.0 - x)

    def _sigmoid(self, x):
        """Return the logistic sigmoid ``1 / (1 + e^-x)`` of ``x``."""
        return 1 / (1 + np.exp(-x))
    
if __name__ == "__main__":

    # STEP 6

    # create an MLP: 2 inputs, one hidden layer with 5 neurons, 1 output
    mlp = MLP(2, [5], 1)

    # Create dummy data. train() pairs inputs and targets row by row, so the
    # dataset must be 2-D (one row per sample) even for a single sample;
    # 1-D arrays would be iterated element by element and fail with a shape
    # error during back propagation.
    inputs = np.array([[0.3, 0.1]])
    targets = np.array([[0.4]])

    # train our MLP
    mlp.train(inputs, targets, 50, 0.1)

    # make a prediction for one sample
    sample = inputs[0]
    output = mlp.forward_propagate(sample)
    print()
    print()

    print("After fucking this many times, this thing believes that {} + {} is equal to {}".format(sample[0], sample[1], output[0]))

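# Recorded notebook output (raised when train() is given 1-D arrays, so that
# each "sample" it iterates over is a single scalar):
# ValueError: shapes (2,5) and (1,2) not aligned: 5 (dim 1) != 1 (dim 0)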