In [2]:
import numpy as np
class MLP:
    def __init__(self, num_inputs=3, num_hidden=[3,5], num_outputs=2):
        self.num_inputs = num_inputs
        self.num_hidden= num_hidden
        self.num_outputs = num_outputs
        #input layer, two hidden layers(one with 3 and then with 5 neurons)
        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs] #its gonna be [3,3,5,2]
        #initiate random weights
        self.weights = []
        for i in range(len(layers)-1): #len(layers)=4 and we dont want to work for input layer
            w = np.random.rand(layers[i], layers[i+1])
            self.weights.append(w) #3 list-elements with all the weights
        
        #step 1:save the activations and derivatives

        activations = []
        for i in range(len(layers)): #from 0 to 3
            a = np.zeros(layers[i])
            activations.append(a)
        self.activations = activations #the first activations are the inputs
        
        derivatives = []
        for i in range(len(layers)-1): # from 0 to 2
            a = np.zeros((layers[i], layers[i+1]))
            derivatives.append(a)
        self.derivatives = derivatives
        
        
    def forward_propagate(self, inputs):
        activations = inputs
        self.activations[0]= inputs  # save the activations
        
        for i,w in enumerate(self.weights):
            #calculate net inputs
            net_inputs =  np.dot(activations, w)
            #calculate the activations
            activations = self._sigmoid(net_inputs)
            self.activations[i+1] = activations
            #print("activations:", activations)
        
        return activations
    
    # step 2 : implement backpropagation
    
    def back_propagate(self, error, verbose= False):
        
        #dE/dw_i = (y -a[i+1])*σ'(h[i+1])*a_i 
        # σ'(h[i+1]) = σ(h[i+1])*(1-σ(h[i+1]))
        # σ(h[i+1]) = α[i+1]
        #dE/dw_(i-1) = (y -a[i+1])*σ'(h[i+1])*w_i* σ'(h_i)*a_(i-1) 
        
        for i in reversed(range(len(self.derivatives))): #from 1 to 0
            activations = self.activations[i+1]
            delta = error * self._sigmoid_derivative(activations)
            #we want reshape ndarray([0.1,0.2]) ----> ndarray([[0.1, 0.2]])
            delta_reshaped =delta.reshape(delta.shape[0],-1).T
            
            current_activations = self.activations[i]
            #we want reshape ndarray([0.1,0.2]) ----> ndarray([[0.1],[0.2]])
            current_activations_reshaped = current_activations.reshape(current_activations.shape[0],-1)
            self.derivatives[i] = np.dot(current_activations_reshaped, delta_reshaped)
            error = np.dot(delta, self.weights[i].T)
            
            #if verbose:
            #    print("Derivatives for W{}: {}".format(i,self.derivatives[i]))
        return error
            
        
    # step 3 : gradient descent
    def gradient_descent(self, lr):
        for i in range(len(self.weights)):
            weights = self.weights[i]
            #print("Original w{} {}".format(i , weights))
            derivatives = self.derivatives[i]
            weights += derivatives * lr
            #print("Updated w{} {}".format(i , weights))
    
    #step 4: training 
    def train(self, inputs, targets, epochs, lr):
        for i in range(epochs):
            sum_error = 0
            for input, target in zip(inputs, targets):
                
                #forward propagation
                output =self.forward_propagate(input)
                
                #calculate error
                error = target - output
                
                #back propagation
                self.back_propagate(error)
                
                #apply gradient descent
                self.gradient_descent(lr=1)
                
                sum_error += self._mse(target, output)
                
            # report error
            print("Error: {} at epoch {}".format((sum_error/len(inputs)), i))   
        

    def _mse(self, target, output):
        return np.average((target-output)**2)
    
    def _sigmoid_derivative(self, x):
        return x * (1.0 - x)
        
    
    def _sigmoid(self,x):
        return 1/ (1+ np.exp(-x))

### Training

In [173]:
#test backpropagate function 
#create an mlp
mlp = MLP(2,[5],1)
#create dummy data
inputs= np.array([0.1,0.2])
target = np.array([0.3])
#forward propagation
output =mlp.forward_propagate(inputs)
output

array([0.79381418])

In [174]:
#calculate error 
error = target - output

mlp.back_propagate(error, verbose = True)

Derivatives for W1: [[-0.04197785]
 [-0.04109672]
 [-0.04426913]
 [-0.04294043]
 [-0.0439249 ]]
Derivatives for W0: [[-0.00200177 -0.00045164 -0.00093336 -0.00132001 -0.0004255 ]
 [-0.00400353 -0.00090328 -0.00186673 -0.00264003 -0.000851  ]]


array([-0.02330815, -0.01778374])

In [175]:
#apply gradient descent
mlp.gradient_descent(lr=1)

Original w0 [[0.28962904 0.00429329 0.66519374 0.60008677 0.78993472]
 [0.24282953 0.16728521 0.62474632 0.32641771 0.47648135]]
Original w1 [[0.99216709]
 [0.22358177]
 [0.46616942]
 [0.65584369]
 [0.21218291]]


In [185]:
# train mlp on dummy data
from random import random
inputs = np.array([[random()/2 for _ in range(2)] for _ in range(1000)])
targets = np.array([[i[0] + i[1]] for i in inputs])

In [186]:
print(inputs.shape)
print(targets.shape)

(1000, 2)
(1000, 1)


In [193]:
mlp = MLP(2,[5],1)

mlp.train(inputs, targets, 50, 0.1)

Error: 0.04366209858946426 at epoch 0
Error: 0.020391732177764493 at epoch 1
Error: 0.004274119668779659 at epoch 2
Error: 0.0012214577969696838 at epoch 3
Error: 0.0006730106476643253 at epoch 4
Error: 0.0005512380176952948 at epoch 5
Error: 0.0005147460485439905 at epoch 6
Error: 0.0004973896842203023 at epoch 7
Error: 0.0004850585392775636 at epoch 8
Error: 0.00047458796171103015 at epoch 9
Error: 0.00046521455170948125 at epoch 10
Error: 0.00045670463173131217 at epoch 11
Error: 0.00044894248185086967 at epoch 12
Error: 0.00044184247198668803 at epoch 13
Error: 0.0004353313404208244 at epoch 14
Error: 0.0004293446524429053 at epoch 15
Error: 0.00042382571308802934 at epoch 16
Error: 0.0004187247616086556 at epoch 17
Error: 0.0004139981712345201 at epoch 18
Error: 0.00040960767903048986 at epoch 19
Error: 0.00040551968213724444 at epoch 20
Error: 0.00040170461517853443 at epoch 21
Error: 0.0003981364090534281 at epoch 22
Error: 0.00039479202460825704 at epoch 23
Error: 0.00039165105

The idea is we have the train dataset, we create an mlp, we train it and we use it for predictions. After the trai

In [198]:
# make predictions on dummy data
input = np.array([0.3, 0.1])
target = np.array([0.4])
output = mlp.forward_propagate(input)
print()
print()
print("Our network believes that {} + {} is equal to {}".format(input[0], input[1],output[0]))



Our network believes that 0.3 + 0.1 is equal to 0.385911352814274
