In [2]:
import numpy as np


### Objectives:
1. Save activations and derivatives
2. Implement backpropagation
3. Implement gradient descent
4. Implement train



In [35]:
# Define the MLP class


class MLP:
    """A fully connected multilayer perceptron with sigmoid activations.

    The network keeps three parallel lists:

    * ``weights[i]``      - matrix of shape ``(layers[i], layers[i + 1])``
    * ``activations[i]``  - output vector of layer ``i`` from the last forward pass
    * ``derivatives[i]``  - update direction for ``weights[i]`` from the last
      backward pass (same shape as ``weights[i]``)

    All passes operate on a single sample (1-D vectors), not on batches.
    """

    def __init__(self, num_inputs=3, num_hidden=(3, 5), num_outputs=2, seed=None):
        """Create the network and initialise its weights uniformly in [0, 1).

        Args:
            num_inputs (int): Size of the input layer.
            num_hidden (sequence of int): Size of each hidden layer, in order.
            num_outputs (int): Size of the output layer.
            seed (int | None): If given, weights are drawn from a private
                ``np.random.RandomState(seed)`` so the initialisation is
                reproducible; if None, the global NumPy random state is used.
        """
        self.num_inputs = num_inputs
        # Copy into a fresh list so the instance never shares (or mutates) the
        # caller's sequence or the default argument.
        self.num_hidden = list(num_hidden)
        self.num_outputs = num_outputs

        layers = [self.num_inputs] + self.num_hidden + [self.num_outputs]
        layer_pairs = list(zip(layers[:-1], layers[1:]))

        rng = np.random if seed is None else np.random.RandomState(seed)

        # One weight matrix per pair of consecutive layers.
        self.weights = [rng.rand(n_in, n_out) for n_in, n_out in layer_pairs]

        # One activation vector per layer (filled in by forward_propagate).
        self.activations = [np.zeros(size) for size in layers]

        # One derivative matrix per weight matrix (filled in by back_propagate).
        self.derivatives = [np.zeros((n_in, n_out)) for n_in, n_out in layer_pairs]

    def forward_propagate(self, inputs):
        """Run one sample through the network and cache every layer's activations.

        Args:
            inputs (array-like): 1-D input vector of length ``num_inputs``.

        Returns:
            ndarray: Activations of the output layer.
        """
        # Coerce to an ndarray so back_propagate can rely on array operations
        # even when the caller passes a plain list.
        activations = np.asarray(inputs, dtype=float)
        self.activations[0] = activations

        for i, w in enumerate(self.weights):
            net_inputs = np.dot(activations, w)
            activations = self._sigmoid(net_inputs)
            self.activations[i + 1] = activations

        return activations

    def back_propagate(self, error, verbose=False):
        """Propagate an output error backwards and fill ``self.derivatives``.

        Must be called after ``forward_propagate`` because it reads the cached
        activations. For each layer, walking from output to input:

            delta          = error * sigmoid'(a_[i+1])
            derivatives[i] = outer(a_i, delta)
            error          = delta . W_i^T      (error passed to the layer below)

        When ``error`` is ``target - output``, ``derivatives[i]`` is the
        *negative* gradient of ``0.5 * sum((target - output) ** 2)`` with
        respect to ``weights[i]``; that is why ``gradient_descent`` adds it.

        Args:
            error (ndarray): 1-D error vector at the output layer.
            verbose (bool): If True, print each derivative matrix.

        Returns:
            ndarray: The error propagated back to the input layer.
        """
        for i in reversed(range(len(self.derivatives))):
            # sigmoid'(h) expressed through the cached activation a = sigmoid(h).
            delta = error * self._sigmoid_derivative(self.activations[i + 1])

            # Outer product: (layers[i],) x (layers[i + 1],) -> weight-shaped matrix.
            self.derivatives[i] = np.outer(self.activations[i], delta)

            error = np.dot(delta, self.weights[i].T)

            if verbose:
                print("Derivative for W{}: {}".format(i, self.derivatives[i]))

        return error

    def gradient_descent(self, learning_rate, verbose=True):
        """Update every weight matrix in place using the stored derivatives.

        The derivatives are added (not subtracted) because ``back_propagate``
        stores the negative gradient when fed ``target - output``.

        Args:
            learning_rate (float): Step size applied to the derivatives.
            verbose (bool): If True (the default, matching the original
                behaviour), print each matrix before and after the update.
        """
        for i, (weights, derivatives) in enumerate(zip(self.weights, self.derivatives)):
            if verbose:
                print("original W{} = {}".format(i, weights))
            # In-place update: mutates the array held in self.weights[i].
            weights += learning_rate * derivatives
            if verbose:
                print("updated W{} = {}".format(i, weights))

    def train(self, inputs, targets, epochs, learning_rate, verbose=False):
        """Train the network with per-sample (online) gradient descent.

        Args:
            inputs (array-like): Samples, shape ``(n_samples, num_inputs)``.
            targets (array-like): Expected outputs, one row/entry per sample.
            epochs (int): Number of full passes over the data.
            learning_rate (float): Step size passed to ``gradient_descent``.
            verbose (bool): If True, print the mean error after each epoch.

        Returns:
            list[float]: Mean squared error of each epoch, in order.

        Raises:
            ValueError: If there are no samples or the number of inputs and
                targets differ.
        """
        inputs = np.asarray(inputs, dtype=float)
        targets = np.asarray(targets, dtype=float)

        if len(inputs) == 0:
            raise ValueError("train() needs at least one sample")
        if len(inputs) != len(targets):
            raise ValueError(
                "inputs and targets must have the same length, got {} and {}".format(
                    len(inputs), len(targets)
                )
            )

        history = []
        for epoch in range(epochs):
            sum_error = 0.0
            for sample, target in zip(inputs, targets):
                output = self.forward_propagate(sample)
                error = target - output
                self.back_propagate(error)
                # Silence the per-matrix printing; it would flood the output here.
                self.gradient_descent(learning_rate, verbose=False)
                sum_error += self._mse(target, output)

            epoch_error = sum_error / len(inputs)
            history.append(epoch_error)
            if verbose:
                print("Error: {} at epoch {}".format(epoch_error, epoch + 1))

        return history

    def _mse(self, target, output):
        """Return the mean squared error between a target and an output vector."""
        return float(np.mean((target - output) ** 2))

    def _sigmoid_derivative(self, x):
        """Return the sigmoid derivative given ``x = sigmoid(h)`` (not ``h`` itself)."""
        return x * (1.0 - x)

    def _sigmoid(self, x):
        """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

In [37]:
# Build a small network: 2 inputs -> one hidden layer of 5 units -> 1 output.
mlp = MLP(num_inputs=2, num_hidden=[5], num_outputs=1)

# One hand-made training sample (dummy data).
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# so that any other cell relying on it keeps working.
target = np.array([0.3])
input = np.array([0.1, 0.2])

# Forward pass: compute the network's prediction for the sample.
output = mlp.forward_propagate(input)

# Signed error between the expected value and the prediction.
error = target - output

# Backward pass; verbose=True prints the derivative computed for each weight matrix.
mlp.back_propagate(error, verbose=True)

# Single weight update with a learning rate of 0.1.
mlp.gradient_descent(learning_rate=0.1)

Derivative for W1: [[-0.0351131 ]
 [-0.03541882]
 [-0.03429802]
 [-0.03341844]
 [-0.03616995]]
Derivative for W0: [[-0.00105089 -0.00129954 -0.0003902  -0.00143482 -0.00131871]
 [-0.00210178 -0.00259907 -0.0007804  -0.00286963 -0.00263742]]
original W0 = [[0.68873874 0.86574572 0.067299   0.55675034 0.90974053]
 [0.73002426 0.73913051 0.78145715 0.25813579 0.95793435]]
updated W0 = [[0.68863365 0.86561576 0.06725998 0.55660686 0.90960866]
 [0.72981408 0.7388706  0.78137911 0.25784882 0.9576706 ]]
original W1 = [[0.67031696]
 [0.83072907]
 [0.24767907]
 [0.90732948]
 [0.84822154]]
updated W1 = [[0.66680565]
 [0.82718719]
 [0.24424927]
 [0.90398764]
 [0.84460455]]


In [6]:
# Inspect the current weight matrices of `mlp`.
# NOTE(review): the saved output below shows three matrices (3x3, 3x5, 5x2),
# which does not match MLP(2, [5], 1); it looks like a leftover from an earlier
# run with a different network. Re-run this cell to refresh it.
mlp.weights

[array([[0.72710192, 0.9732658 , 0.4867014 ],
        [0.68513625, 0.3613799 , 0.04730775],
        [0.70226945, 0.60082526, 0.84729453]]),
 array([[0.62551885, 0.72825221, 0.90793098, 0.72033902, 0.17680321],
        [0.57589463, 0.29571383, 0.84027589, 0.17464583, 0.19593625],
        [0.08335056, 0.05957612, 0.05779832, 0.54067301, 0.56997644]]),
 array([[0.06248954, 0.45764739],
        [0.75112079, 0.91006899],
        [0.66130042, 0.80595545],
        [0.74847964, 0.60260046],
        [0.23913258, 0.11031879]])]

In [6]:
# Scratch check of the reshape logic: `a` stands in for a 1-D vector of length 2.
a = np.array([1, 2])
a

array([1, 2])

In [9]:
# reshape(n, -1) turns the 1-D vector into an (n, 1) column; .T flips it into
# a (1, n) row vector.
a_delta = a.reshape(a.shape[0], -1).T

a_delta

array([[1, 2]])

In [10]:
# Confirm that the result is a single row with one column per element of `a`.
a_delta.shape

(1, 2)

In [11]:
# Second 1-D test vector (length 5) for the outer-product check further down.
b = np.array([1, 2, 3, 4, 5])

b

array([1, 2, 3, 4, 5])

In [12]:
# reshape(n, -1) turns the 1-D vector into an (n, 1) column vector.
b_activation = b.reshape(b.shape[0], -1)

b_activation

array([[1],
       [2],
       [3],
       [4],
       [5]])

In [13]:
# (5, 1) column times (1, 2) row gives a (5, 2) matrix: the outer product of b and a.
np.dot(b_activation, a_delta)

array([[ 1,  2],
       [ 2,  4],
       [ 3,  6],
       [ 4,  8],
       [ 5, 10]])