In this chapter, we will only make our Backpropagation algorithm a little more efficient using vectorization. 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from numba import njit
import matplotlib.image as img

In [2]:
class Backprop:

    def sigmoid(r):
            return (1 + np.exp(-r)) ** (-1)

    def train(x_train, y_train, hidden_units = 3, learning_rate = 10e-4, tol = 10e-3):
        # We need y_train to be 2D. There should be as many rows as there are x_train vectors
        N = x_train.shape[0]
        I = x_train.shape[1]
        J = hidden_units 
        K = y_train.shape[1]

        x_train = np.hstack((x_train, -np.ones(N).reshape(-1,1))).T 
        
        W_input = np.random.rand(J, I+1)
        W_hidden = np.random.rand(K, J+1) 

        m = 0
        learning = True

        while learning:

            ##### ----- Phase 1: Forward Propagation ----- #####
               
            u_hidden = W_input @ x_train 
            h = np.vstack((Backprop.sigmoid(u_hidden), -np.ones(N)))
            y_pred = W_hidden @ h 
            
        
            ##### ----- Phase 2: Backward Propagation ----- #####
         
            delta_output = y_pred.T - y_train 
            E_output = delta_output.T @ h.T
            W_delta_output = -learning_rate * E_output
            W_hidden += W_delta_output

            delta_hidden = ( delta_output @ W_delta_output ) * h.T*(1 - h.T)
            E_hidden = delta_hidden[:, :-1].T @ x_train.T
            W_delta_hidden = -learning_rate * E_hidden
            W_input += W_delta_hidden


            if ( np.sum(E_hidden**2) + np.sum(E_output**2) ) < tol: 
               learning = False
            
            m += 1 # Iteration count
            
        Backprop.weights = [W_input, W_hidden]
        Backprop.iterations = m


 ##### ----- #####


    def predict(x):
        N = x.shape[0]
        x = np.hstack((x, -np.ones(N).reshape(-1,1))).T
        output = ( Backprop.weights[1] @ np.vstack(   ( Backprop.sigmoid(Backprop.weights[0] @ x), -np.ones(N) )   ) ).T
        return output




In [3]:
x_train = np.array([[1., 2., -3., 10.], [0.3, -7.8, 1., 2.]])
y_train = np.array([[10, -3, 6, 1], [1, 1, 6, 1]])

it = []
mae = []
sqerror = []

for tol in [10e-1, 10e-2, 10e-3, 10e-4]:
    Backprop.train(x_train, y_train, tol = tol)
    a = Backprop.predict(x_train)
    it.append(Backprop.iterations)
    mae.append(np.abs((y_train - a)).mean())
    sqerror.append( ((y_train - a)**2).sum() )

print("Iterations:", it)
print("Mean absolute error:", mae)
print("Sum of squared errors:", sqerror)

Iterations: [2598, 4911, 6535, 8634]
Mean absolute error: [0.41321719278083047, 0.1387389734104736, 0.04151271030082801, 0.012662724617159588]
Sum of squared errors: [1.8504269564944793, 0.1983681070166248, 0.017430715116277258, 0.00174373340785554]
