In [2]:
import numpy as np
np.set_printoptions(precision=4)

In [3]:
W_0 = np.array([[1, -1],
                [-1, -1]], dtype=float)

In [4]:
W_1 = np.array([[1, 0],], dtype=float)

In [5]:
X = np.array([[1, 1],
              [0, 1],], dtype=float)

In [6]:
t = np.array([[0.5, 1],], dtype=float)

In [7]:
f_1 = 'BSigmoid'
f_2 = 'USigmoid'

In [8]:
lr = 0.1

In [9]:
def USigmoid(x, direction):
    if direction == 'F':
        return 1/(1 + np.exp(-x))
    else:
        return USigmoid(x, 'F')*(1-USigmoid(x, 'F'))

def BSigmoid(x, direction):
    if direction == 'F':
        return (1 - np.exp(-x))/(1 + np.exp(-x))
    else:
        return 0.5*(1-BSigmoid(x, 'F')**2) 

def Tanh(x, direction):
    if direction == 'F':
        return (1 - np.exp(-x))/(1 + np.exp(-x))
    else:
        return 0.5*(1-Tanh(x, 'F')**2) 

def ReLU(x, direction):
    if direction == 'F':
        return max(x, 0)
    else:
        return float(x > 0)

def Lin(x, direction):
    if direction == 'F':
        return x
    else:
        return 1

def activation(Z, fcn='Lin', direction = 'F'):
    A = np.array(list(map(globals()[fcn], Z, np.repeat(direction, Z.shape[0]))))
    return A

In [10]:
MAX_EPOCHS = 1

In [11]:
for ep in range(MAX_EPOCHS):
    print('\nEPOCH-', ep+1, '='*80)
    for itr, (x, y) in enumerate(zip(X.T, t.T)):
        print('\nITER-', itr+1, '-'*80)
        
        ### Forward pass (Input -> Hidden)
        if X.shape[0] == W_0.shape[1]:
            print('No bias')
            A_0 = x.reshape(-1, 1)
        else:
            print('Pad bias at top of input')
            A_0 = np.vstack((np.ones((1, x.shape[1])), x))
        print(f'{A_0 = }')

        print('\nInput -> Hidden')
        Z_1 = (W_0 @ A_0).reshape(-1, 1)
        print(f'    {Z_1 = }')
        A_1 = activation(Z_1, f_1)
        print(f'    {A_1 = }')
        
        ### Forward pass (Hidden -> Output)
        print('\nHidden -> Output')
        if A_1.shape[0] == W_1.shape[1]:
            print('No bias')
        else:
            print('Pad bias at top of A1')
            A_1 = np.vstack((np.ones((1, A_1.shape[1])), A_1))
            print(f'    {A_1 = }')

        Z_2 = (W_1 @ A_1).reshape(-1, 1)
        print(f'    {Z_2 = }')
        A_2 = activation(Z_2, f_2)  
        print(f'    {A_2 = }')
        
        ### Backward pass (Output -> Hidden)
        Error = 0.5*(A_2 - y)**2
        print(f'\n{Error = }')
        
        print('\nOutput -> Hidden')
        dE_dw1 = (A_2 - y) * activation(Z_2, f_2, 'B') * A_1
        print(f'    {dE_dw1 = }')
        
        ### Backward pass  (Hidden -> Input)
        print('\nHidden -> Input')
        if A_1.shape[0] == W_1.shape[1]: # No bias in layer-1
            dE_dw0 = (A_2 - y) * activation(Z_2, f_2, 'B') * (W_1 * activation(Z_1, f_1, 'B')) @ A_0 
        else:
            dE_dw0 = (A_2 - y) * activation(Z_2, f_2, 'B') * (W_1[:, 1:] * activation(Z_1, f_1, 'B')) @ A_0 
        print(f'    {dE_dw0 = }')
        
        ### Update weights
        print('\nWeight update')
        W_1 = W_1 - lr*dE_dw1
        print(f'    {W_1 = }')
        W_0 = W_0 - lr*dE_dw0
        print(f'    {W_0 = }')



ITER- 1 --------------------------------------------------------------------------------
No bias
A_0 = array([[1.],
       [0.]])

Input -> Hidden
    Z_1 = array([[ 1.],
       [-1.]])
    A_1 = array([[ 0.4621],
       [-0.4621]])

Hidden -> Output
No bias
    Z_2 = array([[0.4621]])
    A_2 = array([[0.6135]])

Error = array([[0.0064]])

Output -> Hidden
    dE_dw1 = array([[ 0.0124],
       [-0.0124]])

Hidden -> Input
    dE_dw0 = array([[0.0106],
       [0.0106]])

Weight update
    W_1 = array([[ 0.9988, -0.0012],
       [ 1.0012,  0.0012]])
    W_0 = array([[ 0.9989, -1.0011],
       [-1.0011, -1.0011]])

ITER- 2 --------------------------------------------------------------------------------
No bias
A_0 = array([[1.],
       [1.]])

Input -> Hidden
    Z_1 = array([[-0.0021],
       [-2.0021]])
    A_1 = array([[-0.0011],
       [-0.762 ]])

Hidden -> Output
No bias
    Z_2 = array([[-0.0001],
       [-0.002 ]])
    A_2 = array([[0.5   ],
       [0.4995]])

Error = array([[0