In [2]:
# Single-hidden-layer neural network: 3 inputs -> 4 hidden units -> 2 outputs
# Layer 1       Layer 2        Layer 3
# O               O              
# O               O              O 
# O               O              O
#                 O
# w_jk^{1}       w_jk^{2}       w_jk^{3}
# b_j^{1}        b_j^{2}        b_j^{3}
# j indexes the j^th neuron in the current layer `l`
# k indexes the k^th neuron in the previous layer `l-1`
# l = {1, 2, 3}
# a_j^{l} is the activation of the j^th neuron in the l^th layer

import numpy as np
from ops import ReLU, Linear, MeanSquaredError

# Seed NumPy's global RNG so the np.random draws made in later cells are
# repeatable from a fresh kernel.
np.random.seed(0)

In [3]:
# Forward pass through the 3-layer network, keeping every intermediate
# (z = pre-activation, a = activation) as a global for the backprop cells.
#
# NOTE: the random values depend on the ORDER of the np.random calls below
# (x, y, w2, w3) under the fixed seed; reordering them changes the numbers.

# Dimensions
# j<l> = number of neurons in layer l; k<l> = number of neurons in layer l-1,
# so each weight matrix w<l> is created with shape (j<l>, k<l>).
j1 = 3

j2 = 4
k2 = j1

j3 = 2
k3 = j2

# Functions
# Activation and cost objects imported from the project-local `ops` module.
# NOTE(review): their exact semantics are not visible here -- `linear` is
# presumably the identity activation; confirm against `ops`.
relu = ReLU()
linear = Linear()
cost = MeanSquaredError()

# Input vector
# A single sample: x has one value per layer-1 neuron, and the target y has
# one value per layer-3 (output) neuron.
# NOTE(review): `num_features` is not read by any visible cell.
num_features = 1
x = np.random.normal(size=(j1, ))
y = np.random.normal(size=(j3,))

# Layer components
# Layer 1 is the input layer: no weights, biases or pre-activation are
# defined for it; its activation is just `linear` applied to x.
w1 = None
b1 = None
z1 = None
a1 = linear(x)

# Layer 2 (hidden): uniform-random weights, zero biases, ReLU activation.
w2 = np.random.uniform(size=(j2, k2))
b2 = np.zeros(shape=(j2, ))
z2 = np.dot(w2, a1) + b2
a2 = relu(z2)
# Shape check only; np.dot(w2, a1) is recomputed just to display its shape.
print(f'w2 {w2.shape} DOT a1 {a1.shape} + b2 {b2.shape} = {np.dot(w2, a1).shape}')

# Layer 3 (output): uniform-random weights, zero biases, `linear` activation.
w3 = np.random.uniform(size=(j3, k3))
b3 = np.zeros(shape=(j3, ))
# NOTE(review): the right-hand side of this message is left blank -- the
# resulting shape is never interpolated.
print(f'w3 {w3.shape} DOT a2 {a2.shape} = ')
z3 = np.dot(w3, a2) + b3
a3 = linear(z3)

# Shape summary of the forward-pass intermediates.
print('a3:', a3.shape)
print('a2:', a2.shape)

print('z3:', z3.shape)
print('z2:', z2.shape)

print('w3:', w3.shape)

# Open question left by the author. In this cell w2 does enter the forward
# pass: z2 (and therefore a2, z3, a3) is computed from it.
print('Is w2 used at all for calculations?')

w2 (4, 3) DOT a1 (3,) + b2 (4,) = (4,)
w3 (2, 4) DOT a2 (4,) = 
a3: (2,)
a2: (4,)
z3: (2,)
z2: (4,)
w3: (2, 4)
Is w2 used at all for calculations?


In [15]:
def bp1(y_true, a_L, z_L, activation, cost_fn=None):
    """Output-layer error: delta^{L} = dC/db^{L}.

    Multiplies the gradient of the cost with respect to the output
    activations by the derivative of the output activation evaluated at the
    output pre-activation (``*`` is element-wise for NumPy arrays).

    Args:
        y_true: Target values for the output layer.
        a_L: Activations of the output layer.
        z_L: Pre-activations (weighted inputs) of the output layer.
        activation: Object exposing ``derivative(z)`` for the output layer's
            activation function.
        cost_fn: Optional object exposing ``gradient((y_true, a_L))`` -- note
            the single tuple argument. When omitted, the notebook-level
            ``cost`` object is used, which keeps existing calls working
            while letting callers (and tests) supply the cost explicitly
            instead of relying on hidden notebook state.

    Returns:
        The product ``cost_fn.gradient((y_true, a_L)) * activation.derivative(z_L)``.
    """
    if cost_fn is None:
        # Fall back to the global defined in the setup cell.
        cost_fn = cost
    return cost_fn.gradient((y_true, a_L)) * activation.derivative(z_L)

In [23]:
# Error at the output layer (layer 3), whose activation object is `linear`.
delta_L = bp1(y_true=y, a_L=a3, z_L=z3, activation=linear)
print(delta_L.shape)

(2,)


In [21]:
def bp2(w_lyr_plus_one, delta_lyr_plus_1, activation, z_lyr):
    """Error of layer l from the error of layer l+1: delta^{l} = dC/db^{l}.

    Args:
        w_lyr_plus_one: Weights of layer l+1.
        delta_lyr_plus_1: Error (delta) of layer l+1.
        activation: Object exposing ``derivative(z)`` for layer l's
            activation function.
        z_lyr: Pre-activations (weighted inputs) of layer l.

    Returns:
        ``dot(transpose(w^{l+1}), delta^{l+1}) * activation.derivative(z^{l})``.
    """
    # Carry the next layer's error back through its transposed weights ...
    backpropagated_error = np.dot(np.transpose(w_lyr_plus_one), delta_lyr_plus_1)
    # ... then scale it by the slope of this layer's activation at z^{l}.
    activation_slope = activation.derivative(z_lyr)
    return backpropagated_error * activation_slope

In [24]:
# Propagate the output-layer error back through w3 to get the error of the
# hidden layer (layer 2), whose activation object is `relu`.
delta_2 = bp2(w_lyr_plus_one=w3, delta_lyr_plus_1=delta_L, activation=relu, z_lyr=z2)
print(delta_2.shape)

(4,)


In [39]:
def bp4(delta_lyr, a_lyr_minus_1):
    """Gradient of the cost with respect to a layer's weights, dC/dW^{l}.

    Both inputs get a trailing axis of length 1; the activation operand is
    then transposed so the dot product of the two yields, for 1-D inputs of
    lengths m and n, an (m, n) matrix whose [j, k] entry is
    ``delta_lyr[j] * a_lyr_minus_1[k]``.

    Args:
        delta_lyr: Error (delta) of layer l.
        a_lyr_minus_1: Activations of layer l-1.

    Returns:
        The matrix ``dot(delta_column, activation_row)``.
    """
    delta_column = np.expand_dims(delta_lyr, axis=-1)
    activation_row = np.transpose(np.expand_dims(a_lyr_minus_1, axis=-1))
    return np.dot(delta_column, activation_row)

In [40]:
# Gradient of the cost w.r.t. the output-layer weights, built from the
# output-layer error and the hidden-layer activations.
dC_dW_L = bp4(a_lyr_minus_1=a2, delta_lyr=delta_L)
print(dC_dW_L.shape)

(2, 4)
