In [50]:
import math
import numpy as np

def sigmoid(x):
  return 1 / (1 + math.exp(-x))
sigmoid_v = np.vectorize(sigmoid)

def relu_derivative(x):
    return x > 0
relu_derivative_v = np.vectorize(relu_derivative)

In [179]:
# X.shape = (4, 3) (num_examples, num_input_features)
# y.shape = (4, 1) (num_examples, num_output_features)

X = np.array([[0, 0, 1], [1, 1, 0], [1, 0, 0], [0, 1, 1]])
y = np.array([[0, 1, 1, 0]]).T

W1 = np.random.random((3, 4)) - 0.5 # (num_input_features, hidden_features)
b1 = np.random.random((1, 4)) - 0.5
W2 = np.random.random((4, 1)) - 0.5 # (hidden_features, num_output_features)
b2 = np.random.random((1, 1)) - 0.5

for i in range(100):
    # compute forward pass
    z_1 = np.dot(X, W1) + b1 # (num_examples, 4)
    a_1 = np.maximum(z_1, 0) # (num_examples, 4)
    z_2 = np.dot(a_1, W2) + b2 # (num_examples, 1)
    a_2 = sigmoid_v(z_2) # (num_examples, 1)
#     L = np.sum((y - a_2)**2)
    L = - np.sum(y * np.log(a_2) + (1-y) * (np.log(1-a_2)))
    print(L)

    # compute backward pass
#     da_2 = (-2 * y + 2 * a_2) * L # dL/da_2 ; (4, 1)
    da_2 = (-y / a_2) + (1-y)/(1-a_2) * L
    dz_2 = (a_2 * (1 - a_2)) * da_2 # dL/dz_2 = da_2/d_z2 * dL/da_2 ; (4, 1)
    dW2 = np.dot(a_1.T, dz_2)  # dL/dW2 = dz_2/dW2 * dL/dz_2 ; (4, 1)
    db2 = dz_2
    da_1 = np.dot(dz_2, W2.T) # dL/da_1 = dz_2/da_1 * dL/d_z2 (4, 4)
    dz_1 = relu_derivative_v(z_1) * da_1 # dL/dz_1 = dz_1/da_1 * dL/dz_1 (4, 4)
    dW1 = np.dot(X.T, dz_1) # dL/dw_1 = dz_1/dW1 * dL/dz_1 (3, 4)
    db1 = np.sum(dz_1, axis=1) # 1

    # update gradients
    W2 = W2 - 0.03 * dW2
    b2 = b2 - 0.03 * db2
    W1 = W1 - 0.03 * dW1
    b1 = b1 - 0.03 * db1
    
    # Original bugs:
    # - in `dz_2 = (a_2 * (1 - a_2)) * da_2`, accidentally used z_2 instead of a_2
    # - in da_2 computation, forgot to multiply by loss

2.890303145784619
2.7997001198377207
2.723596884576048
2.656814622955279
2.596644540802915
2.541396578982859
2.4899561958848464
2.441553604078974
2.395635313651831
2.351788739095291
2.309696043354913
2.269104930993223
2.2298097040247353
2.1916387652117764
2.154446293185981
2.1181066727658804
2.0825107622120314
2.0475633810167895
2.0131815939439277
1.9792934960569892
1.9458372945690443
1.912760550049176
1.880019489158516
1.8475783377979915
1.815408649939274
1.7834886254180673
1.7518024212538044
1.7203394671360885
1.6890937979648921
1.6580634159603398
1.6272496928794813
1.59665682008597
1.566291311196835
1.5361615591722195
1.5062774472555038
1.4766500112273737
1.4472911490354132
1.418213372970057
1.3898999959251803
1.3621130435028632
1.3345980160031605
1.3073684406081663
1.2804379014961156
1.2538199204904577
1.2275278503271134
1.2015747780793906
1.1759734368364467
1.1507361242500829
1.125874627029115
1.1014001508597882
1.0773232555588657
1.0536537955214667
1.0304008657099795
1.0075727535