1、 Hand code a 3-layer NN with binary cross entropy loss ( only NumPy allowed ), including bias term ( B) 

2、 Hand code a 3 layer NN with leaky relu as activation,  linear output and MSE as loss 

In [1]:
import numpy as np

In [2]:
N, D_in, H1, H2, D_out = 64, 1000, 128, 32, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H1)*0.01
w2 = np.random.randn(H1, H2)*0.01
w3 = np.random.randn(H2, D_out)*0.01
b1 = np.zeros(H1)
b2 = np.zeros(H2)
b3 = np.zeros(D_out)

eta = 1e-3

In [3]:
def relu(x):
    A = np.maximum(0, x)
    return A, x

def leaky_relu(Z):
    A = np.maximum(0.1 * Z, Z)
    return A, Z


In [4]:
def forward(x, w1, w2, w3, b1, b2, b3):
    Z1 = np.dot(x, w1) + b1
    A1, Z1 = leaky_relu(Z1)

    Z2 = np.dot(A1, w2) + b2
    A2, Z2 = leaky_relu(Z2)

    Z3 = np.dot(A2, w3) + b3
    A3 = Z3
    return A1, A2, A3

# _, _, A3 = forward(x, w1, w2, w3, b1, b2, b3)
# print(A3)

In [5]:
def leaky_relu_gradient(A):
    dA = np.ones_like(A)
    dA[A < 0] = 0.1
    return dA

In [6]:
def backward(A1, A2, A3, W1, W2, W3, b1, b2, b3, x, y, eta):
    dA2 = leaky_relu_gradient(A2)
    dA1 = leaky_relu_gradient(A1)

    delta3 = 2.0 * (A3 - y)
    delta2 = dA2 * (delta3@W3.T)
    delta1 = dA1 * (delta2@W2.T)
    
    W3 = W3 - eta * (A2.T@delta3)
    W2 = W2 - eta * (A1.T@delta2)
    W1 = W1 - eta * (x.T@delta1)
    
    b3 = b3 - eta * delta3
    b2 = b2 - eta * delta2
    b1 = b1 - eta * delta1
    
    return W1, W2, W3, b1, b2, b3

In [7]:
for t in range(500):
    A1, A2, A3 = forward(x, w1, w2, w3, b1, b2, b3)
    
    loss = np.square(A3 - y).sum()
    print(f"{t}:\t{loss}")

    w1, w2, w3, b1, b2, b3 = backward(A1, A2, A3, w1, w2, w3, b1, b2, b3, x, y, eta)

0:	604.9474886526497
1:	602.444877688097
2:	599.9527162306115
3:	597.4698258349067
4:	594.994832188803
5:	592.5256230409918
6:	590.059888217126
7:	587.5965628471556
8:	585.1327961805997
9:	582.6667801143243
10:	580.1962881107254
11:	577.7168571104282
12:	575.2237437011379
13:	572.7141958106569
14:	570.182078211494
15:	567.6205857626227
16:	565.0193579720385
17:	562.3637781882201
18:	559.6431537613762
19:	556.8394533480191
20:	553.9277977008636
21:	550.8713404986927
22:	547.6215019249028
23:	544.121755637515
24:	540.2830004135628
25:	535.9959696013007
26:	531.1231297415047
27:	525.4803377790536
28:	518.8883726454364
29:	511.19738177505457
30:	502.4386434571961
31:	493.00164993469275
32:	483.6223966426785
33:	474.9727127180828
34:	467.04409742468204
35:	459.22464658214363
36:	450.9169641892131
37:	441.5858912694456
38:	431.01866320667426
39:	419.2667871569815
40:	406.788992048413
41:	393.96643632902357
42:	380.9952811212901
43:	368.04183572703437
44:	354.72677787584223
45:	340.8622446179

376:	0.015265956601808295
377:	0.015642398141643303
378:	0.016134149204837217
379:	0.016536645994129746
380:	0.017054380859128915
381:	0.017479461424928542
382:	0.018024529892465536
383:	0.018479631593028678
384:	0.01905335078608606
385:	0.01953303257920771
386:	0.020137226930029513
387:	0.020652946759374825
388:	0.02125395133892733
389:	0.02180114516543721
390:	0.022435728273113242
391:	0.02301997752969726
392:	0.023759912097613883
393:	0.024386712312609266
394:	0.025111535322219523
395:	0.025843807068256505
396:	0.026617297248170215
397:	0.027325155234863632
398:	0.028148818998973365
399:	0.028891292002599173
400:	0.029778818423364176
401:	0.030569342166816296
402:	0.0314983373727008
403:	0.03233653855465053
404:	0.03355463443128539
405:	0.03442948518113686
406:	0.035476276708903395
407:	0.0363981769128129
408:	0.03750509724029744
409:	0.038473421067846895
410:	0.039642069790958315
411:	0.040669321363682384
412:	0.041911407500972345
413:	0.04297448299251262
414:	0.04429121749808957
4