1、 Hand code a 3-layer NN with binary cross entropy loss ( only NumPy allowed ), including bias term ( B) 

2、 Hand code a 3 layer NN with leaky relu as activation,  linear output and MSE as loss 

In [1]:
import numpy as np

In [2]:
N, D_in, H1, H2, D_out = 64, 1000, 128, 32, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H1)*0.01
w2 = np.random.randn(H1, H2)*0.01
w3 = np.random.randn(H2, D_out)*0.01
b1 = np.zeros(H1)
b2 = np.zeros(H2)
b3 = np.zeros(D_out)

eta = 1e-3

In [3]:
def relu(x):
    A = np.maximum(0, x)
    return A, x

def leaky_relu(Z):
    A = np.maximum(0.1 * Z, Z)
    return A, Z


In [4]:
def forward(x, w1, w2, w3, b1, b2, b3):
    Z1 = np.dot(x, w1) + b1
    A1, Z1 = leaky_relu(Z1)

    Z2 = np.dot(A1, w2) + b2
    A2, Z2 = leaky_relu(Z2)

    Z3 = np.dot(A2, w3) + b3
    A3 = Z3
    return A1, A2, A3

# _, _, A3 = forward(x, w1, w2, w3, b1, b2, b3)
# print(A3)

In [5]:
def leaky_relu_gradient(A):
    dA = np.ones_like(A)
    dA[A < 0] = 0.1
    return dA

In [6]:
def backward(A1, A2, A3, W1, W2, W3, b1, b2, b3, x, y, eta):
    dA2 = leaky_relu_gradient(A2)
    dA1 = leaky_relu_gradient(A1)

    delta3 = 2.0 * (A3 - y)
    delta2 = dA2 * (delta3@W3.T)
    delta1 = dA1 * (delta2@W2.T)
    
    W3 = W3 - eta * (A2.T@delta3)
    W2 = W2 - eta * (A1.T@delta2)
    W1 = W1 - eta * (x.T@delta1)
    
    b3 = b3 - eta * delta3
    b2 = b2 - eta * delta2
    b1 = b1 - eta * delta1
    
    return W1, W2, W3, b1, b2, b3

In [7]:
for t in range(500):
    A1, A2, A3 = forward(x, w1, w2, w3, b1, b2, b3)
    
    loss = np.square(A3 - y).sum()
    print(f"{t}:\t{loss}")

    w1, w2, w3, b1, b2, b3 = backward(A1, A2, A3, w1, w2, w3, b1, b2, b3, x, y, eta)

0:	651.2828747661738
1:	651.2801989987558
2:	651.2775232418867
3:	651.2748474955652
4:	651.2721717597904
5:	651.2694960345609
6:	651.2668203198757
7:	651.2641446157336
8:	651.2614689221336
9:	651.2587932390745
10:	651.2561175665551
11:	651.2534419045743
12:	651.2507662531309
13:	651.2480906122239
14:	651.2454149818519
15:	651.242739362014
16:	651.240063752709
17:	651.2373881539357
18:	651.2347125656931
19:	651.2320369879799
20:	651.2293614207949
21:	651.2266858641374
22:	651.2240103180056
23:	651.2213347823989
24:	651.218659257316
25:	651.2159837427556
26:	651.2133082387168
27:	651.2106327451982
28:	651.2079572621988
29:	651.2052817897174
30:	651.2026063277531
31:	651.1999308763045
32:	651.1972554353706
33:	651.1945800049502
34:	651.191904585042
35:	651.1892291756451
36:	651.1865537767584
37:	651.1838783883804
38:	651.1812030105104
39:	651.1785276431469
40:	651.175852286289
41:	651.1731769399355
42:	651.1705016040852
43:	651.167826278737
44:	651.1651509638897
45:	651.1624756595423
46:	

446:	650.0905926196403
447:	650.0879219333974
448:	650.0852512571632
449:	650.0825805909367
450:	650.0799099347164
451:	650.0772392885016
452:	650.074568652291
453:	650.0718980260833
454:	650.0692274098776
455:	650.0665568036727
456:	650.0638862074675
457:	650.061215621261
458:	650.0585450450517
459:	650.0558744788389
460:	650.0532039226212
461:	650.0505333763975
462:	650.0478628401669
463:	650.045192313928
464:	650.0425217976799
465:	650.0398512914212
466:	650.037180795151
467:	650.0345103088682
468:	650.0318398325714
469:	650.0291693662598
470:	650.0264989099321
471:	650.023828463587
472:	650.0211580272239
473:	650.0184876010467
474:	650.0158171852081
475:	650.013146779261
476:	650.0104763831052
477:	650.0078059973838
478:	650.0051356298975
479:	650.0024653145053
480:	649.9997950091008
481:	649.9971247134265
482:	649.9944544282203
483:	649.9917841525233
484:	649.9891138870129
485:	649.9864436315379
486:	649.9837733857607
487:	649.9811031504191
488:	649.9784329246348
489:	649.97576270