In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the squared Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [3]:
import numpy as np

# Problem dimensions: N = batch size, D_in = input width,
# H = hidden width, D_out = output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Synthetic inputs and regression targets (standard-normal samples).
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Weights of the two bias-free linear layers, also standard-normal.
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear -> ReLU -> linear.
    h = x @ w1
    h_relu = np.clip(h, 0, None)
    y_pred = h_relu @ w2

    # Sum-of-squares loss over the whole batch, reported every step.
    residual = y_pred - y
    loss = np.sum(residual * residual)
    print(t, loss)

    # Backward pass: gradients of the loss w.r.t. w1 and w2 via the chain rule.
    grad_y_pred = 2.0 * residual
    grad_h_relu = grad_y_pred @ w2.T
    grad_w2 = h_relu.T @ grad_y_pred
    # ReLU passes no gradient where its pre-activation was negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = x.T @ grad_h

    # Plain gradient-descent step, applied in place.
    w2 -= learning_rate * grad_w2
    w1 -= learning_rate * grad_w1

0 25062881.13882157
1 18238975.5298796
2 14352044.625412391
3 11543622.878066868
4 9155889.026564527
5 7099258.513963898
6 5364418.7243817
7 3991124.7868398535
8 2946620.393107867
9 2185207.7897777935
10 1638073.8118642322
11 1250436.81346939
12 973529.3187147267
13 774451.437544618
14 628463.2971681902
15 519319.51721018256
16 435791.11812090175
17 370544.23646327085
18 318501.6400026876
19 276347.70534549805
20 241511.36002380087
21 212341.8132243928
22 187618.45674357814
23 166492.39023319527
24 148270.09510010478
25 132486.9299690744
26 118724.2792023516
27 106684.35136235035
28 96108.11421600278
29 86801.64322157175
30 78545.93911146857
31 71192.50825437211
32 64635.290017491825
33 58770.59835698604
34 53520.87616870368
35 48806.90207758742
36 44561.5658470536
37 40732.04758233975
38 37273.6283769807
39 34143.092957058194
40 31305.550286840262
41 28730.575835768526
42 26390.2407564611
43 24261.53005172263
44 22321.150520825737
45 20551.71065641466
46 18936.822145831567
47 17461.11

402 9.792998018963199e-05
403 9.339723814961843e-05
404 8.907508822414144e-05
405 8.4953267500717e-05
406 8.1022794292575e-05
407 7.727582025501889e-05
408 7.370281228964666e-05
409 7.029650315093908e-05
410 6.705016631899893e-05
411 6.395168257834687e-05
412 6.0996207838030095e-05
413 5.817763541121568e-05
414 5.549004159038782e-05
415 5.292719019922301e-05
416 5.048357172703679e-05
417 4.8152853179539756e-05
418 4.5929781145917725e-05
419 4.3809651022279603e-05
420 4.178811330500343e-05
421 3.9861434636553693e-05
422 3.802238906648346e-05
423 3.626885810836465e-05
424 3.4596726557329205e-05
425 3.300179667786599e-05
426 3.1480302181485025e-05
427 3.0029057929040166e-05
428 2.864490444612616e-05
429 2.7324836510176968e-05
430 2.60658246770197e-05
431 2.4865110851361124e-05
432 2.3720974501740052e-05
433 2.2628594870708036e-05
434 2.1586736531469088e-05
435 2.05928047230151e-05
436 1.9644866976875596e-05
437 1.874071533443272e-05
438 1.7878536032906285e-05
439 1.7056042973611134e-05
44