In [4]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing the sum of squared errors (the squared Euclidean
distance between predictions and targets).

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [6]:
# Network dimensions:
#   N     - batch size (number of samples)
#   D_in  - input dimension
#   H     - hidden-layer dimension
#   D_out - output dimension
N, D_in, H, D_out = 64, 1000, 100, 10

# Fixed seed so that Restart & Run All reproduces the same data, the same
# initial weights, and therefore the same loss curve. A dedicated RandomState
# is used (rather than np.random.seed) so the global NumPy RNG is not touched.
SEED = 0
rng = np.random.RandomState(SEED)

# Create random input and output data (standard-normal samples).
# Draw order matters for reproducibility: x, y, then w1, w2.
x = rng.randn(N, D_in)
y = rng.randn(N, D_out)

print('x:',x.shape)
print('y:',y.shape)

# Randomly initialize weights (standard-normal samples).
# w1 maps input -> hidden, w2 maps hidden -> output; there are no bias terms.
w1 = rng.randn(D_in, H)
w2 = rng.randn(H, D_out)

(64, 1000)
(64, 10)


In [3]:
# Plain gradient descent on the two weight matrices.
# Reads x, y, w1, w2 from the enclosing notebook namespace and updates
# w1 and w2 in place, so re-running this cell continues from the current
# weights rather than restarting from the initial ones.
learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear layer -> ReLU -> linear layer
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)

    # Loss is the sum of squared errors over every entry of the prediction
    residual = y_pred - y
    loss = np.square(residual).sum()
    print(t, loss)

    # Backward pass: gradients of the loss with respect to w1 and w2,
    # obtained by applying the chain rule layer by layer.
    grad_y_pred = 2.0 * residual
    grad_w2 = np.dot(h_relu.T, grad_y_pred)
    grad_h_relu = np.dot(grad_y_pred, w2.T)
    # ReLU passes gradient only where its input was not negative
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = np.dot(x.T, grad_h)

    # Gradient-descent step (in place on the existing weight arrays)
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 36916596.505
1 34217905.2308
2 31715701.8279
3 25366558.1638
4 17044492.5693
5 9820468.16877
6 5403401.62991
7 3103421.28983
8 1981969.65343
9 1407809.70094
10 1083743.36177
11 877492.118192
12 732058.947817
13 621833.578951
14 534333.264705
15 462916.487753
16 403637.498073
17 353811.512743
18 311527.141064
19 275430.453072
20 244416.849649
21 217650.999005
22 194441.246037
23 174236.101696
24 156612.38064
25 141136.06555
26 127505.050884
27 115450.898515
28 104771.342984
29 95267.32544
30 86785.4171157
31 79201.3952049
32 72403.4096576
33 66293.137686
34 60790.3254723
35 55822.9646216
36 51330.295204
37 47261.4128926
38 43570.8363896
39 40221.9957939
40 37172.5652471
41 34391.0675495
42 31849.8380289
43 29525.8516298
44 27397.4024168
45 25445.1528809
46 23652.7624125
47 22004.9412626
48 20488.4653351
49 19090.7172045
50 17801.4915509
51 16611.5083001
52 15511.159204
53 14493.3528279
54 13551.2182494
55 12677.4584982
56 11866.9793585
57 11114.6862426
58 10415.751694
59 9765.72665423

470 0.000254582010159
471 0.00024567987059
472 0.000237090817433
473 0.000228804128911
474 0.000220822924536
475 0.000213112718575
476 0.000205670028731
477 0.000198489277184
478 0.000191560916521
479 0.000184876280811
480 0.000178435103271
481 0.000172215491696
482 0.000166210229682
483 0.000160417899886
484 0.000154826450412
485 0.000149431462929
486 0.000144230066661
487 0.000139212121292
488 0.00013436437881
489 0.000129686246573
490 0.000125172163631
491 0.000120816471108
492 0.00011661430249
493 0.000112564706117
494 0.000108649923273
495 0.000104872059432
496 0.000101226959654
497 9.77088605799e-05
498 9.43138765347e-05
499 9.10421829855e-05
