In [None]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x using Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [1]:
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 29998588.763783317
1 29272922.33443578
2 36774794.01259367
3 46792916.11940954
4 49388149.43712423
5 37142263.9704033
6 18736470.005108096
7 7221895.524730694
8 2911032.530135719
9 1562843.9784918837
10 1090684.0071936054
11 867911.249881259
12 727174.3821829073
13 622195.8444858437
14 538139.6894613111
15 468761.87158329994
16 410610.7313539154
17 361422.5961167588
18 319494.0031883376
19 283540.5790239223
20 252503.38680574484
21 225571.59676044213
22 202098.82084234763
23 181552.77658282177
24 163493.99478013278
25 147577.45818541734
26 133542.59433543758
27 121083.68209470766
28 109989.65771434565
29 100084.24034791352
30 91218.4592373317
31 83258.35466645141
32 76102.2797886378
33 69648.7436355197
34 63824.08699594437
35 58554.93464617352
36 53786.32485272916
37 49459.14515985453
38 45528.400346779934
39 41952.24454253974
40 38692.22504088228
41 35719.02763452708
42 33000.656145958914
43 30512.243969509884
44 28234.316279316037
45 26145.888220132612
46 24228.55646486931
47 22466

392 0.00025336533833844626
393 0.0002418281001543319
394 0.00023081917794432716
395 0.00022031792463432788
396 0.00021029319114377026
397 0.00020072887892786038
398 0.00019159952590044367
399 0.0001828883591413042
400 0.00017457685237168524
401 0.00016664527910252645
402 0.00015907417983219063
403 0.0001518502591213508
404 0.00014495776630877162
405 0.00013837857937460437
406 0.00013209826241212516
407 0.00012610562995600752
408 0.00012038599329057771
409 0.00011492734510782309
410 0.00010971679177127226
411 0.00010474379656396755
412 9.99979218050226e-05
413 9.546837604763088e-05
414 9.114596641035484e-05
415 8.701933015620683e-05
416 8.308026265357704e-05
417 7.93202510470218e-05
418 7.57309029454928e-05
419 7.230531150044785e-05
420 6.903534789762249e-05
421 6.59141726148917e-05
422 6.29348610719009e-05
423 6.009116363728936e-05
424 5.737645375612792e-05
425 5.478459735208472e-05
426 5.231028563506505e-05
427 4.994851266951753e-05
428 4.769369650606839e-05
429 4.554098404720245e-05


In [2]:
import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6

for i in range(500):
    # forward pass 
    h = x.dot(w1) # N * H
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2) # N * D_out
    
    loss = np.square(y_pred-y).sum()
    print(t, loss)
    
    # backprop
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

499 28529250.037187114
499 23883226.681441873
499 20599968.16313069
499 16945131.92922391
499 12841566.214822864
499 9009612.660498504
499 6016189.745604845
499 3964649.6543431836
499 2660981.720581793
499 1856535.887145494
499 1356121.0705204485
499 1035329.7514185429
499 820119.1982174846
499 668546.6516299136
499 556779.0512197963
499 470940.1058426277
499 402960.1704097657
499 347885.0051550929
499 302455.98516338255
499 264402.7379333128
499 232225.32113590481
499 204803.57084254193
499 181284.8506519547
499 161002.5034071471
499 143416.1190439611
499 128100.00020134184
499 114718.69354375821
499 102984.22320191684
499 92665.40171997107
499 83560.03497713304
499 75499.57318089777
499 68352.56498826582
499 62004.79669314375
499 56345.7125925399
499 51288.76029686563
499 46754.741581919574
499 42683.166870149944
499 39019.44794692729
499 35715.92120136795
499 32734.587940924728
499 30039.006070274565
499 27596.84742850144
499 25379.61955840986
499 23363.95994757598
499 21529.9398244

499 0.001479782858776325
499 0.0014172763549824728
499 0.001357413283822191
499 0.001300088685135063
499 0.0012452004247593559
499 0.0011926392220204056
499 0.0011423055969384908
499 0.0010941052389370296
499 0.0010479513915491751
499 0.0010037442392397582
499 0.0009614160553835664
499 0.0009208806595620018
499 0.0008820582618188307
499 0.000844879989295525
499 0.0008092791300635118
499 0.0007751809152582138
499 0.0007425239700864438
499 0.0007112559420380386
499 0.0006813042667084017
499 0.0006526192967141823
499 0.0006251504774345124
499 0.0005988376413574104
499 0.0005736384544758719
499 0.0005495059853893329
499 0.0005263906524774066
499 0.0005042517447078998
499 0.00048305166013172014
499 0.00046274176431820656
499 0.00044328887734908944
499 0.0004246605264510855
499 0.0004068153598207123
499 0.00038972314060208566
499 0.0003733532005115066
499 0.0003576708147636136
499 0.0003426517537384263
499 0.000328265228476837
499 0.00031448382303951247
499 0.00030128498519096784
499 0.00028