# An example of numpy

[Ref](https://github.com/jcjohnson/pytorch-examples#warm-up-numpy)

In [33]:
import numpy as np

# Seed NumPy's global RNG so that a fresh "Restart & Run All" regenerates
# exactly the same data and initial weights.
np.random.seed(0)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random input and target data.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialised weights for the two layers.
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

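The loss is the sum of squared errors between the prediction $\hat{y}$ and the target $y$:

$$L = \sum_{i,j} \left(\hat{y}_{ij} - y_{ij}\right)^2$$

Its gradient with respect to the prediction is

$$\frac{\partial L}{\partial \hat{y}} = 2\,(\hat{y} - y)$$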

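This gradient is the starting point of backpropagation: the chain rule carries it backwards through each layer to give the gradients of `w2` and `w1`.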

In [34]:
learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear layer -> ReLU -> linear layer.
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)

    # Sum-of-squares loss; the residual is reused for the gradient below.
    residual = y_pred - y
    loss = np.square(residual).sum()
    print(t, loss)

    # Backward pass: gradient of the loss w.r.t. the prediction, then
    # back through the second layer.
    grad_y_pred = 2.0 * residual
    grad_w2 = np.dot(h_relu.T, grad_y_pred)
    grad_h_relu = np.dot(grad_y_pred, w2.T)

    # ReLU passes gradient only where its input was not negative.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = np.dot(x.T, grad_h)

    # Gradient-descent step, applied in place to both weight matrices.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 34299101.1643
1 29565503.9655
2 27568741.3644
3 23742808.9837
4 18006019.3596
5 11768393.693
6 7123660.72201
7 4214016.4084
8 2623557.79083
9 1761001.56484
10 1280515.18105
11 990929.350064
12 801148.999889
13 666675.734781
14 565450.764787
15 485693.540935
16 420932.59177
17 367243.724993
18 322187.594216
19 283971.504612
20 251301.21809
21 223171.458734
22 198852.290592
23 177697.030838
24 159218.299999
25 143009.708
26 128756.788459
27 116183.273273
28 105053.773652
29 95180.1233597
30 86402.4069531
31 78565.6974445
32 71552.1289647
33 65265.0073094
34 59616.0709188
35 54532.0029776
36 49946.026287
37 45807.1475574
38 42062.0876179
39 38666.2964944
40 35584.2853616
41 32782.0828264
42 30231.4774025
43 27907.9749146
44 25787.6057519
45 23852.6450449
46 22082.832987
47 20462.6055951
48 18977.0471504
49 17613.6788784
50 16360.124457
51 15207.2162411
52 14145.1086254
53 13165.8754219
54 12263.1031313
55 11429.9404792
56 10659.6499393
57 9947.31242726
58 9287.87205449
59 8677.07821408


# PyTorch: Autograd

In [35]:
# Code in file autograd/two_layer_net_autograd.py
import torch
from torch.autograd import Variable

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU

# Seed PyTorch's RNG so that a fresh run regenerates the same data and weights.
torch.manual_seed(0)

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs.
# Newly created Tensors have requires_grad=False, so no gradients are computed
# with respect to them during the backward pass. The deprecated Variable
# wrapper is not needed: Tensors take part in autograd directly.
x = torch.randn(N, D_in).type(dtype)
y = torch.randn(N, D_out).type(dtype)

# Create random Tensors for weights.
# requires_grad_() marks them so that gradients with respect to these Tensors
# are computed during the backward pass.
w1 = torch.randn(D_in, H).type(dtype).requires_grad_()
w2 = torch.randn(H, D_out).type(dtype).requires_grad_()

In [36]:
learning_rate = 1e-6

for t in range(500):
    # Forward pass: linear layer -> ReLU (clamp at 0) -> linear layer.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Sum-of-squares loss; the result is a 0-dimensional Tensor.
    loss = (y_pred - y).pow(2).sum()
    # .item() extracts the Python number. Indexing a 0-dim Tensor with
    # loss.data[0] raises IndexError on current PyTorch releases.
    print(t, loss.item())

    # Autograd fills w1.grad and w2.grad.
    loss.backward()

    # Update the weights without recording the update itself in the graph.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after the update: backward()
        # accumulates into .grad rather than overwriting it.
        w1.grad.zero_()
        w2.grad.zero_()

0 32100066.0
1 25145376.0
2 21534856.0
3 17973230.0
4 13977870.0
5 9971399.0
6 6698530.0
7 4345907.5
8 2830358.75
9 1890046.75
10 1316222.5
11 958731.4375
12 728420.375
13 573663.3125
14 464794.96875
15 384747.84375
16 323439.8125
17 275032.125
18 235916.90625
19 203784.75
20 177041.6875
21 154504.125
22 135407.484375
23 119085.765625
24 105052.453125
25 92934.234375
26 82421.640625
27 73271.3515625
28 65264.26953125
29 58250.859375
30 52090.421875
31 46662.41796875
32 41867.90625
33 37624.62109375
34 33861.05078125
35 30516.4609375
36 27552.943359375
37 24911.75
38 22550.73046875
39 20437.203125
40 18542.623046875
41 16840.609375
42 15307.470703125
43 13926.1845703125
44 12682.4267578125
45 11560.34375
46 10547.2275390625
47 9630.9990234375
48 8800.4638671875
49 8047.419921875
50 7364.1611328125
51 6743.78271484375
52 6180.08349609375
53 5666.763671875
54 5199.29638671875
55 4773.18798828125
56 4384.5048828125
57 4029.81396484375
58 3705.614990234375
59 3408.91162109375
60 3137.516845