# An example of numpy

[Ref](https://github.com/jcjohnson/pytorch-examples#warm-up-numpy)

In [1]:
import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

loss = (y - y')^2
grad = D(loss) = 2 * (y - y')

From this part, we will know how to back prop

In [2]:
learning_rate = 1e-6
for t in range(500):
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)
    
    loss = np.square(y_pred - y).sum()
    print(t, loss)
    
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)
    
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 28379264.1541
1 22592861.3316
2 21819930.7089
3 22526947.3178
4 22439549.8088
5 20034965.1955
6 15575604.4246
7 10539978.6395
8 6493060.96062
9 3834343.88414
10 2301900.23016
11 1459490.93864
12 997584.05121
13 733869.383068
14 573541.288196
15 468028.141188
16 392984.716294
17 336231.535632
18 291308.374709
19 254589.49716
20 223911.454163
21 197887.2457
22 175580.006491
23 156342.932947
24 139633.972068
25 125040.403104
26 112259.103479
27 101018.89215
28 91100.2681389
29 82324.9482786
30 74538.5715723
31 67616.1816783
32 61437.1792924
33 55916.5096437
34 50983.6268388
35 46553.153432
36 42566.878146
37 38971.6373381
38 35723.1992704
39 32786.1014217
40 30124.5659233
41 27709.1610963
42 25515.4336091
43 23519.546644
44 21701.0666405
45 20045.6434629
46 18538.5969521
47 17160.0749296
48 15898.9192419
49 14741.4732546
50 13678.9830339
51 12702.4103067
52 11803.8956123
53 10976.5805077
54 10214.416594
55 9511.65376573
56 8862.53537711
57 8262.85182713
58 7708.29383035
59 7195.07698005

# PyTorch: Autograd

In [20]:
# Code in file autograd/two_layer_net_autograd.py
import torch
from torch.autograd import Variable

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs, and wrap them in Variables.
# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Variables during the backward pass.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Create random Tensors for weights, and wrap them in Variables.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Variables during the backward pass.
w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

In [23]:
learning_rate = 1e-6

for t in range(500):
  y_pred = x.mm(w1).clamp(min=0).mm(w2)
  loss = (y_pred - y).pow(2).sum()
  print(t, loss.data[0])
  w1.grad.data.zero_()
  w2.grad.data.zero_()
  loss.backward()
  w1.data -= learning_rate * w1.grad.data
  w2.data -= learning_rate * w2.grad.data

0 26620730.0


AttributeError: 'NoneType' object has no attribute 'data'