In [1]:
import torch

## Example 1

In [2]:
x = torch.rand((2,2))
y = torch.rand((2,2))
z = torch.rand((2,2), requires_grad=True)
a = x + y
b = a + z

In [3]:
print(f'x: {x.requires_grad}, y: {y.requires_grad}, z: {z.requires_grad}')
print(f'a: {a.requires_grad}, b: {b.requires_grad}')

x: False, y: False, z: True
a: False, b: True


In [4]:
print(f'x: {x.grad_fn}, y: {y.grad_fn}, z: {z.grad_fn}')
print(f'a: {a.grad_fn}, b: {b.grad_fn}')

x: None, y: None, z: None
a: None, b: <AddBackward0 object at 0x7ff45b16a1d0>


## Example 2

In [5]:
x = torch.ones((2,2), requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [6]:
y = x + 2
y, y.requires_grad

(tensor([[3., 3.],
         [3., 3.]], grad_fn=<AddBackward0>), True)

In [7]:
z = y*y*3
out = z.mean()
print(z)
print(out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


In [8]:
print(x.grad_fn)
print(y.grad_fn)
print(z.grad_fn)
print(out.grad_fn)

None
<AddBackward0 object at 0x7ff4ac9a0f50>
<MulBackward0 object at 0x7ff45b16a2d0>
<MeanBackward0 object at 0x7ff4ac9a0f50>


In [9]:
out.backward()

In [10]:
a = torch.randn(2, 2, requires_grad=False) 
a = ((a * 3) / (a - 1))
b = (a * a).sum()
print(b.requires_grad, a.requires_grad)
print(b.grad_fn)

False False
None


In [11]:
a = torch.randn(2, 2)
a = ((a * 3) / (a - 1))
with torch.no_grad():
    b = (a * a).sum()

print(b.requires_grad, b.grad_fn)

False None


## Homework

"Produce Back Propagation for two layer Neural Network", Loss function is L2_Loss

In [12]:
import torch
device = torch.device('cpu')

In [15]:
# N: batch size
# D_in: input dimension
# H: hidden dimension
# D_out: output dimension
N, D_in, H, D_out = 64, 1000, 100, 10

# random generate x, y
###<your code>###
x = torch.randn((N,D_in))
y = torch.randn((N,D_out))
# initial weight 1, 2
###<your code>###
W1 = torch.randn((D_in,H))
W2 = torch.randn((H,D_out))
# 設置learning rate
learning_rate = 1e-6

# 訓練500個epoch
for t in range(500):
  # 向前傳遞: 計算y_pred
  ###<your code>###
  y_pred = torch.matmul(torch.relu(torch.matmul(x, W1)),W2)
  # 計算loss
  loss = ((y_pred-y)**2).sum()
  ###<your code>###
  print(t, loss.item())

  # 倒傳遞: 計算W1與W2對loss的微分(梯度)
  ###<your code>###
  L2_loss_grad = 2. * (y_pred - y)
  W2_grad = torch.relu(torch.matmul(x, W1)).T.matmul(L2_loss_grad)
  h_grad = L2_loss_grad.matmul(W2.T) * (torch.matmul(x, W1) > 0.)
  W1_grad = x.T.matmul(h_grad)
  # 參數更新
  ###<your code>###
  W1.data -= learning_rate * W1_grad
  W2.data -= learning_rate * W2_grad

0 34884880.0
1 34690084.0
2 37381536.0
3 35959340.0
4 27688812.0
5 16570282.0
6 8336928.5
7 4044473.0
8 2172476.25
9 1363125.25
10 979577.5
11 766489.25
12 628643.125
13 528787.375
14 451358.21875
15 389126.65625
16 337884.21875
17 295144.25
18 259119.625
19 228520.890625
20 202325.65625
21 179836.171875
22 160411.28125
23 143550.15625
24 128871.8046875
25 116021.8125
26 104700.1796875
27 94699.40625
28 85833.53125
29 77953.5625
30 70934.1328125
31 64659.10546875
32 59035.4140625
33 53982.62890625
34 49437.06640625
35 45333.65234375
36 41622.9765625
37 38259.06640625
38 35205.09375
39 32430.19140625
40 29902.865234375
41 27600.359375
42 25499.634765625
43 23578.77734375
44 21824.314453125
45 20217.767578125
46 18742.705078125
47 17387.666015625
48 16141.1259765625
49 14992.7470703125
50 13934.8671875
51 12959.306640625
52 12058.5732421875
53 11226.408203125
54 10456.7626953125
55 9744.4375
56 9085.205078125
57 8476.640625
58 7912.6103515625
59 7389.3720703125
60 6903.7138671875
61 6452