## 1-2 AUTOGRAD: AUTOMATIC DIFFERENTIATION

In [1]:
import torch

In [2]:
# Create a 2x2 tensor of ones and flag it so autograd records the operations
# applied to it.
x = torch.ones((2, 2), requires_grad=True)

In [3]:
# Show x; the printed repr below includes requires_grad=True.
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


### 인자 없이 호출하는 backward() 는 스칼라 아웃풋일 때만 가능

In [4]:
# Adding a constant to x produces a new tensor. Its printed repr below carries
# grad_fn=<AddBackward0>, i.e. it records the operation that created it.
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [5]:
# grad_fn is the object describing the operation that produced y
# (an AddBackward0 instance in the output below).
print(y.grad_fn)

<AddBackward0 object at 0x7f8c083225f8>


In [8]:
# No backward pass has completed yet; the recorded cell shows no output.
x.grad

In [11]:
# y is a 2x2 tensor, and backward() without an explicit gradient argument only
# works on scalar outputs, so this call is expected to raise. The error is the
# point of the cell: catch it and print it in the same "Type: message" form so
# the demonstration stays visible while "Run All" can continue past this cell.
try:
    y.backward()
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: grad can be implicitly created only for scalar outputs

In [12]:
# Reduce x to a single scalar so that backward() can be called with no
# arguments, then backpropagate from it.
z = torch.sum(x)
print(z)
z.backward()

tensor(4., grad_fn=<SumBackward0>)


In [13]:
# Nothing is displayed here: z was computed from x directly, so y took no part
# in the backward pass above.
y.grad

In [14]:
# d(sum(x))/dx is 1 for every element, matching the all-ones gradient below.
x.grad

tensor([[1., 1.],
        [1., 1.]])

### mean 도 스칼라를 만든다

In [15]:
# Rebind x to a brand-new 2x2 tensor of ones (tracked by autograd) for the
# next example.
x = torch.ones((2, 2), requires_grad=True)

In [16]:
# y = 3 * x^2, element-wise. The last operation is a multiplication, hence the
# grad_fn=<MulBackward0> in the printed repr.
y = torch.mul(x * x, 3)
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<MulBackward0>)


In [17]:
# Average all elements of y into one scalar that backward() can start from.
z = torch.mean(y)
print(z)

tensor(3., grad_fn=<MeanBackward1>)


In [18]:
# Backpropagate from the scalar mean; the resulting x.grad is displayed in a
# later cell.
z.backward()

In [21]:
# Only the last expression of a cell is displayed, so the value of y.grad is
# not shown here; the output below is y itself.
y.grad
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<MulBackward0>)

In [22]:
# Display x: its values are still all ones after backward().
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [23]:
x.grad # d(3*a^2)/da = 3*2*a; the mean over 4 elements divides by 4, giving 3*2*a / 4; with a = 1 that is 3/2

tensor([[1.5000, 1.5000],
        [1.5000, 1.5000]])

### norm 도 스칼라를 만든다

In [24]:
# Sample a random 3-element tensor that autograd will track.
x = torch.randn(3, requires_grad=True)
print(x)

# Keep doubling until the Euclidean norm reaches 1000. The number of doublings
# depends on the random draw, so the length of the recorded graph is
# data-dependent.
# The loop test uses detach() instead of the legacy .data attribute: both give
# a tensor that is excluded from the graph, but detach() is the supported API.
y = x * 2
while y.detach().norm() < 1000:
    y = y * 2

print(y)

tensor([ 0.4263, -1.2995,  0.3972], requires_grad=True)
tensor([  436.4805, -1330.6852,   406.7566], grad_fn=<MulBackward0>)


In [31]:
# Quick check of what norm() computes: for (3, 4) the result shown below is 5,
# i.e. the Euclidean length.
torch.tensor([3.,4.]).norm()

tensor(5.)

In [33]:
# Collapse the vector y to a scalar so backward() can be called with no
# arguments.
z = y.sum()

In [37]:
# Display the scalar sum of y; its repr carries grad_fn=<SumBackward0>.
z

tensor(-487.4481, grad_fn=<SumBackward0>)

In [34]:
# Backpropagate from the scalar z; the resulting x.grad is displayed in a
# later cell.
z.backward()

In [36]:
# Display y again; the values match those printed before backward().
y

tensor([  436.4805, -1330.6852,   406.7566], grad_fn=<MulBackward0>)

In [35]:
# No output is recorded for this cell: .grad was not populated for the
# intermediate tensor y (contrast with x.grad in the following cell).
y.grad

In [38]:
# Each doubling multiplies the gradient by 2, so dz/dx for z = sum(y) is the
# total scale factor between x and y (1024 in the recorded run below).
x.grad

tensor([1024., 1024., 1024.])

In [39]:
# Display x for comparison with y (y is x scaled by the factor seen in x.grad).
x

tensor([ 0.4263, -1.2995,  0.3972], requires_grad=True)

In [40]:
# NOTE(review): presumably relates the loop threshold (1000) to the gradient
# factor (1024) observed above; the intent is not stated in the notebook.
1000/1024

0.9765625

### 한번 backward 하면 그래프의 버퍼가 해제되어 기본적으로 다시 backward 할 수 없다 (첫 backward 호출 때 retain_graph=True 옵션을 주면 다시 할 수 있다)

In [43]:
# z has already been backpropagated once, so a second backward() through the
# same graph is expected to raise (see the recorded message below). Catch the
# error and print it in the same "Type: message" form so the demonstration
# stays visible while "Run All" can continue past this cell.
try:
    z.backward()
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [42]:
# y is a 3-element vector, so backward() without a gradient argument is
# expected to raise. Catch the error and print it in the same "Type: message"
# form so the demonstration stays visible while "Run All" can continue.
try:
    y.backward()
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: grad can be implicitly created only for scalar outputs

In [44]:
# Supplying a gradient tensor removes the scalar-output restriction, but the
# graph behind y was already consumed by the earlier z.backward(), so this call
# is still expected to raise. Catch the error and print it in the same
# "Type: message" form so "Run All" can continue past this cell.
try:
    y.backward(torch.tensor([1., 0.1, 0.01]))
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

### 마지막 연산결과가 스칼라가 아니라 벡터라도, 같은 shape 의 gradient 텐서(요소별 가중치)를 backward 에 넘겨주면 backward 할 수 있다

In [45]:
# Sample a fresh random 3-element tensor that autograd will track, so this
# example starts from a new graph.
x = torch.randn(3, requires_grad=True)
print(x)

# Keep doubling until the Euclidean norm reaches 1000. The number of doublings
# depends on the random draw.
# The loop test uses detach() instead of the legacy .data attribute: both give
# a tensor that is excluded from the graph, but detach() is the supported API.
y = x * 2
while y.detach().norm() < 1000:
    y = y * 2

print(y)

tensor([-0.1447, -0.1552, -0.8142], requires_grad=True)
tensor([ -296.3103,  -317.9381, -1667.4033], grad_fn=<MulBackward0>)


In [46]:
# y is a vector, so backward() is given a tensor of the same length; each entry
# weights the corresponding element of y (compare with x.grad shown below).
y.backward(torch.tensor([1., 0.1, 0.001]))

In [47]:
# No output is recorded for this cell: .grad was not populated for the
# intermediate tensor y.
y.grad

In [48]:
# Each entry is the weight passed to backward() times the doubling factor
# between x and y (2048 in the recorded run): 2048 * [1, 0.1, 0.001].
x.grad

tensor([2048.0000,  204.8000,    2.0480])

### backward 한 다음에 새 연산(sum)을 추가하더라도, 이미 backward 가 끝난 그래프 부분을 다시 지나가야 하므로 또 backward 할 수 없다

In [49]:
# Display y; the values are the same as those printed before backward().
y

tensor([ -296.3103,  -317.9381, -1667.4033], grad_fn=<MulBackward0>)

In [50]:
# Build a new scalar on top of y, whose graph has already been backpropagated
# through once.
z = y.sum()

In [51]:
# z is new, but its graph runs through y, which was already backpropagated, so
# this call is expected to raise (see the recorded message below). Catch the
# error and print it in the same "Type: message" form so the demonstration
# stays visible while "Run All" can continue past this cell.
try:
    z.backward()
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

### requires_grad 속성과 torch.no_grad() 컨텍스트 매니저

In [52]:
# x is tracked, and so is a result computed from it ...
print(x.requires_grad)
print(x.pow(2).requires_grad)

# ... but the same computation inside torch.no_grad() is not tracked.
with torch.no_grad():
    print(x.pow(2).requires_grad)

True
True
False
