# Backprop by Autograd

## Autograd Basics

In [1]:
import torch

In [2]:
# A tensor created without requires_grad=True does not track gradients.
x = torch.ones(1)
x.requires_grad

False

In [3]:
# Second plain tensor, likewise created without gradient tracking.
y = torch.ones(1)
y.requires_grad

False

In [4]:
# Sum of two tensors; the next cell inspects whether the result tracks gradients.
z = x + y

In [5]:
# Shows whether the result of the addition takes part in gradient tracking.
z.requires_grad

False

In [6]:
# The trailing underscore marks an in-place method: it switches on
# gradient tracking for x itself rather than returning a modified copy.
x.requires_grad_()
x.requires_grad

True

In [7]:
# Recompute the sum so the result is built from the current state of x and y.
z = x + y

In [8]:
# A result requires grad as soon as at least one of its inputs does.
z.requires_grad

True

## Backward

In [9]:
# Create a 2x2 float tensor with gradient tracking enabled from the start.
# Note: this rebinds the name y used in the previous section.
y = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32, requires_grad=True)
y

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [12]:
# Elementwise 2*y + 1. Because y tracks gradients, the result carries a
# grad_fn describing the last operation that produced it (the addition).
z = 2*y + 1
z

tensor([[3., 5.],
        [7., 9.]], grad_fn=<AddBackward0>)

In [13]:
# Autograd-related attributes of y and z before any backward pass.
# end="\n\n" closes each group with the blank separator line.
print("y.requires_grad :", y.requires_grad)
print("z.requires_grad :", z.requires_grad, end="\n\n")

# No backward pass has run yet, so no gradient has been stored.
# Reading .grad on z (a computed, non-leaf tensor) makes PyTorch emit a warning.
print("y.grad :", y.grad)
print("z.grad :", z.grad, end="\n\n")

# y was created directly (no grad_fn); z remembers the op that produced it.
print("y.grad_fn :", y.grad_fn)
print("z.grad_fn :", z.grad_fn)

y.requires_grad : True
z.requires_grad : True

y.grad : None
z.grad : None

y.grad_fn : None
z.grad_fn : <AddBackward0 object at 0x000001F165922790>


  print("z.grad :", z.grad)


In [14]:
# Hold a reference to the backward node so its Python object stays alive;
# presumably this is why later cells print the same address for z.grad_fn.
gz = z.grad_fn
gz_address = id(gz)  # in CPython, id() is the object's memory address

print("z.grad_fn :", gz)
print("Where is z.grad_fn?", gz_address)
# The hex form matches the address shown inside the object's repr.
print("Where is z.grad_fn in hex?", hex(gz_address))

z.grad_fn : <AddBackward0 object at 0x000001F16489F670>
Where is z.grad_fn? 2136285509232
Where is z.grad_fn in hex? 0x1f16489f670


In [15]:
# next_functions lists the backward nodes that run after this one, i.e. the
# nodes of the inputs that produced z. An entry of None means that input
# needs no gradient.
z.grad_fn.next_functions

((<MulBackward0 at 0x1f16595ad90>, 0), (None, 0))

In [16]:
# backward() without arguments only works on a scalar: the implicit starting
# gradient can be created only for scalar outputs, and z is a 2x2 tensor.
# The error is the point of this cell, so catch and display it instead of
# letting it abort a "Restart & Run All".
try:
    z.backward()
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: grad can be implicitly created only for scalar outputs

In [17]:
# Reduce z to a single scalar so that backward() can be called on it
# without supplying an explicit gradient.
out = z.sum()
out

tensor(24., grad_fn=<SumBackward0>)

In [18]:
# Autograd-related attributes of y, z and out before calling backward().
# end="\n\n" closes each group with the blank separator line.
print("y.requires_grad :", y.requires_grad)
print("z.requires_grad :", z.requires_grad)
print("out.requires_grad :", out.requires_grad, end="\n\n")

# backward() has not run yet, so no gradient has been stored anywhere.
# Reading .grad on the computed tensors z and out makes PyTorch emit a warning.
print("y.grad :", y.grad)
print("z.grad :", z.grad)
print("out.grad :", out.grad, end="\n\n")

# Only tensors produced by an operation carry a grad_fn.
print("y.grad_fn :", y.grad_fn)
print("z.grad_fn :", z.grad_fn)
print("out.grad_fn :", out.grad_fn)

y.requires_grad : True
z.requires_grad : True
out.requires_grad : True

y.grad : None
z.grad : None
out.grad : None

y.grad_fn : None
z.grad_fn : <AddBackward0 object at 0x000001F16489F670>
out.grad_fn : <SumBackward0 object at 0x000001F17F0509A0>


  print("z.grad :", z.grad)
  print("out.grad :", out.grad)


In [19]:
# Back-propagate from the scalar out; gradients are accumulated into the
# .grad attribute of the leaf tensors that require grad.
out.backward()

In [20]:
# Same report as before, now taken after out.backward().
# end="\n\n" closes each group with the blank separator line.
print("y.requires_grad :", y.requires_grad)
print("z.requires_grad :", z.requires_grad)
print("out.requires_grad :", out.requires_grad, end="\n\n")

# y now holds d(out)/dy = 2 for every element. The computed tensors z and
# out still show None: their gradients are not kept after the backward pass.
print("y.grad :", y.grad)
print("z.grad :", z.grad)
print("out.grad :", out.grad, end="\n\n")

# Running backward() does not change which tensors have a grad_fn.
print("y.grad_fn :", y.grad_fn)
print("z.grad_fn :", z.grad_fn)
print("out.grad_fn :", out.grad_fn)

y.requires_grad : True
z.requires_grad : True
out.requires_grad : True

y.grad : tensor([[2., 2.],
        [2., 2.]])
z.grad : None
out.grad : None

y.grad_fn : None
z.grad_fn : <AddBackward0 object at 0x000001F16489F670>
out.grad_fn : <SumBackward0 object at 0x000001F17F053220>


  print("z.grad :", z.grad)
  print("out.grad :", out.grad)


In [21]:
# By default, gradients are retained only for leaf tensors. Gradients of
# non-leaf tensors are not kept for later inspection; this is by design,
# to save memory.
print("y == leaf? : ", y.is_leaf)
print("z == leaf? : ", z.is_leaf)

y == leaf? :  True
z == leaf? :  False


## Backward Twice

In [35]:
# Rebuild the small graph from scratch so this cell does not depend on
# whether an earlier cell already ran backward() on it.
y = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32, requires_grad=True)
z = 2*y + 1
out = z.sum()

# The first pass works and frees the intermediate values saved in the graph.
out.backward()

# A second pass through the same graph needs those freed values and raises.
# The error is the point of this cell, so catch and display it instead of
# letting it abort a "Restart & Run All".
try:
    out.backward()
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: Trying to backward through the graph a second time (or directly access saved variables after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved variables after calling backward.

In [36]:
# Fresh copy of the same graph, so the result does not depend on earlier cells.
y = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=torch.float32, requires_grad=True)
z = y * 2 + 1
out = torch.sum(z)

# retain_graph=True keeps the saved intermediate values alive after this
# pass, which is what allows the graph to be traversed again.
out.backward(retain_graph=True)

# Second pass over the retained graph; the graph is freed afterwards.
out.backward()

In [37]:
# Gradients accumulate across backward passes: each pass adds 2 per element,
# so two passes give 4.
y.grad

tensor([[4., 4.],
        [4., 4.]])

## .detach() vs .data

In [22]:
# A tensor that is part of the autograd graph (requires grad) cannot be
# converted to NumPy directly. The error is the point of this cell, so catch
# and display it instead of letting it abort a "Restart & Run All".
try:
    z.numpy()
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

In [23]:
# So the tensor has to be taken off the graph first, via either .detach()
# or .data. This cell uses .detach().
z.detach().numpy()

array([[3., 5.],
       [7., 9.]], dtype=float32)

In [24]:
# Same conversion going through .data instead of .detach().
z.data.numpy()

array([[3., 5.],
       [7., 9.]], dtype=float32)

@ .detach()로 얻은 tensor는 requires_grad가 False이지만 원본과 데이터를 공유하며, in-place 수정이 일어나면 autograd가 이를 감지하여 backward 시 에러를 발생시킴

In [25]:
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
b = torch.exp(a)

# c is b taken off the graph, so c.requires_grad is False.
# It still shares the underlying data with b, though.
c = b.detach()

# Zero c in place; the returned tensor is displayed as the cell output.
c.zero_()

tensor([0., 0., 0.])

In [26]:
# c was zeroed in place, and b shares the same data, so b is zeroed as well.
b

tensor([0., 0., 0.], grad_fn=<ExpBackward>)

In [27]:
# exp() needs its own output to compute its gradient. That output (b) has
# been modified in place, so autograd rightly refuses to continue.
# The error is the point of this cell, so catch and display it instead of
# letting it abort a "Restart & Run All".
try:
    b.sum().backward()
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [3]], which is output 0 of ExpBackward, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [28]:
# The backward pass did not complete, so a should have no gradient (None).
a.grad

@ .data로 얻은 tensor를 in-place 수정하면 autograd가 이를 감지하지 못하므로, 에러 없이 잘못된 grad가 계산됨

In [29]:
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
b = torch.exp(a)

# This time take the raw data of b through .data instead of .detach().
c = b.data

# Zero c in place; the returned tensor is displayed as the cell output.
c.zero_()

tensor([0., 0., 0.])

In [30]:
# As before, b is zeroed too because it shares its data with c.
b

tensor([0., 0., 0.], grad_fn=<ExpBackward>)

In [31]:
# No error is raised even though b was modified in place. This is the
# undesirable case: the in-place change made through .data went unnoticed.
b.sum().backward()

In [32]:
# Wrong result: the gradient of exp(a).sum() with respect to a is exp(a),
# but the zeroed values of b were used instead.
a.grad

tensor([0., 0., 0.])

이를 방지하려면 detach().clone()으로 데이터를 복사한 뒤 사용

In [33]:
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
b = torch.exp(a)

# clone() gives c its own copy of the data, so changing c cannot touch b.
c = b.detach().clone()

# Zero c in place; the returned tensor is displayed as the cell output.
c.zero_()

tensor([0., 0., 0.])

In [34]:
# b keeps its original values: c was a separate copy, not a view of b's data.
b

tensor([ 2.7183,  7.3891, 20.0855], grad_fn=<ExpBackward>)