In [58]:
import torch

In [59]:
# Build a 2x2 tensor of ones; requires_grad=True asks autograd to record operations on it.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [60]:
# Apply an element-wise addition to the tracked tensor; the result carries a grad_fn.
y = torch.add(x, 2)
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [61]:
# y is the result of a tensor operation, so it has a grad_fn.
# Show x, y and y's grad_fn one per line.
print(x, y, y.grad_fn, sep="\n")

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x1123cf400>


In [62]:
# Chain more operations: square y element-wise, scale by 3, then reduce to a scalar mean.
z = (y * y) * 3
out = torch.mean(z)
print(z, out, sep="\n")

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward1>)


In [63]:
# Tensors created without requires_grad are not tracked:
# neither the input nor the result of an operation on it has a grad_fn.
x_prime = torch.ones((2, 2))
y_prime = torch.add(x_prime, 2)
print(x_prime.grad_fn, y_prime.grad_fn)
print(x_prime, y_prime, sep="\n")

None None
tensor([[1., 1.],
        [1., 1.]])
tensor([[3., 3.],
        [3., 3.]])


In [64]:
# Tensor.requires_grad_(...) changes an existing tensor's requires_grad flag in place.
# A tensor created without specifying requires_grad starts with the flag set to False.
a = torch.randn(2, 2)
a = torch.div(a * 3, a - 1)
print(a.requires_grad)  # False: nothing has asked for tracking yet
a.requires_grad_(True)  # flip the flag in place
# The flag is now True, but grad_fn is still None because no tracked operation created `a`.
print(a.requires_grad, a.grad_fn, sep="\n")
b = torch.sum(a * a)
print(b.grad_fn)  # SumBackward: b was produced by a tracked sum

False
True
None
<SumBackward0 object at 0x1123cfe80>


In [65]:
# A scalar (0-dimensional) tensor has an empty size, shown as torch.Size([]).
temp = torch.tensor(1)
print(temp, temp.shape)

tensor(1) torch.Size([])


In [66]:
# backprop
# Calling .backward() accumulates the gradient with respect to each tensor into its .grad attribute.
out.backward()

In [67]:
# x is the leaf tensor created with requires_grad=True, so its .grad holds d(out)/dx.
print(x.grad)
# z and y are intermediate (non-leaf) results: autograd does not keep their gradients,
# so .grad is None unless .retain_grad() was called on them before backward().
print(z.grad)  # d(out)/dz is not stored
print(y.grad)  # d(out)/dy is not stored

# Both intermediates still have a grad_fn and report requires_grad=True.
for intermediate in (y, z):
    print(intermediate.grad_fn, intermediate.requires_grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
None
None
<AddBackward0 object at 0x1123cfb00> True
<MulBackward0 object at 0x1123cfa90> True


In [52]:
# Start a fresh graph: x is a tracked leaf tensor and y is doubled repeatedly
# until its norm reaches at least 1000.
x = torch.randn(3, requires_grad=True)  # track history
y = x * 2
# Read values through .detach() instead of the legacy .data attribute: both give an
# untracked view, but in-place changes made through .data are invisible to autograd
# and can silently produce wrong gradients.
while y.detach().norm() < 1000:
    print(y.detach(), type(y.detach()), y.detach().norm())
    y = y * 2

print(y)

tensor([ 1.0182,  1.6517, -1.0267]) <class 'torch.Tensor'> tensor(2.1952)
tensor([ 2.0364,  3.3034, -2.0534]) <class 'torch.Tensor'> tensor(4.3904)
tensor([ 4.0728,  6.6068, -4.1068]) <class 'torch.Tensor'> tensor(8.7809)
tensor([ 8.1457, 13.2135, -8.2137]) <class 'torch.Tensor'> tensor(17.5617)
tensor([ 16.2913,  26.4271, -16.4273]) <class 'torch.Tensor'> tensor(35.1234)
tensor([ 32.5826,  52.8541, -32.8547]) <class 'torch.Tensor'> tensor(70.2468)
tensor([ 65.1653, 105.7083, -65.7094]) <class 'torch.Tensor'> tensor(140.4937)
tensor([ 130.3306,  211.4165, -131.4187]) <class 'torch.Tensor'> tensor(280.9873)
tensor([ 260.6612,  422.8330, -262.8374]) <class 'torch.Tensor'> tensor(561.9747)
tensor([ 521.3224,  845.6661, -525.6748], grad_fn=<MulBackward0>)


In [53]:
# y is a vector here rather than a scalar, so backward() needs an explicit gradient argument.
# Autograd does not build the full Jacobian; passing v yields the vector-Jacobian product,
# i.e. v is treated as the gradient flowing in from downstream.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradient=v)
print(x.grad)

tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])


In [56]:
# Autograd can be told to stop tracking history on tensors with requires_grad=True
# by running the code inside a `with torch.no_grad():` block.
# The effect is temporary: tracking resumes once the block exits.
print(x.requires_grad)
print(x.pow(2).requires_grad)

with torch.no_grad():
    print(x.pow(2).requires_grad)

True
True
False
