In [8]:
import torch

##### THE AUTOGRAD PACKAGE PROVIDES AUTOMATIC DIFFERENTIATION FOR ALL OPERATIONS ON TENSORS


In [9]:
# requires_grad=True -> autograd tracks every operation applied to the tensor.
x = torch.randn(3, requires_grad=True)

# y is the result of an operation on x, so it carries a grad_fn attribute.
# grad_fn references the Function that created the tensor.
y = x + 2

print(x, y, y.grad_fn, sep="\n")

tensor([-0.6594,  0.3814,  0.4056], requires_grad=True)
tensor([1.3406, 2.3814, 2.4056], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x0000020A614FBB20>


In [10]:
# Apply more operations to y: first an element-wise result (3 * y * y, still a vector) ...
z=y*y*3
print(z) 
# ... then reduce it to a scalar with mean(), so that backward() can later be called without arguments.
z=z.mean()
print(z)

tensor([ 5.3917, 17.0137, 17.3603], grad_fn=<MulBackward0>)
tensor(13.2553, grad_fn=<MeanBackward0>)


In [None]:
# Compute the gradients with backpropagation.
# Once the forward computation is finished, calling .backward() computes all the gradients automatically.
# The gradient is accumulated into the .grad attribute of x;
# it is the partial derivative of the function w.r.t. the tensor.
# Here z = mean(3 * y * y) over 3 elements with y = x + 2, so dz/dx = 2 * y.
z.backward()
print(x.grad)  # dz/dx
# Generally speaking, torch.autograd is an engine for computing vector-Jacobian products.
# It computes partial derivatives while applying the chain rule.

tensor([2.6812, 4.7629, 4.8111])


- Model with non-scalar output:
- If a Tensor is non-scalar (has more than one element), we need to pass an argument to backward():
- specify a `gradient` argument that is a tensor of matching shape.
- This is needed to compute the vector-Jacobian product.

In [12]:
# New random 3-element tensor with gradient tracking enabled, used for the non-scalar backward() example.
x=torch.randn(3,requires_grad=True)
x

tensor([-0.0869, -0.2552,  0.1768], requires_grad=True)

In [27]:
# NOTE(review): this cell's execution counter (27) is out of sequence with the neighbouring cells (12, 13),
# and the value shown equals what cell 15 prints after y.backward(v) - presumably it was run after that cell.
# On a fresh top-to-bottom run x.grad would presumably still be empty here; confirm, then move or remove.
x.grad

tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])

In [13]:
# Start from y = 2 * x, then double it ten more times, i.e. y = 2**11 * x = 2048 * x.
y = x * 2
print(y)

for _ in range(10):
    y = y * 2

# y is still a 3-element (non-scalar) tensor; show its value and its shape.
print(y, y.shape, sep="\n")

tensor([-0.1739, -0.5105,  0.3535], grad_fn=<MulBackward0>)
tensor([-178.0524, -522.7490,  361.9877], grad_fn=<MulBackward0>)
torch.Size([3])


In [14]:
# y is non-scalar, so calling backward() without a gradient argument raises a RuntimeError.
# The error is the point of this cell: catch and display it so that the notebook
# still runs top to bottom (Restart & Run All) instead of stopping here.
try:
    y.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: grad can be implicitly created only for scalar outputs

In [15]:
# For a non-scalar y, backward() takes a vector v of matching shape and accumulates
# the vector-Jacobian product into x.grad (here y = 2048 * x, so x.grad = 2048 * v).
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradient=v)
print(x.grad)

tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])


##### Stop a tensor from tracking history:

In [None]:
# During a training loop the weight update itself must not become part of the
# gradient computation. Three ways to stop a tensor from tracking history:
#   - x.requires_grad_(False)
#   - x.detach()
#   - wrap the code in 'with torch.no_grad():'

# A tensor created without requires_grad does not track operations: no grad_fn.
a = torch.rand(2, 2)
print(a.requires_grad)
b = (a * 3) / (a - 1)
print(b.grad_fn)

# .requires_grad_(...) flips the existing flag in place; results computed
# afterwards carry a grad_fn.
a.requires_grad_(True)
print(a.requires_grad)
b = torch.sum(a * a)
print(b.grad_fn)

False
None
True
<SumBackward0 object at 0x0000020A03EB85E0>


In [18]:
# .detach(): returns a new tensor with the same content but without gradient computation.
a = torch.randn(2, 2, requires_grad=True)
b = a.detach()

# The original keeps tracking gradients, the detached copy does not.
print(a.requires_grad, b.requires_grad, sep="\n")

True
False


In [19]:
# Wrap the computation in 'with torch.no_grad():' to disable gradient tracking inside the block.
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    # Operate on `a`, the tensor created in this cell, rather than on a variable
    # left over from an earlier cell: even though `a` requires grad, results
    # computed inside the no_grad block do not.
    print((a**2).requires_grad)

True
False


In [26]:
# backward() adds to whatever is already stored in .grad, it does not overwrite it.
# That matters during optimization: the gradients must be reset with .zero_()
# before the next optimization step.
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    # Dummy forward pass standing in for a real model.
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

    # Optimizer step: adjust the weights without recording the update in the graph.
    with torch.no_grad():
        weights.sub_(0.1 * weights.grad)

    # Important: clear the accumulated gradient, otherwise it would leak into the
    # next epoch and change the final weights and output.
    weights.grad.zero_()

print(weights, model_output, sep="\n")

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([0.4000, 0.4000, 0.4000, 0.4000], requires_grad=True)
tensor(8.4000, grad_fn=<SumBackward0>)


In [28]:
import torch

# Two 2-element input tensors with gradient tracking enabled; Q is built from them in the next cell.
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

In [29]:
# Element-wise function of a and b; analytically dQ/da = 9*a**2 and dQ/db = -2*b.
Q = 3*a**3 - b**2


In [30]:
# Q is a 2-element vector, so backward() needs an explicit gradient argument of matching shape.
# A vector of ones is passed here; the resulting gradients land in a.grad and b.grad.
external_grad = torch.tensor([1., 1.])
Q.backward(gradient=external_grad)

In [31]:
# Gradient of Q w.r.t. a: 9*a**2 evaluated at a = [2, 3] gives [36, 81].
a.grad

tensor([36., 81.])

In [32]:
# Check that the collected gradients match the analytic derivatives of Q = 3*a**3 - b**2:
# dQ/da = 9*a**2 and dQ/db = -2*b (element-wise comparison, one boolean per element).
print(9*a**2 == a.grad)
print(-2*b == b.grad)

tensor([True, True])
tensor([True, True])


In [33]:
# Display a again: backward() filled a.grad, while the values of a itself are unchanged.
a

tensor([2., 3.], requires_grad=True)

tensor(3., grad_fn=<MaxBackward1>)