In [1]:
import torch

In [3]:
# Leaf tensor holding three random values; requires_grad=True asks autograd
# to record every operation performed on it.
x = torch.rand((3,), requires_grad=True)
print(x)

tensor([0.9540, 0.7570, 0.3306], requires_grad=True)


In [4]:
# An operation on a tracked tensor is recorded: the result carries a
# 'grad_fn' attribute (visible in the printed repr).
y = x + 2
print(y)

tensor([2.9540, 2.7570, 2.3306], grad_fn=<AddBackward0>)


In [5]:
# The last recorded operation is a multiplication, so the printed grad_fn
# is now 'MulBackward0'.
z = (y * y) * 2
print(z)

tensor([17.4521, 15.2016, 10.8630], grad_fn=<MulBackward0>)


In [6]:
# Reduce z to a single scalar (its mean) before backpropagating from it.
z = torch.mean(z)
print(z)

tensor(14.5056, grad_fn=<MeanBackward0>)


In [7]:
# Backpropagate from the scalar z: computes the gradient of z with respect
# to x (dz/dx) and stores it in x.grad.
# Here z = mean(2 * (x + 2)**2), so dz/dx = 4 * (x + 2) / 3.
z.backward()
print(x.grad)

tensor([3.9387, 3.6759, 3.1074])


In [8]:
# Stop tracking gradients (option #1): switch the flag off in place.
x.requires_grad_(False)
# 'requires_grad=True' no longer appears in the printed tensor.
print(x)

tensor([0.9540, 0.7570, 0.3306])


In [9]:
# Stop tracking gradients (option #2): detach_() works in place and returns
# the tensor itself, so 'a' refers to the same tensor as 'x'.
a = x.detach_()
print(a)

tensor([0.9540, 0.7570, 0.3306])


In [10]:
# Stop tracking gradients (option #3): operations executed inside a
# torch.no_grad() block are not recorded by autograd.
with torch.no_grad():
    b = x + 2
print(b)  # the result carries no grad_fn

tensor([2.9540, 2.7570, 2.3306])


In [14]:
# Toy training loop: the "model" multiplies every weight by 3 and sums the result.
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    model_output = weights.mul(3).sum()
    # Scalar output of the toy model (the summed values, not a gradient).
    print(model_output)
    # d(model_output)/d(weights) is 3 for every element.
    model_output.backward()
    print(weights.grad)
    # backward() adds into .grad, so it must be reset after each epoch;
    # comment the next line out to watch the gradients accumulate.
    weights.grad.zero_()

tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])


In [None]:
# Stochastic Gradient Descent - JUST A HIGH-LEVEL EXAMPLE.
# The optimizer expects an iterable of parameters (or parameter-group dicts),
# so the single tensor is wrapped in a list; passing the bare tensor raises
# a TypeError.
optimizer = torch.optim.SGD([weights], lr=0.01)
# Apply one update using whatever is currently stored in weights.grad.
# In a real training loop, call backward() on the loss before step().
optimizer.step()
optimizer.zero_grad() # empty the gradients before another iteration