In [95]:
import torch

# Query CUDA only when it is usable so this cell also runs on CPU-only machines
# (torch.cuda.current_device() raises when no CUDA device can be initialised).
gpu = torch.cuda.current_device() if torch.cuda.is_available() else None
# Displayed as the cell output: the device name, or a placeholder without CUDA.
torch.cuda.get_device_name(gpu) if gpu is not None else "CUDA not available"

'NVIDIA GeForce GTX 1650 with Max-Q Design'

In [96]:
# Draw 3 samples from the standard normal distribution; requires_grad=True
# makes autograd track every operation applied to x.
x = torch.randn((3,), requires_grad=True)

In [97]:
# Because x requires grad, the result of the addition carries a grad_fn
# (AddBackward0); backward() later uses it to compute dy/dx.
y = x + 2

y
# grad_fn shown in the output: AddBackward0

tensor([1.1645, 2.3743, 1.4053], grad_fn=<AddBackward0>)

In [98]:
# z = 2 * y**2, element-wise. The last operation is the multiplication,
# so the tensor's grad_fn is MulBackward0.
z = y.pow(2) * 2
z

tensor([ 2.7122, 11.2745,  3.9497], grad_fn=<MulBackward0>)

In [99]:
# Reduce z to a single scalar so backward() can be called without arguments.
z = torch.mean(z)
z
# grad_fn shown in the output: MeanBackward0

tensor(5.9788, grad_fn=<MeanBackward0>)

In [100]:
# Backpropagate from the scalar z to populate x.grad with dz/dx.
# Here z = mean(2 * (x + 2)**2) over 3 elements, so dz/dx = 4 * (x + 2) / 3.
z.backward()
x.grad

tensor([1.5527, 3.1657, 1.8737])

In [101]:
# Same computation as before but WITHOUT the final mean(): z stays a
# 3-element vector instead of being reduced to a scalar.
x = torch.randn((3,), requires_grad=True)
y = x + 2
z = y.pow(2) * 2
z

tensor([8.7687, 3.8656, 4.6884], grad_fn=<MulBackward0>)

In [102]:
# z is not a scalar, so z.backward() with no argument would raise.
# For a non-scalar output we must pass a vector v of the same shape as z;
# autograd then computes the vector-Jacobian product v^T * (dz/dx).
# v is created with z's dtype so the gradient matches the graph's dtype
# instead of relying on an implicit double -> float cast.
v = torch.tensor([0.1, 1.001, 0.0001], dtype=z.dtype)
z.backward(v)
x.grad

tensor([8.3755e-01, 5.5666e+00, 6.1243e-04])

In [103]:
# Three ways to stop autograd from tracking operations.

# Method 1 flips the flag on x itself, in place: x stays untracked afterwards.
x.requires_grad_(False) # method 1
# Method 2 leaves x alone and binds y to a detached tensor.
y = x.detach() # method 2

# Method 3: tracking is disabled only for code inside the with-block.
with torch.no_grad():
    print("Do calculations")

Do calculations


In [104]:
weights = torch.ones(3, requires_grad=True)
for epoch in range(3):
    # Toy forward pass: d(model_output)/d(weights) is the constant 3.
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

    # backward() adds into weights.grad rather than overwriting it, so the
    # gradient is reset in place before the next iteration.
    weights.grad.zero_()


tensor([3., 3., 3.])
tensor([3., 3., 3.])
tensor([3., 3., 3.])
