In [None]:
 import torch

 import math

In [5]:
# Choose the compute backend in order of preference: Apple MPS, then CUDA, then plain CPU.
# Each branch only decides the backend name and the banner; the device object is built once below.
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    backend_name, banner = "mps", "Using Apple's MPS"
elif torch.cuda.is_available():
    backend_name, banner = "cuda", f"Using CUDA gpu: {torch.cuda.get_device_name(0)}"
else:
    backend_name, banner = "cpu", "Using cpu"

device = torch.device(backend_name)
print(banner)
print(f"Device is: {device}")

Using Apple's MPS
Device is: mps


In [None]:
# Simple Eg 1: y = x^2 evaluated at x = 3, so autograd should report dy/dx = 2x = 6.
x = torch.tensor([3.0], requires_grad=True)
y = x ** 2
print("y = x^2. Here x=3, so dy/dx = 2(x), ie 6")

# Backpropagate from y; afterwards the gradient is available on the leaf tensor as x.grad
y.backward()
dy_dx = x.grad.item()
print(f"gradient of y w.r.t x is: {dy_dx}")

y = x^2. Here x=3, so dy/dx = 2(x), ie 6
gradient of y w.r.t x is: 6.0


In [30]:
# Example 2: chain rule through an intermediate node.
#   y = x^2, z = sin(y) = sin(x^2)   =>   dz/dx = cos(x^2) * 2x
x = torch.tensor([4.0], requires_grad=True)
y = torch.pow(x, 2)
z = torch.sin(y)

# Compute the analytic derivative on a plain Python float. Passing x**2 (a tensor that requires
# grad) to math.cos would create a throwaway autograd node and rely on implicit tensor->float
# conversion; x.item() avoids both.
x_val = x.item()
analytic_grad = math.cos(x_val ** 2) * 2 * x_val
print(f"z = sin(x^2). Here x={x_val}, so dz/dx = cos(x^2)*2x, ie {analytic_grad}")

# y and z are non-leaf tensors: ask autograd to keep their .grad so it can be inspected
# after the backward pass (x is a leaf, so its .grad is kept by default).
z.retain_grad()
y.retain_grad()
z.backward()

print(f"Gradient of z w.r.t x as per autograd: {x.grad.item()}")
# The two values agree up to float32 precision.

y = sin(x^2). Here x=4.0, so dy/dx = cos(x^2)*2x, ie -7.661275842587077
Gradient of z w.r.t as per autograd: -7.661275863647461


In [None]:
'''
Autograd graph terminology: leaf node, intermediate node, root node. By default .grad is stored
only for leaf nodes. To access .grad on a non-leaf node, call retain_grad() on each such tensor
(before calling backward()) for which we want to access the gradient.
'''
# Reading .grad on y (a non-leaf tensor) before retain_grad() has been called on it.
# NOTE(review): the captured output below looks like the tail of a warning rather than an
# exception traceback, so this appears to warn and yield no gradient instead of raising - confirm.
# NOTE(review): in top-to-bottom order the Example 2 cell already calls y.retain_grad(), so on a
# fresh "Run All" this cell would show the retained gradient instead.
y.grad

  """Entry point for launching an IPython kernel.


In [31]:
# After retain_grad() was called on y and z (and z.backward() has run), both non-leaf
# gradients are available: y.grad holds dz/dy and z.grad holds dz/dz (displayed as 1. below).
y.grad, z.grad

(tensor([-0.9577]), tensor([1.]))

In [None]:
'''
We can do all this with tensors instead of a single value.
NOTE: Don't forget to clear the gradient before every backward pass, because gradients
are accumulated (summed) into the tensor's .grad across backward() calls.
Do this: x.grad = None

Disable gradient calculation during inferencing using
with torch.no_grad():
    'your code goes here'

or using decorator before a function
@torch.no_grad()
def fun():
    'code of the function'

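or you can turn off gradient tracking on a tensor in place with
tensor.requires_grad_(False)
(note the trailing underscore: requires_grad itself is a boolean attribute, not a method).
This would have to be done for all params, e.g. for p in model.parameters(): p.requires_grad_(False)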

'''

"\nWe can do all this with tensors instead of a single value.\nNOTE: Don't forget to clear gradient before every backward calculation because they \nkeep getting accumulated and getting added in the tensors gradient. \nDo this: x.grad = None\n"

In [None]:
# Using detach(): the detached tensor holds the same value but is not tracked by autograd.
x = torch.tensor(2.0, requires_grad=True)
y = torch.pow(x.detach(), 2)   # computed from the detached tensor
y1 = torch.pow(x, 2)           # computed from x itself
(y, y1)
# y1 carries a grad function, y does not

(tensor(4.), tensor(4., grad_fn=<PowBackward0>))