In [1]:
import torch

In [6]:
# Input values. torch.tensor is the documented factory for building a tensor
# from Python data; float literals keep the dtype at float32, matching what the
# legacy torch.Tensor constructor produced.
X = torch.tensor([[7., 8., 9.], [10., 11., 12.]])
X

tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])

In [22]:
# Multiplicative term (weights) of Y = W*X + B. Built with torch.tensor, the
# documented factory; float literals keep the dtype at float32.
W = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
W

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [23]:
# Additive term (bias) of Y = W*X + B. Built with torch.tensor, the documented
# factory; float literals keep the dtype at float32.
B = torch.tensor([[1., 1., 1.], [2., 2., 2.]])
B

tensor([[1., 1., 1.],
        [2., 2., 2.]])

In [24]:
# Element-wise product of W and X plus B (this is not a matrix multiplication).
Y = torch.add(torch.mul(W, X), B)
Y

tensor([[ 8., 17., 28.],
        [42., 57., 74.]])

In [25]:
# W was created without requesting gradients, so autograd is not tracking it.
W.requires_grad

False

In [26]:
# B was created without requesting gradients, so autograd is not tracking it.
B.requires_grad

False

In [27]:
# Y was computed only from tensors that do not require gradients, so it does not either.
Y.requires_grad

False

### By default, gradient calculation is turned off for tensors
The `requires_grad` flag indicates whether gradients will be calculated for a tensor.

In [39]:
# requires_grad_() switches the flag on in place and returns the tensor itself,
# which is why the cell displays B (the last expression).
W.requires_grad_(True)
B.requires_grad_(True)

tensor([[1., 1., 1.],
        [2., 2., 2.]], requires_grad=True)

In [40]:
# After the in-place requires_grad_() call, W is tracked by autograd.
W.requires_grad

True

In [41]:
# After the in-place requires_grad_() call, B is tracked by autograd.
B.requires_grad

True

In [42]:
# Recompute the element-wise expression now that W and B require gradients;
# the result records the operation that produced it (its grad_fn).
Y = torch.add(torch.mul(W, X), B)
Y

tensor([[ 8., 17., 28.],
        [42., 57., 74.]], grad_fn=<AddBackward0>)

In [43]:
# No backward pass has run yet, so .grad is still None. print() is used because
# a bare None as the last expression of a cell produces no output.
print(W.grad)

None


In [44]:
# No backward pass has run yet, so .grad is still None. print() is used because
# a bare None as the last expression of a cell produces no output.
print(B.grad)

None


### Call the `backward` function to calculate gradients

In [45]:
# Y is a 2x3 tensor here, so backward() without arguments is expected to fail.
# The error is the point of this cell: catch it and show the message so the
# notebook still runs top to bottom (Restart & Run All) instead of stopping here.
try:
    Y.backward()
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: grad can be implicitly created only for scalar outputs

### Without an explicit `gradient` argument, `backward()` can be called only on a scalar output

In [46]:
# Reduce Y to a single scalar so that backward() can be called without arguments.
# Note that this rebinds the name Y from the 2x3 result to its mean.
Y = torch.mean(Y)
Y

tensor(37.6667, grad_fn=<MeanBackward0>)

In [47]:
# Y is now a scalar, so backward() runs and populates .grad on W and B.
Y.backward()

In [48]:
# d(mean(W*X + B))/dW = X / 6, since the mean is taken over six elements.
W.grad

tensor([[1.1667, 1.3333, 1.5000],
        [1.6667, 1.8333, 2.0000]])

In [49]:
# d(mean(W*X + B))/dB = 1/6 for every element of B.
B.grad

tensor([[0.1667, 0.1667, 0.1667],
        [0.1667, 0.1667, 0.1667]])

In [52]:
# Step W against its gradient (implicit step size of 1).
# The subtraction is itself recorded by autograd, so W is rebound to a new
# tensor carrying a grad_fn rather than remaining the original leaf tensor.
# A training loop would normally apply this update in place under
# torch.no_grad() so that W stays a leaf.
W = torch.sub(W, W.grad)
W

tensor([[-0.1667,  0.6667,  1.5000],
        [ 2.3333,  3.1667,  4.0000]], grad_fn=<SubBackward0>)

In [53]:
# Step B against its gradient (implicit step size of 1).
# The subtraction is itself recorded by autograd, so B is rebound to a new
# tensor carrying a grad_fn rather than remaining the original leaf tensor.
# A training loop would normally apply this update in place under
# torch.no_grad() so that B stays a leaf.
B = torch.sub(B, B.grad)
B

tensor([[0.8333, 0.8333, 0.8333],
        [1.8333, 1.8333, 1.8333]], grad_fn=<SubBackward0>)

### Context switching

In [51]:
# Results computed inside torch.no_grad() are not tracked by autograd, while a
# tensor that already exists (Y) keeps its requires_grad flag.
with torch.no_grad():
    Y1 = torch.add(torch.mul(W, X), B)
    print("Requires grad for Y1: ", Y1.requires_grad)
    print("Requires grad for Y: ", Y.requires_grad)

Requires grad for Y1:  False
Requires grad for Y:  True


In [54]:
def multiply(tensor, m):
    """Return ``tensor * m``, computed with whatever grad mode is currently active.

    Args:
        tensor: Left operand of the multiplication.
        m: Right operand (the multiplier).

    Returns:
        The product ``tensor * m``.
    """
    product = tensor * m
    return product

In [55]:
@torch.no_grad()
def multiply_no_grad(tensor, m):
    """Return ``tensor * m`` with gradient tracking disabled for the call.

    The ``torch.no_grad()`` decorator wraps the whole function body, so the
    product is not recorded by autograd even if ``tensor`` requires gradients.

    Args:
        tensor: Left operand of the multiplication.
        m: Right operand (the multiplier).

    Returns:
        The product ``tensor * m``.
    """
    product = tensor * m
    return product

In [56]:
# Show the current W as a reference for the two function calls that follow.
W

tensor([[-0.1667,  0.6667,  1.5000],
        [ 2.3333,  3.1667,  4.0000]], grad_fn=<SubBackward0>)

In [57]:
# Plain function: the multiplication is tracked, so the result carries a grad_fn.
multiply(W, 2)

tensor([[-0.3333,  1.3333,  3.0000],
        [ 4.6667,  6.3333,  8.0000]], grad_fn=<MulBackward0>)

In [58]:
# Decorated with torch.no_grad(): same values, but the result has no grad_fn.
multiply_no_grad(W, 2)

tensor([[-0.3333,  1.3333,  3.0000],
        [ 4.6667,  6.3333,  8.0000]])