In [100]:
import torch

#### We don't need to specify requires_grad=False, since tensors are created with requires_grad set to False by default

In [101]:
# Build the tensor from literal data with torch.tensor rather than the legacy
# torch.Tensor constructor. Float literals keep the dtype floating point, which
# is required before gradients can be enabled on it later.
tensor1 = torch.tensor([[1.0, 2.0, 3.0],
                        [4.0, 5.0, 6.0]])
tensor1

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [102]:
# Second operand, built with torch.tensor instead of the legacy torch.Tensor
# constructor. It is never switched to requires_grad=True in this notebook.
tensor2 = torch.tensor([[7.0, 8.0, 9.0],
                        [10.0, 11.0, 12.0]])

tensor2

tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])

#### The requires_grad property defines whether to track operations on this tensor
By default, it is set to False

In [103]:
tensor1.requires_grad

False

In [104]:
tensor2.requires_grad

False

#### The requires\_grad\_() function sets requires_grad to True

In [105]:
# In-place: flips requires_grad on tensor1 itself and returns the tensor
tensor1.requires_grad_()

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)

In [106]:
tensor1.requires_grad

True

In [107]:
tensor2.requires_grad

False

#### The .grad property stores all the gradients for the tensor
However, there are no gradients yet

In [108]:
print(tensor1.grad)


None


#### The .grad_fn property contains the gradient function
This has not been set either

In [109]:
print(tensor1.grad_fn)


None


#### Create a new output tensor from our original tensor

In [110]:
output_tensor = tensor1 * tensor2

#### The requires_grad property has been derived from the original tensor

In [111]:
output_tensor.requires_grad

True

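#### There are still no gradients
output_tensor is the result of an operation (a non-leaf tensor), so reading its .grad returns None and PyTorch emits a UserWarning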

In [112]:
print(output_tensor.grad)
print(output_tensor.grad_fn)

None
<MulBackward0 object at 0x0000027038889220>


  print(output_tensor.grad)


#### But there is a gradient function
This is from the multiplication operation performed on the original tensor 

In [113]:
print(output_tensor.grad_fn)

<MulBackward0 object at 0x0000027038889130>


#### The original tensor still does not have a gradient function

In [114]:
print(tensor1.grad_fn)

None


In [115]:
print(tensor2.grad_fn)

None


#### Changing the operation for the output changes the gradient function
grad_fn points to the last operation only. Here, even though there is a multiplication as well as a mean, grad_fn is the backward function of the mean; the multiplication is still part of the graph and is reached through it during backward propagation

In [116]:
output_tensor = (tensor1 * tensor2).mean()
print(output_tensor.grad_fn)

<MeanBackward0 object at 0x0000027038889970>


#### In spite of setting a gradient function for the output, the gradients for the input tensor are still empty

In [117]:
print(tensor1.grad)

None


#### To calculate the gradients, we need to explicitly perform a backward propagation

In [118]:
output_tensor.backward()


#### The gradients are now available for the input tensor

Future calls to backward will accumulate gradients into this vector

In [119]:
print(tensor1.grad)


tensor([[1.1667, 1.3333, 1.5000],
        [1.6667, 1.8333, 2.0000]])


#### The gradient vector is the same shape as the original vector

In [120]:
tensor1.grad.shape, tensor1.shape

(torch.Size([2, 3]), torch.Size([2, 3]))

In [121]:
print(tensor2.grad)

None


In [122]:
print(output_tensor.grad)

None


  print(output_tensor.grad)


#### The requires_grad property propagates to other tensors
Here the new_tensor is created from the original tensor and gets the original's value of requires_grad

In [123]:
new_tensor = tensor1 * 3
print(new_tensor.requires_grad)

True


In [124]:
new_tensor

tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]], grad_fn=<MulBackward0>)

#### Turning off gradient calculations for tensors
You can also stop autograd from tracking history on tensors that have requires_grad=True by wrapping the code block in <br />
<b>with torch.no_grad():</b>

In [125]:
# Anything computed inside this context is excluded from autograd tracking.
with torch.no_grad():
    new_tensor = tensor1 * 3
    print('new_tensor = ', new_tensor)

    # The input tensors keep their own flags; only the result is untracked.
    for flag in (tensor1.requires_grad, tensor2.requires_grad):
        print('requires_grad for tensor = ', flag)
    print('requires_grad for new_tensor = ', new_tensor.requires_grad)

new_tensor =  tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]])
requires_grad for tensor =  True
requires_grad for tensor =  False
requires_grad for new_tensor =  False


#### Can turn off gradient calculations performed within a function

In [126]:
def calculate(t):
    """Return ``t * 2`` without altering gradient tracking."""
    doubled = t * 2
    return doubled

In [127]:
@torch.no_grad()
def calculate_with_no_grad(t):
    """Return ``t * 2``; the decorator disables gradient tracking for the call."""
    doubled = t * 2
    return doubled

In [128]:
result_tensor = calculate(tensor1)

result_tensor

tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.]], grad_fn=<MulBackward0>)

In [129]:
result_tensor.requires_grad

True

In [130]:
result_tensor_no_grad = calculate_with_no_grad(tensor1)

result_tensor_no_grad

tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.]])

In [131]:
result_tensor_no_grad.requires_grad

False

#### Can explicitly enable gradients within a no_grad() context

There is an equivalent @torch.enable_grad() as well

In [132]:
# Outer context: gradient tracking is switched off.
with torch.no_grad():
    new_tensor_no_grad = tensor1 * 3
    print('new_tensor_no_grad = ', new_tensor_no_grad)

    # Nested context: tracking is switched back on for this block only.
    with torch.enable_grad():
        new_tensor_grad = tensor1 * 3
        print('new_tensor_grad = ', new_tensor_grad)

new_tensor_no_grad =  tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]])
new_tensor_grad =  tensor([[ 3.,  6.,  9.],
        [12., 15., 18.]], grad_fn=<MulBackward0>)


### Result tensors get requires_grad properties from input tensors

In [133]:
tensor_one = torch.tensor([[1.0, 2.0], 
                           [3.0, 4.0]], requires_grad=True)  
tensor_one

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [134]:
# Built with torch.tensor, matching how tensor_one is created. Float literals
# are needed because only floating point tensors can have requires_grad enabled.
tensor_two = torch.tensor([[5.0, 6.0],
                           [7.0, 8.0]])
tensor_two

tensor([[5., 6.],
        [7., 8.]])

#### Enable gradients for both tensors

In [135]:
tensor_one.requires_grad

True

In [136]:
tensor_two.requires_grad_()

tensor([[5., 6.],
        [7., 8.]], requires_grad=True)

In [137]:
final_tensor = (tensor_one + tensor_two).mean()
final_tensor

tensor(9., grad_fn=<MeanBackward0>)

#### The final tensor has gradients enabled because it is derived from tensors that require gradients

In [138]:
final_tensor.requires_grad

True

In [139]:
print(tensor_one.grad)

None


In [141]:
print(tensor_two.grad)

None


In [142]:
print (final_tensor)

tensor(9., grad_fn=<MeanBackward0>)


In [143]:
print(final_tensor.grad)

None


  print(final_tensor.grad)


In [144]:
final_tensor.backward()

In [145]:
print(tensor_one.grad)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])


In [146]:
print(tensor_two.grad)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])


In [147]:
print(final_tensor.grad)

None


  print(final_tensor.grad)


#### Detach tensors from the computation graph

In [148]:
detached_tensor = tensor_one.detach()

detached_tensor

tensor([[1., 2.],
        [3., 4.]])

In [149]:
tensor_one

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [150]:
# detached_tensor is not tracked, so only tensor_one receives a gradient here
mean_tensor = (tensor_one + detached_tensor).mean()

# backward() accumulates: this adds 0.25 per element on top of the 0.25 that
# final_tensor.backward() already stored in tensor_one.grad
mean_tensor.backward()

In [151]:
tensor_one.grad

tensor([[0.5000, 0.5000],
        [0.5000, 0.5000]])

In [152]:
print(detached_tensor.grad)

None
