In [1]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


## For a scalar output (`out`), with the default gradient

In [2]:
# 2x2 leaf tensor of ones; requires_grad=True makes autograd track operations on it.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [3]:
# Adding a constant to x is recorded by autograd, so y gets a grad_fn.
y = torch.add(x, 2)
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [4]:
# y was produced by an operation on x, so it carries a grad_fn node describing that operation.
print(y.grad_fn)

<AddBackward0 object at 0x1111c7b20>


In [5]:
# z = 3 * y^2 elementwise; out reduces z to a single scalar by averaging.
z = (y * y).mul(3)
out = torch.mean(z)

print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [6]:
# out is the mean of z: a single-value tensor that still carries a grad_fn.
print(out)

tensor(27., grad_fn=<MeanBackward0>)


### Let’s backprop now
Because `out` contains a single scalar, `out.backward()` is equivalent to `out.backward(torch.tensor(1.))`.

In [7]:
out.backward() # out is a scalar, so this is the same as out.backward(torch.tensor(1.))

In [9]:
# Print the gradients d(out)/dx.
# out = mean(3 * (x + 2)^2) over the 4 elements of x, so for each element:
#   d(out)/dx = (1/4) * 3 * (2*x + 4) = (1/4) * 3 * (2*1 + 4) = 4.5
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


## For non-default gradients (passed as parameter to backward)
If you pass a vector v as the `gradient` argument, then `y.backward(gradient)` stores in `x.grad` not J itself but the vector–Jacobian product vᵀ・J.

J is the Jacobian of y with respect to x (J itself is the same whatever `gradient` you pass; only the vector v it is multiplied by changes).

A good reference here:
https://zhang-yang.medium.com/the-gradient-argument-in-pytorchs-backward-function-explained-by-examples-68f266950c29

In [16]:
# Vector-valued output: pass an explicit gradient vector v to backward().
x = torch.tensor([2., 5.], requires_grad=True)
print('x:', x)
y = 2 * x.pow(2)  # elementwise y_i = 2 * x_i^2
print('y:', y)
gradient_value = [1.0, 0.1]  # the vector v in the vector-Jacobian product
y.backward(gradient=torch.tensor(gradient_value))
print('x.grad:', x.grad)

x: tensor([2., 5.], requires_grad=True)
y: tensor([ 8., 50.], grad_fn=<MulBackward0>)
x.grad: tensor([8., 2.])


Explanation of the example above:
Each y_i depends only on x_i, so the Jacobian J is diagonal: J = diag(4 * x) = diag(8, 20).
The gradient argument v is [1, 0.1].
So the returned vᵀ・J is [1*8, 0.1*20] = [8, 2], which matches `x.grad`.