In [1]:
import torch

### embedding

In [4]:
# Lookup table of 10 rows, each a 3-dimensional vector; row 2 is declared as the padding row.
l = torch.nn.Embedding(num_embeddings=10, embedding_dim=3, padding_idx=2)
index = torch.tensor([9], dtype=torch.long)

# Select row 9 of the table, then push a gradient of ones back through the lookup.
out = l(index)
out.backward(gradient=torch.ones(1, 3))

# The weight gradient is zero everywhere except the row that was looked up.
print(out)
print(l.weight.grad)

tensor([[ 1.5472, -1.3130, -0.1248]], grad_fn=<EmbeddingBackward>)
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [1., 1., 1.]])


### autograd.grad

In [5]:
def compute_graph():
    """Build a minimal autograd graph for the demos below.

    Returns:
        tuple: ``(x, y)`` where ``x`` is the tensor ``[1., 2.]`` created with
        ``requires_grad=True`` and ``y`` is its element-wise square ``x * x``.
    """
    leaf = torch.tensor([1., 2.], requires_grad=True)
    return leaf, leaf * leaf

In [6]:
# With create_graph=False, grad() is similar to backward(), except that the gradient is
# returned to the caller instead of being accumulated into the input's .grad.
# retain_graph defaults to the value of create_graph, so this call also frees the graph.
x, y = compute_graph()
p = torch.autograd.grad(outputs=y, inputs=x, grad_outputs=torch.tensor([1., 1.]), create_graph=False)

print(x.grad)  # prints None: grad() did not write into x.grad

# The failure is the point of this demo, so catch and display it rather than
# letting the exception abort a "Restart & Run All".
try:
    y.backward(torch.tensor([1., 1.]))  # ERROR: the graph has been consumed
except RuntimeError as err:
    print("RuntimeError:", err)

None


RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [7]:
# With create_graph=True, grad() differs from backward(): the computation graph is
# not consumed, so it can still be traversed afterwards.
x, y = compute_graph()
p = torch.autograd.grad(y, x, grad_outputs=torch.tensor([1.0, 1.0]), create_graph=True)

y.backward(gradient=torch.tensor([1.0, 1.0]))  # correct: the graph is still available