In [45]:
import torch

# Roughly a single linear layer projecting a 5-dim input to a 3-dim output.
# Learnable parameters: created with requires_grad=True so autograd tracks them.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

# Fixed data: an all-ones input and an all-zeros expected output.
x = torch.ones(5)
y = torch.zeros(3)

# Forward pass: logits first, then binary cross-entropy against the target.
z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)

In [46]:
# Show the current parameter values, one tensor per line.
print(w, b, sep="\n")
# Matrix product before the bias is added: (5,) @ (5, 3) -> (3,)
x @ w

tensor([[-1.0182, -0.1977,  1.0812],
        [-0.4017, -1.5874,  1.4453],
        [-0.2338, -1.0520, -0.5000],
        [-0.7544, -1.0381,  0.1394],
        [-1.5684,  0.5453, -1.8954]], requires_grad=True)
tensor([-0.2461, -0.5106,  0.2211], requires_grad=True)


tensor([-3.9765, -3.3299,  0.2705], grad_fn=<SqueezeBackward3>)

In [47]:
# Display the scalar loss; its repr includes the grad_fn linking it to the autograd graph.
loss

tensor(0.3349, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)

In [48]:
# Each tensor produced by a tracked op carries a grad_fn node describing how
# to backpropagate through that op; print the one for z and the one for loss.
for label, node in (
    ('Gradient function for z =', z.grad_fn),
    ('Gradient function for loss =', loss.grad_fn),
):
    print(label, node)

Gradient function for z = <AddBackward0 object at 0x000001BF0A81E788>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x000001BF0A81E748>


In [49]:
# Backpropagate from the scalar loss; autograd fills in .grad on the tensors
# that were created with requires_grad=True (w and b).
loss.backward()

In [50]:
# Gradients of the loss with respect to w and b, one tensor per line.
print(w.grad, b.grad, sep="\n")

tensor([[0.0048, 0.0070, 0.2068],
        [0.0048, 0.0070, 0.2068],
        [0.0048, 0.0070, 0.2068],
        [0.0048, 0.0070, 0.2068],
        [0.0048, 0.0070, 0.2068]])
tensor([0.0048, 0.0070, 0.2068])


In [28]:
# Disabling gradient tracking
# Useful once the model is trained and we only want to apply it to some input
# data, i.e. we only need forward computations through the network.

# Baseline: an op on requires_grad tensors yields a tracked result.
z = torch.matmul(x, w)+b
print(z.requires_grad)

# Option 1: compute inside a no_grad block, so the result is not tracked.
with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)

# Option 2: detach() a tracked tensor.
# The forward pass is recomputed here on purpose: the `z` bound above came out
# of the no_grad block and is already untracked, so detaching it would print
# False without demonstrating anything about detach().
z_det = (torch.matmul(x, w)+b).detach()
print(z_det.requires_grad)

## Reasons for disabling
#   - To mark some parameters in your neural network as frozen parameters.
#     This is a very common scenario for finetuning a pretrained network.
#   - To speed up computations when you are only doing a forward pass, because
#     computations on tensors that do not track gradients would be more efficient.


True
False
False


In [37]:
# Demonstrates that .grad accumulates across backward passes until it is reset.
inp = torch.eye(5, requires_grad=True)
out = (inp + 1) ** 2

# out is not a scalar, so backward needs an explicit upstream gradient;
# retain_graph=True keeps the graph alive for the repeated calls below.
out.backward(gradient=torch.ones_like(inp), retain_graph=True)
print("First call\n", inp.grad)

# Same backward pass again: the new gradient is added onto the existing one.
out.backward(gradient=torch.ones_like(inp), retain_graph=True)
print("\nSecond call\n", inp.grad)

# Reset the accumulated gradient in place, then backpropagate once more.
inp.grad.zero_()
out.backward(gradient=torch.ones_like(inp), retain_graph=True)
print("\nCall after zeroing gradients\n", inp.grad)

First call
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])

Second call
 tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])

Call after zeroing gradients
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])


In [35]:
# NOTE(review): this cell's execution count (35) is lower than the cell above
# it (37), so it was run out of order. Run top-to-bottom it prints whatever
# inp.grad holds after the last backward call above, so the "First call" label
# may be misleading -- confirm the intent or remove this cell.
print("First call\n", inp.grad)

First call
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
