In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [10]:
class Net_model(nn.Module):
    """LeNet-style CNN: two conv/ReLU/max-pool stages followed by three linear layers.

    ``fc1`` expects 16*5*5 = 400 features. With the unpadded 5x5 convolutions and
    2x2 pooling used here, that corresponds to a 1-channel 32x32 input
    (32 -> 28 -> 14 -> 10 -> 5). The network emits 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 1 -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the forward pass and return the raw (un-normalised) outputs of ``fc3``."""
        # Each convolutional stage: convolution -> ReLU -> 2x2 max pooling.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse every non-batch dimension so the linear layers see 2-D input.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all
        dimensions of ``x`` except the first (batch) one."""
        return x.size()[1:].numel()

# Instantiate the network and print its layer summary.
model = Net_model()
print(model)

Net_model(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [11]:
# Materialise the learnable tensors (weights and biases of every layer),
# report how many there are, then show the shape of each one.
params = list(model.parameters())
print(len(params))
for tensor in params:
    print(tensor.size())

10
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [12]:
# Random batch of one single-channel 32x32 image.
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# later cells read it.
input = torch.randn((1, 1, 32, 32))
out = model(input)
print(out)

tensor([[-0.1575,  0.0884, -0.1531, -0.0537, -0.0232,  0.0383, -0.0223,  0.0593,
         -0.0874,  0.0092]], grad_fn=<AddmmBackward>)


In [13]:
# Clear the gradient buffers of all parameters.
model.zero_grad()
# `out` is not a scalar, so backward() needs an explicit upstream gradient;
# a random one of matching shape (1, 10) is used for demonstration.
upstream_grad = torch.randn(1, 10)
out.backward(upstream_grad)

In [14]:
# Loss computation
output = model(input)
# Dummy target for illustration, reshaped to (1, 10) so it matches `output`.
target = torch.randn(10).view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(0.8342, grad_fn=<MseLossBackward>)


In [28]:
# Backpropagation: walk two steps back through the autograd graph,
# starting from the node that produced the loss.
loss_node = loss.grad_fn
print(loss_node)
parent_node = loss_node.next_functions[0][0]
print(parent_node)
print(parent_node.next_functions[0][0])

<MseLossBackward object at 0x0000023A1F467E88>
<AddmmBackward object at 0x0000023A209E8AC8>
<AccumulateGrad object at 0x0000023A1F4B1D88>


In [29]:
# Show conv1's bias gradient before and after backpropagating the loss.
model.zero_grad()
conv1_bias = model.conv1.bias
print(conv1_bias.grad)  # right after zero_grad (recorded output: all zeros)
loss.backward()
print(conv1_bias.grad)  # populated by the backward pass

tensor([0., 0., 0., 0., 0., 0.])
tensor([ 0.0268,  0.0077, -0.0006, -0.0082, -0.0042,  0.0055])


In [30]:
# Manual SGD step: param <- param - learning_rate * grad
learning_rate = 0.01
# Update in place under no_grad() rather than through `.data`: the update is
# kept out of the autograd graph while autograd's in-place bookkeeping stays
# intact.
with torch.no_grad():
    for f in model.parameters():
        if f.grad is None:
            # No gradient was computed for this parameter; leave it unchanged.
            continue
        f.sub_(f.grad * learning_rate)

In [34]:
# Dump every parameter tensor before the optimizer step.
for param in model.parameters():
    print(param)

# SGD over all model parameters; `optim` is imported in the notebook's import cell.
optimizer = optim.SGD(model.parameters(), lr=0.1)

optimizer.zero_grad()             # clear the gradient buffers
output = model(input)             # forward pass
loss = criterion(output, target)  # compute the loss
loss.backward()                   # backpropagate
optimizer.step()                  # apply the parameter update

Parameter containing:
tensor([[[[ 0.1638, -0.0158,  0.1532,  0.1199, -0.1132],
          [-0.1350, -0.1593,  0.0376,  0.1357,  0.1068],
          [ 0.0265, -0.0882, -0.1269, -0.1342, -0.0858],
          [-0.0565, -0.0165, -0.1706,  0.0473,  0.1748],
          [ 0.1294,  0.0149,  0.1362, -0.1917, -0.0500]]],


        [[[ 0.0991, -0.1263, -0.0944,  0.0593,  0.0749],
          [-0.0555, -0.0852, -0.0262,  0.1065,  0.1519],
          [ 0.1388, -0.1827,  0.1549,  0.0571,  0.0328],
          [ 0.0202,  0.1387,  0.0491, -0.0220, -0.1568],
          [-0.1813,  0.1852, -0.1241, -0.1160, -0.1826]]],


        [[[ 0.1071,  0.1662, -0.0660, -0.1967,  0.1804],
          [ 0.1810, -0.1751,  0.1529, -0.0273,  0.1239],
          [-0.1275, -0.1525, -0.0623,  0.0542, -0.1944],
          [ 0.0149,  0.1313, -0.1059, -0.0778, -0.0768],
          [ 0.0588, -0.0231,  0.0052, -0.1780, -0.1701]]],


        [[[ 0.1375, -0.0138,  0.0590, -0.1766,  0.0952],
          [-0.0556,  0.0962, -0.1520,  0.1311, -0.0382

In [35]:
# Dump every parameter tensor again; this cell follows the optimizer step,
# so the values can be compared with the previous dump.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[[[ 0.1618, -0.0159,  0.1539,  0.1192, -0.1126],
          [-0.1363, -0.1576,  0.0376,  0.1346,  0.1060],
          [ 0.0254, -0.0875, -0.1260, -0.1329, -0.0845],
          [-0.0570, -0.0158, -0.1688,  0.0484,  0.1743],
          [ 0.1277,  0.0152,  0.1342, -0.1906, -0.0495]]],


        [[[ 0.0999, -0.1262, -0.0943,  0.0589,  0.0748],
          [-0.0571, -0.0859, -0.0258,  0.1056,  0.1523],
          [ 0.1371, -0.1840,  0.1559,  0.0587,  0.0313],
          [ 0.0182,  0.1377,  0.0512, -0.0217, -0.1570],
          [-0.1813,  0.1849, -0.1228, -0.1156, -0.1832]]],


        [[[ 0.1075,  0.1655, -0.0644, -0.1968,  0.1810],
          [ 0.1808, -0.1754,  0.1528, -0.0283,  0.1237],
          [-0.1294, -0.1530, -0.0612,  0.0559, -0.1955],
          [ 0.0142,  0.1315, -0.1052, -0.0781, -0.0780],
          [ 0.0573, -0.0251,  0.0033, -0.1770, -0.1711]]],


        [[[ 0.1364, -0.0130,  0.0587, -0.1761,  0.0957],
          [-0.0551,  0.0968, -0.1527,  0.1310, -0.0411