In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [26]:
class Net_model(nn.Module):
    """Small convolutional classifier: two conv/pool stages followed by three linear layers.

    ``conv1`` takes a single input channel and ``fc1`` expects 16*5*5 features,
    so with 5x5 kernels and two 2x2 poolings the network fits inputs of shape
    (batch, 1, 32, 32). The output has 10 values per sample and no final
    activation is applied.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 6 -> 16 channels, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the raw outputs of ``fc3``."""
        # Each conv stage is conv -> ReLU -> 2x2 max pooling.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse every dimension except the first into one feature axis.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        count = 1
        for dim in x.shape[1:]:
            count *= dim
        return count

# Instantiate the network and print its layer summary.
model = Net_model()
print(model)

# Render the autograd graph of one forward pass with torchviz.
from torchviz import make_dot

# A real tensor is required to trace the graph. The name `input` is only
# assigned in a later cell, so on a fresh kernel it still refers to the Python
# builtin and `model(input)` would fail. Build a local sample here instead:
# one single-channel 32x32 image, the shape conv1/fc1 are sized for.
sample_input = torch.randn(1, 1, 32, 32)
vis_graph = make_dot(model(sample_input), params=dict(model.named_parameters()))
vis_graph.view()

Net_model(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


'Digraph.gv.pdf'

In [27]:
# Collect the learnable tensors, then report how many there are and each one's shape.
params = list(model.parameters())
print(len(params))
for param in params:
    print(param.size())

10
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [28]:
# Push one random tensor of shape (1, 1, 32, 32) through the network.
# NOTE: the name `input` shadows the Python builtin; it is kept because later
# cells read this variable.
batch_shape = (1, 1, 32, 32)
input = torch.randn(batch_shape)
out = model(input)
print(out)

tensor([[-0.0363, -0.0779,  0.0193, -0.0079, -0.0225, -0.0981,  0.0226,  0.0253,
          0.0370, -0.0357]], grad_fn=<AddmmBackward>)


In [29]:
# Clear the gradient buffers of all parameters.
model.zero_grad()
# Backpropagate from `out` using a random upstream gradient of shape (1, 10).
upstream_grad = torch.randn(1, 10)
out.backward(upstream_grad)

In [30]:
# Loss computation
output = model(input)
# Dummy target for illustration only, reshaped to (1, 10) to match `output`.
target = torch.randn(10).view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(1.2058, grad_fn=<MseLossBackward>)


In [31]:
# Backpropagation: print the first three nodes of the autograd graph, starting
# at the loss and following the first input of each node.
node = loss.grad_fn
print(node)
node = node.next_functions[0][0]
print(node)
node = node.next_functions[0][0]
print(node)

<MseLossBackward object at 0x000001C98EC14550>
<AddmmBackward object at 0x000001C98EC14630>
<AccumulateGrad object at 0x000001C98EC14550>


In [32]:
# Reset gradients, then show conv1's bias gradient before and after backprop.
model.zero_grad()
conv1_bias = model.conv1.bias
print(conv1_bias.grad)
loss.backward()
print(conv1_bias.grad)

tensor([0., 0., 0., 0., 0., 0.])
tensor([ 0.0030, -0.0331,  0.0024,  0.0045, -0.0107,  0.0382])


In [33]:
# Manual gradient-descent step: param <- param - learning_rate * grad
learning_rate = 0.01
# Update in place under no_grad() rather than through `.data`: the update is
# still excluded from the autograd graph, but autograd can detect the in-place
# modification instead of having it silently bypassed.
with torch.no_grad():
    for param in model.parameters():
        if param.grad is None:
            # No gradient was computed for this parameter; leave it unchanged.
            continue
        param.sub_(param.grad * learning_rate)

In [34]:
# Dump every parameter tensor before the optimizer step.
for param in model.parameters():
    print(param)

import torch.optim as optim

# Plain SGD over all model parameters with a learning rate of 0.1.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

optimizer.zero_grad()             # clear the gradient buffers
output = model(input)             # forward pass
loss = criterion(output, target)  # compute the loss
loss.backward()                   # backpropagation
optimizer.step()                  # apply the parameter update

Parameter containing:
tensor([[[[-0.0315, -0.0231,  0.0513, -0.0988, -0.0617],
          [ 0.1132, -0.0293,  0.0331, -0.1649,  0.1242],
          [ 0.0693,  0.1757, -0.0522,  0.1161,  0.1203],
          [ 0.1502, -0.0060,  0.0765,  0.0915, -0.1043],
          [-0.1257, -0.1952,  0.1667, -0.1815,  0.1967]]],


        [[[ 0.1438,  0.1094, -0.0785,  0.1526, -0.1294],
          [ 0.1068,  0.1566, -0.0145,  0.0785, -0.1714],
          [-0.0191,  0.0217,  0.0624, -0.0260,  0.0531],
          [ 0.0126,  0.1565, -0.0345, -0.0618,  0.1268],
          [ 0.0294,  0.0773, -0.0076, -0.0075, -0.0115]]],


        [[[ 0.0102,  0.0445, -0.0971,  0.0412,  0.1565],
          [-0.0857,  0.1271,  0.0053, -0.0866, -0.0836],
          [-0.1664,  0.1254,  0.0783,  0.1316,  0.1859],
          [-0.0990,  0.1641,  0.1415,  0.1128, -0.0006],
          [-0.1884, -0.1830, -0.1412, -0.0460,  0.1183]]],


        [[[-0.1679,  0.1486, -0.0366,  0.1664, -0.0814],
          [-0.1302,  0.0299, -0.1252,  0.0119,  0.1983

In [35]:
# Dump every parameter tensor again (this cell follows the optimizer step).
for param in model.parameters():
    print(param)


Parameter containing:
tensor([[[[-0.0295, -0.0244,  0.0540, -0.0990, -0.0631],
          [ 0.1128, -0.0304,  0.0345, -0.1648,  0.1241],
          [ 0.0689,  0.1768, -0.0525,  0.1161,  0.1203],
          [ 0.1498, -0.0067,  0.0771,  0.0907, -0.1049],
          [-0.1247, -0.1928,  0.1642, -0.1829,  0.1979]]],


        [[[ 0.1467,  0.1067, -0.0820,  0.1546, -0.1309],
          [ 0.1057,  0.1567, -0.0161,  0.0788, -0.1742],
          [-0.0222,  0.0231,  0.0630, -0.0267,  0.0534],
          [ 0.0104,  0.1589, -0.0351, -0.0612,  0.1253],
          [ 0.0285,  0.0790, -0.0074, -0.0088, -0.0103]]],


        [[[ 0.0080,  0.0470, -0.0988,  0.0417,  0.1550],
          [-0.0859,  0.1284,  0.0042, -0.0883, -0.0826],
          [-0.1653,  0.1238,  0.0778,  0.1279,  0.1896],
          [-0.0993,  0.1601,  0.1419,  0.1150, -0.0006],
          [-0.1861, -0.1821, -0.1423, -0.0458,  0.1189]]],


        [[[-0.1666,  0.1474, -0.0378,  0.1662, -0.0790],
          [-0.1306,  0.0310, -0.1242,  0.0106,  0.1998