https://mp.weixin.qq.com/s?__biz=MzIyNjM2MzQyNg==&mid=2247516912&idx=1&sn=c0fcdc2a451550ff724cbf800448a78e&chksm=e87353bddf04daab44110055fcc5000f1ac323e24fba7eb4cefe717a24ae5d2494b394460fd5&mpshare=1&scene=1&srcid=0724hI0ayqfTrMFtmHN1a7kW&sharer_sharetime=1595592216338&sharer_shareid=8724c7983f20914ec3c0c086bba89335&key=a9ac97d80b28f505d6e5d26dd98a3bcaeab9c83c29a5f78dc70722931856727461aeafd7bda165af49b29e72fba2d729df6f491d7ea554188a5b3340cac37ed325f3ca13ade58a9c3bdc456b4f02fcec&ascene=1&uin=MjA1MjAyODkxNg%3D%3D&devicetype=Windows+10+x64&version=62090529&lang=zh_CN&exportkey=AYPZEIyB0xT1XdUvzcnw%2FI0%3D&pass_ticket=GXsSUduz0ZwByPtLM3YpLgpayfsvEM31MQ8HNS8Wkm0wVidoP99vsfP%2FFcNt%2BK%2BV

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1,6,3)
        self.conv2 = nn.Conv2d(6,16,3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16*6*6,120)   # 6*6 from image dimension
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
    def forward(self,x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)),(2,2))  # CLASStorch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
        x = F.max_pool2d(F.relu(self.conv2(x)),2)
        x = x.view(-1,self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    def num_flat_features(self,x):
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        print(num_features)
        return num_features
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [4]:
# 模型可学习参数由net.parameters()返回
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's .weight

10
torch.Size([6, 1, 3, 3])


In [6]:
# 尝试输入一个32*32随机输入
input = torch.randn(1,1,32,32)
out = net(input)
print(out)

576
tensor([[ 0.0936, -0.0775,  0.0811, -0.0183, -0.0172,  0.1378,  0.0443, -0.0142,
         -0.0157,  0.0379]], grad_fn=<AddmmBackward>)


In [7]:
# 用随机梯度将所有参数和反向传播器的梯度缓冲区归零
net.zero_grad()
out.backward(torch.randn(1,10))

In [14]:
# 损失函数
output = net(input)
target = torch.randn(10)
target = target.view(1,-1)
print(target.shape)
print(output.shape)
criterion = nn.MSELoss()
loss = criterion(output,target)
print(loss)

576
torch.Size([1, 10])
torch.Size([1, 10])
tensor(1.2537, grad_fn=<MseLossBackward>)


In [15]:

# 如果loss使用.grad_fn属性的属性向后移动，可查看网络结构
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU

<MseLossBackward object at 0x000001FC2A1F32B0>
<AddmmBackward object at 0x000001FC2A1F3978>
<AccumulateGrad object at 0x000001FC2A1F32B0>


In [16]:
# 更新权重
import torch.optim as optim
optimizer = optim.SGD(net.parameters(),lr=0.01)
optimizer.zero_grad()
output = net(input)
loss = criterion(output,target)
loss.backward()
optimizer.step()

576


In [17]:
output

tensor([[ 0.0930, -0.0748,  0.0817, -0.0171, -0.0158,  0.1382,  0.0452, -0.0117,
         -0.0133,  0.0397]], grad_fn=<AddmmBackward>)

In [18]:
target.view(1,-1)

tensor([[-1.3092, -1.9284,  1.8633,  0.7304,  0.4058,  0.2075, -1.5073,  0.2485,
         -0.2118, -0.7981]])