### 3.4 模型构建

#### 3.4.1 神经网络的构造

In [1]:
import torch
from torch import nn

class MLP(nn.Module):
  """Multilayer perceptron with one hidden layer.

  Maps 784 input features to 256 hidden units, applies ReLU, and then
  projects to 10 outputs.
  """

  def __init__(self, **kwargs):
    # Keyword arguments are accepted for call-site compatibility but are
    # not used and are not forwarded to nn.Module.
    super().__init__()
    self.hidden = nn.Linear(in_features=784, out_features=256)
    self.act = nn.ReLU()
    self.output = nn.Linear(in_features=256, out_features=10)

  def forward(self, x):
    """Return the output-layer result for input ``x``."""
    hidden_out = self.hidden(x)
    activated = self.act(hidden_out)
    return self.output(activated)

In [3]:
# Random batch: 2 samples with 784 features each, values drawn uniformly from [0, 1).
X = torch.rand(2, 784)
net = MLP()
# Printing a module lists its registered sub-modules.
print(net)
# Calling the module runs its forward pass; as the last expression in the
# cell, the resulting tensor is displayed.
net(X)

MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


tensor([[ 0.0658,  0.1477,  0.0087, -0.1591, -0.0906,  0.1742,  0.0189,  0.1231,
          0.0275, -0.0591],
        [-0.0126,  0.0261,  0.0054, -0.0588, -0.0947,  0.1191,  0.0598, -0.0363,
          0.1346, -0.0306]], grad_fn=<AddmmBackward0>)

#### 3.4.2 神经网络中常见的层


1. 不含模型参数的层

In [7]:
import torch
from torch import nn

class MyLayer(nn.Module):
  """Layer without learnable parameters that subtracts the input's mean."""

  def __init__(self, **kwargs):
    # Any keyword arguments are handed straight to nn.Module.
    super().__init__(**kwargs)

  def forward(self, x):
    """Return ``x`` shifted so that its overall mean is zero."""
    centered = x - torch.mean(x)
    return centered

In [8]:
layer = MyLayer()
# The inputs 1..5 have mean 3, so the displayed result is the input minus 3.
layer(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float))

tensor([-2., -1.,  0.,  1.,  2.])

2. 含模型参数的层

In [15]:
class MyListDense(nn.Module):
  """Chain of matrix multiplications whose weights live in a ParameterList.

  Holds three 4x4 weight matrices followed by one 4x1 matrix, all
  initialised from a standard normal distribution.
  """

  def __init__(self):
    super().__init__()
    self.params = nn.ParameterList([nn.Parameter(torch.randn(4, 4)) for _ in range(3)])
    # A ParameterList can be extended after construction, like a Python list.
    self.params.append(nn.Parameter(torch.randn(4, 1)))

  def forward(self, x):
    """Multiply ``x`` by every stored weight matrix in order.

    Args:
      x: 2-D tensor whose second dimension is 4 so it can be multiplied
        by the first 4x4 weight.

    Returns:
      The product ``x @ W0 @ W1 @ W2 @ W3``, which has a single column.
    """
    # Iterate the ParameterList directly rather than indexing by position.
    for weight in self.params:
      x = torch.mm(x, weight)
    return x
  
net = MyListDense()
# Printing the module lists the entries of its ParameterList.
print(net)
# A random 4x4 input multiplied through all four weights gives a 4x1 result.
net(torch.randn(4,4))

MyListDense(
  (params): ParameterList(
      (0): Parameter containing: [torch.float32 of size 4x4]
      (1): Parameter containing: [torch.float32 of size 4x4]
      (2): Parameter containing: [torch.float32 of size 4x4]
      (3): Parameter containing: [torch.float32 of size 4x1]
  )
)


tensor([[-70.6800],
        [  8.7415],
        [ -3.3696],
        [ -0.9187]], grad_fn=<MmBackward0>)

In [17]:
class MyDictDense(nn.Module):
  """Layer that keeps several weight matrices in a ParameterDict.

  'linear1' and 'linear2' are 4x4 matrices and 'linear3' is 4x1; the
  caller picks which one to apply on each forward call.
  """

  def __init__(self):
    super().__init__()
    square_weights = {
      name: nn.Parameter(torch.randn(4, 4)) for name in ('linear1', 'linear2')
    }
    self.params = nn.ParameterDict(square_weights)
    # Entries can also be added after construction, like a Python dict.
    self.params.update({'linear3': nn.Parameter(torch.randn(4, 1))})

  def forward(self, x, choice='linear1'):
    """Return ``x`` matrix-multiplied by the weight stored under ``choice``."""
    weight = self.params[choice]
    return torch.mm(x, weight)
  
net = MyDictDense()
# Printing the module lists the entries of its ParameterDict.
print(net)
# Select the 4x1 weight stored under "linear3", so the result is 4x1.
net(torch.randn(4,4), "linear3")

MyDictDense(
  (params): ParameterDict(
      (linear1): Parameter containing: [torch.FloatTensor of size 4x4]
      (linear2): Parameter containing: [torch.FloatTensor of size 4x4]
      (linear3): Parameter containing: [torch.FloatTensor of size 4x1]
  )
)


tensor([[ 0.2645],
        [ 1.2620],
        [ 3.0418],
        [-0.0383]], grad_fn=<MmBackward0>)

In [None]:
import torch
from torch import nn

def corr2d(X, K):
  """Compute the 2-D cross-correlation of ``X`` with kernel ``K``.

  The kernel slides over ``X`` one step at a time without padding. Both
  inputs are converted to float before the computation.

  Args:
    X: 2-D input tensor.
    K: 2-D kernel tensor.

  Returns:
    Tensor of shape ``(X.shape[0] - h + 1, X.shape[1] - w + 1)`` where
    ``(h, w)`` is the kernel shape.
  """
  k_rows, k_cols = K.shape
  image = X.float()
  kernel = K.float()
  out_rows = image.shape[0] - k_rows + 1
  out_cols = image.shape[1] - k_cols + 1
  Y = torch.zeros((out_rows, out_cols))
  for r in range(out_rows):
    for c in range(out_cols):
      # Element-wise product of the current window with the kernel, summed.
      window = image[r:r + k_rows, c:c + k_cols]
      Y[r, c] = torch.sum(window * kernel)
  return Y

class Conv2D(nn.Module):
  """Single-channel 2-D convolution layer built on top of ``corr2d``.

  Args:
    kernel_size: shape of the learnable kernel, passed to ``torch.randn``.
  """

  def __init__(self, kernel_size):
    super().__init__()
    # Kernel and scalar bias both start from standard-normal samples.
    self.weight = nn.Parameter(torch.randn(kernel_size))
    self.bias = nn.Parameter(torch.randn(1))

  def forward(self, x):
    """Cross-correlate ``x`` with the kernel and add the bias."""
    correlated = corr2d(x, self.weight)
    return correlated + self.bias
  


In [19]:
def comp_conv2d(conv2d, X):
  """Apply ``conv2d`` to a 2-D tensor and return a 2-D result.

  ``X`` is viewed with two leading dimensions of size 1 before the call,
  and the first two dimensions of the result are dropped afterwards.
  """
  height_width = tuple(X.shape)
  batched = X.view((1, 1) + height_width)
  result = conv2d(batched)
  # Keep only the trailing dimensions of the output.
  return result.view(result.shape[2:])

# One input channel, one output channel, 3x3 kernel with one pixel of padding on every side.
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)
X = torch.rand(8, 8)
# With this padding the 8x8 input keeps its spatial size after the convolution.
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [26]:
import torch
from torch import nn

def pool2d(X, pool_size, mode="max"):
  """Apply 2-D max or average pooling with stride 1 and no padding.

  Args:
    X: 2-D tensor to pool.
    pool_size: ``(height, width)`` of the pooling window.
    mode: ``"max"`` to take the maximum of each window or ``"avg"`` to
      take its mean.

  Returns:
    Tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``
    created with ``torch.zeros`` and filled with the pooled values.

  Raises:
    ValueError: if ``mode`` is neither ``"max"`` nor ``"avg"``.
  """
  # Resolve the reduction once, up front. An unrecognised mode used to fall
  # through both branches and silently return an all-zero tensor.
  if mode == "max":
    reducer = torch.max
  elif mode == "avg":
    reducer = torch.mean
  else:
    raise ValueError(f'mode must be "max" or "avg", got {mode!r}')

  p_h, p_w = pool_size
  Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
  for i in range(Y.shape[0]):
    for j in range(Y.shape[1]):
      Y[i, j] = reducer(X[i:i+p_h, j:j+p_w])
  return Y

# 3x3 input holding 0..8 in row-major order.
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=torch.float)
# Default mode is "max": each 2x2 window contributes its largest element.
pool2d(X, (2, 2))

tensor([[4., 5.],
        [7., 8.]])

#### 3.4.3 模型示例

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
  """Small CNN: two 5x5 convolution layers followed by three linear layers.

  The first linear layer takes 16 * 5 * 5 features, which is what a
  1x32x32 input produces after the two conv + 2x2 max-pool stages.
  """

  def __init__(self):
    super().__init__()
    # Input image channels: 1, output channels: 6, 5x5 convolution kernel.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    # Affine operations: y = Wx + b
    self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, x):
    """Run the conv stages, flatten per sample, then apply the linear stack."""
    # First stage: convolution, ReLU, then max pooling over a 2x2 window.
    stage1 = F.relu(self.conv1(x))
    stage1 = F.max_pool2d(stage1, (2, 2))
    # For a square window a single number is enough to specify its size.
    stage2 = F.relu(self.conv2(stage1))
    stage2 = F.max_pool2d(stage2, 2)
    # Flatten everything except the leading (batch) dimension.
    flat = stage2.view(-1, self.num_flat_features(stage2))
    hidden = F.relu(self.fc1(flat))
    hidden = F.relu(self.fc2(hidden))
    return self.fc3(hidden)

  def num_flat_features(self, x):
    """Return the product of all dimensions of ``x`` except the first."""
    count = 1
    for dim in x.size()[1:]:
      count *= dim
    return count
  
net = Net()
# Printing the module lists its conv and linear layers with their configuration.
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [34]:
# Collect every learnable tensor of the network; each layer contributes a
# weight and a bias, in registration order.
params = list(net.parameters())
print(len(params))
# Iterate the parameters directly instead of indexing by position.
for param in params:
  print(param.size())

10
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [42]:
# One single-channel 32x32 sample drawn from a standard normal distribution.
# NOTE(review): the name `input` shadows the Python builtin of the same name
# for the rest of the session; left unchanged because other cells may use it.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.0353,  0.0340, -0.0714, -0.0403, -0.0404, -0.0961, -0.0351, -0.0009,
          0.0413,  0.1175]], grad_fn=<AddmmBackward0>)


In [43]:
# Clear gradients stored on the network's parameters before backpropagating.
net.zero_grad()
# `out` is not a scalar, so backward() is given an explicit gradient tensor;
# a random (1, 10) tensor is used here.
out.backward(torch.randn(1, 10))

In [44]:
class AlexNet(nn.Module):
  """AlexNet-style CNN for single-channel images with 10 output scores.

  ``conv`` holds five convolution layers interleaved with ReLU and three
  max-pooling layers; ``fc`` holds three linear layers with ReLU and
  dropout in between. The first linear layer takes 256 * 5 * 5 features,
  which is what a 1x224x224 input yields after the ``conv`` stack.
  """

  def __init__(self):
    super().__init__()
    feature_layers = [
      nn.Conv2d(1, 96, kernel_size=11, stride=4),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=3, stride=2),
      nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=3, stride=2),
      # Three consecutive 3x3 convolutions that preserve the spatial size.
      nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
      nn.ReLU(),
      nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
      nn.ReLU(),
      nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=3, stride=2),
    ]
    self.conv = nn.Sequential(*feature_layers)

    classifier_layers = [
      nn.Linear(256 * 5 * 5, 4096),
      nn.ReLU(),
      nn.Dropout(p=0.5),
      nn.Linear(4096, 4096),
      nn.ReLU(),
      nn.Dropout(p=0.5),
      nn.Linear(4096, 10),
    ]
    self.fc = nn.Sequential(*classifier_layers)

  def forward(self, img):
    """Extract convolutional features, flatten per sample, and classify."""
    feature = self.conv(img)
    batch_size = img.shape[0]
    flat = feature.view(batch_size, -1)
    return self.fc(flat)
  
net = AlexNet()
# Printing the module lists every layer of the `conv` and `fc` stacks.
print(net)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (