# Pytorch中的神经网络基本单元

In [1]:
import torch
from torch import nn

# nn.Module

nn.Module是神经网络结构的表示，它可以表示一个层，也可以表示一个结构块，也可以表示一个完整的模型结构。

## 自定义一个layer

In [6]:
class ReluLayer(nn.Module):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def forward(self, x):
        return (x > 0) * x

relu = ReluLayer()
print(relu)

ReluLayer()


In [7]:
a = torch.randn(2, 3)
relu(a)

tensor([[-0.0000, -0.0000, -0.0000],
        [0.0045, 1.7920, -0.0000]])

In [8]:
# 带参数的Layer
class MyFCLayer(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(in_dim, out_dim))
        self.bias = nn.Parameter(torch.randn(out_dim))

    def forward(self, x):
        return x.matmul(self.weights.data) + self.bias.data


fclayer = MyFCLayer(25, 10)
print(fclayer)

MyFCLayer()


In [9]:
a = torch.randn(1, 25)
fclayer(a)

tensor([[ -5.1787,   0.2756,  -8.5485,   7.4007,  -3.6588,   8.8778,  -4.9287,
          10.6064, -11.0471,  -2.9971]])

## 自定义一个Block

In [10]:
class LinearReluStack(nn.Module):
    def __init__(self):
        super().__init__()
        self.stack = nn.Sequential(
            nn.Linear(28 * 28, 100),
            ReluLayer(),
            nn.Linear(100, 100),
            ReluLayer(),
            MyFCLayer(100, 10),
        )

    def forward(self, x):
        return self.stack(x)


linear_relu_stack = LinearReluStack()
print(linear_relu_stack)

LinearReluStack(
  (stack): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): ReluLayer()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): ReluLayer()
    (4): MyFCLayer()
  )
)


In [11]:
a = torch.randn(1, 28 * 28)
linear_relu_stack(a)

tensor([[-0.5678, -3.6984,  0.5302, -0.1042,  0.5509,  0.0973, -1.8260, -1.6011,
          1.2910, -2.4117]], grad_fn=<AddBackward0>)

## 自定义一个模型

In [12]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.stack = LinearReluStack()

    def forward(self, x):
        return self.stack(self.flatten(x))


model = NeuralNetwork()
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (stack): LinearReluStack(
    (stack): Sequential(
      (0): Linear(in_features=784, out_features=100, bias=True)
      (1): ReluLayer()
      (2): Linear(in_features=100, out_features=100, bias=True)
      (3): ReluLayer()
      (4): MyFCLayer()
    )
  )
)


In [13]:
a = torch.randn(1, 28, 28)
model(a)

tensor([[ 1.1757,  2.8397,  2.7330, -0.8250, -1.3345, -1.5037, -0.0362, -2.6218,
          1.4468, -1.3714]], grad_fn=<AddBackward0>)

## 模仿nn.Sequential

In [14]:
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            self.add_module(str(idx), module)

    def forward(self, x):
        # _modules是内部的一个OrderedDict
        for module in self._modules.values():
            x = module(x)
        return x


mlp = MySequential(nn.Linear(25, 100), nn.ReLU(), nn.Linear(100, 10))
print(mlp)

MySequential(
  (0): Linear(in_features=25, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)


# 参数

每一层的参数，我们可以通过`layer.bias`和`layer.weight`来访问，得到的是一个`nn.parameter.Parameter`的类型对象。

对于Sequential的模型，我们可以通过下标来访问每一层：`seqmodel[i]`

我们也可以通过`state_dict`来获取nn.Module中的所有层的参数。

In [15]:
mlp = nn.Sequential(nn.Linear(25, 100), nn.ReLU(), nn.Linear(100, 10))
first_layer = mlp[0]
first_layer.bias
first_layer.weight
first_layer.state_dict()
type(mlp.state_dict())

collections.OrderedDict

In [16]:
# 获取所有参数
print(*[(name, param.shape) for name, param in mlp.named_parameters()])

('0.weight', torch.Size([100, 25])) ('0.bias', torch.Size([100])) ('2.weight', torch.Size([10, 100])) ('2.bias', torch.Size([10]))


In [18]:
# 访问OrderedDict
mlp.state_dict()["2.weight"].shape

torch.Size([10, 100])

对于`nn.parameter.Parameter`类型的对象，我们可以通过`.data`与`.grad`拿到其数据与梯度。

In [19]:
first_layer.bias.shape, first_layer.bias.grad

(torch.Size([100]), None)

# 参数初始化

对整个网络应用某个初始化函数

In [20]:
def norm_init(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)


mlp.apply(norm_init)

Sequential(
  (0): Linear(in_features=25, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

单独的某层layer应用初始化 

In [21]:
def xiaver_init(m):
    if type(m) == nn.Linear:
        nn.init.xavier_normal_(m.weight)


mlp[2].apply(xiaver_init)

Linear(in_features=100, out_features=10, bias=True)

# 多个layer共享参数

In [23]:
shared = nn.Linear(8, 8)  # 需要共享参数的layer
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared, nn.ReLU(), nn.Linear(8, 1)
)

net[2]和net[4]是共享参数的，梯度累加。

In [25]:
model = torch.load('./data/mlp-model.pt')
model

Sequential(
  (0): Linear(in_features=25, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)