# Pytorch中的神经网络基本单元

In [1]:
import torch
from torch import nn

# nn.Module

nn.Module是神经网络结构的表示，它可以表示一个层，也可以表示一个结构块，也可以表示一个完整的模型结构。

## 自定义一个layer

In [2]:
class ReluLayer(nn.Module):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def forward(self, x):
        return (x > 0) * x

relu = ReluLayer()
print(relu)

ReluLayer()


In [3]:
a = torch.randn(2, 3)
relu(a)

tensor([[0.0584, -0.0000, -0.0000],
        [-0.0000, 1.0287, 0.4560]])

In [4]:
# 带参数的Layer
class MyFCLayer(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(in_dim, out_dim))
        self.bias = nn.Parameter(torch.randn(out_dim))

    def forward(self, x):
        return x.matmul(self.weights.data) + self.bias.data


fclayer = MyFCLayer(25, 10)
print(fclayer)

MyFCLayer()


In [5]:
a = torch.randn(1, 25)
fclayer(a)

tensor([[  2.6934,   3.8674,  -2.8628,  -1.8949,  -1.1168,  -2.3818, -10.8786,
           5.8959,   2.7713,  -0.1138]])

## 自定义一个Block

In [6]:
class LinearReluStack(nn.Module):
    def __init__(self):
        super().__init__()
        self.stack = nn.Sequential(
            nn.Linear(28 * 28, 100),
            ReluLayer(),
            nn.Linear(100, 100),
            ReluLayer(),
            MyFCLayer(100, 10),
        )

    def forward(self, x):
        return self.stack(x)


linear_relu_stack = LinearReluStack()
print(linear_relu_stack)

LinearReluStack(
  (stack): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): ReluLayer()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): ReluLayer()
    (4): MyFCLayer()
  )
)


In [7]:
a = torch.randn(1, 28 * 28)
linear_relu_stack(a)

tensor([[-0.6612, -0.8403, -0.8506, -0.9030,  0.8851,  1.8003, -0.2188,  2.5977,
          2.4639, -0.9650]], grad_fn=<AddBackward0>)

## 自定义一个模型

In [8]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.stack = LinearReluStack()

    def forward(self, x):
        return self.stack(self.flatten(x))


model = NeuralNetwork()
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (stack): LinearReluStack(
    (stack): Sequential(
      (0): Linear(in_features=784, out_features=100, bias=True)
      (1): ReluLayer()
      (2): Linear(in_features=100, out_features=100, bias=True)
      (3): ReluLayer()
      (4): MyFCLayer()
    )
  )
)


In [9]:
a = torch.randn(1, 28, 28)
model(a)

tensor([[ 0.2421,  1.7578, -0.6474, -1.7894,  0.8258,  1.2447, -0.1379,  1.2987,
          2.3945,  1.8465]], grad_fn=<AddBackward0>)

## 模仿nn.Sequential

In [14]:
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            self.add_module(str(idx), module)

    def forward(self, x):
        # _modules是内部的一个OrderedDict
        for module in self._modules.values():
            x = module(x)
        return x


mlp = MySequential(nn.Linear(25, 100), nn.ReLU(), nn.Linear(100, 10))
print(mlp)

MySequential(
  (0): Linear(in_features=25, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)


# 参数

每一层的参数，我们可以通过`layer.bias`和`layer.weight`来访问，得到的是一个`nn.parameter.Parameter`的类型对象。

对于Sequential的模型，我们可以通过下标来访问每一层：`seqmodel[i]`

我们也可以通过`state_dict`来获取nn.Module中的所有层的参数。

In [10]:
mlp = nn.Sequential(nn.Linear(25, 100), nn.ReLU(), nn.Linear(100, 10))
first_layer = mlp[0]
first_layer.bias
first_layer.weight
first_layer.state_dict()
type(mlp.state_dict())

collections.OrderedDict

In [11]:
# 获取所有参数
print(*[(name, param.shape) for name, param in mlp.named_parameters()])

('0.weight', torch.Size([100, 25])) ('0.bias', torch.Size([100])) ('2.weight', torch.Size([10, 100])) ('2.bias', torch.Size([10]))


In [12]:
# 访问OrderedDict
mlp.state_dict()["2.weight"].shape

torch.Size([10, 100])

对于`nn.parameter.Parameter`类型的对象，我们可以通过`.data`与`.grad`拿到其数据与梯度。

In [13]:
first_layer.bias.shape, first_layer.bias.grad

(torch.Size([100]), None)

# 参数初始化

对整个网络应用某个初始化函数

In [14]:
def norm_init(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)


mlp.apply(norm_init)

Sequential(
  (0): Linear(in_features=25, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

单独的某层layer应用初始化 

In [15]:
def xiaver_init(m):
    if type(m) == nn.Linear:
        nn.init.xavier_normal_(m.weight)


mlp[2].apply(xiaver_init)

Linear(in_features=100, out_features=10, bias=True)

# 多个layer共享参数

In [16]:
shared = nn.Linear(8, 8)  # 需要共享参数的layer
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared, nn.ReLU(), nn.Linear(8, 1)
)

net[2]和net[4]是共享参数的，梯度累加。