In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
from pathlib import Path

sys.path.insert(0, Path('').resolve().parent.parent.as_posix())
from utils import dl_modules

## [**$L_2$范数惩罚**]

In [2]:
def l2_penalty(w):
    """Return the L2 penalty of ``w``: half the sum of its squared entries.

    Args:
        w: Tensor of weights (any shape).

    Returns:
        A zero-dimensional tensor equal to ``sum(w ** 2) / 2``.
    """
    squared = torch.square(w)
    return squared.sum() / 2

## [**Dropout层**]

In [3]:
def dropout_layer(X, dropout):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``dropout``; the
    surviving elements are divided by ``1 - dropout`` so that the expected
    value of every element is unchanged.

    Args:
        X: Input tensor.
        dropout: Drop probability, a number in ``[0, 1]``.

    Returns:
        A tensor with the same shape as ``X``. ``dropout == 0`` returns ``X``
        itself and ``dropout == 1`` returns all zeros.

    Raises:
        AssertionError: If ``dropout`` is outside ``[0, 1]``.
    """
    assert 0 <= dropout <= 1
    # Everything is dropped: avoid the division by zero below.
    if dropout == 1:
        return torch.zeros_like(X)
    # Nothing is dropped: the input passes through untouched.
    if dropout == 0:
        return X
    # The samples must be uniform on [0, 1) so that P(u > dropout) equals
    # 1 - dropout. A standard-normal sample (torch.randn) would give a keep
    # probability of P(N(0, 1) > dropout) instead, which breaks the rescaling.
    # The mask is created on X's device so non-CPU inputs work as well.
    mask = (torch.rand(X.shape, device=X.device) > dropout).float()
    return mask * X / (1.0 - dropout)


# Sanity-check dropout_layer on a small 2x8 tensor: show the input, then the
# result for drop probabilities 0 (identity), 0.5 (random) and 1 (all zeros).
X = torch.arange(16, dtype=torch.float32).reshape(2, 8)
print(X)
for rate in (0.0, 0.5, 1.0):
    print(dropout_layer(X, rate))

tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
tensor([[ 0.,  2.,  0.,  6.,  8.,  0.,  0., 14.],
        [ 0.,  0., 20.,  0.,  0.,  0., 28.,  0.]])
tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])


## [**顺序块**]

In [4]:
class MySequential(nn.Module):
    """Minimal sequential container: runs its child modules one after another.

    Args:
        *args: Modules to chain, in the order they should be applied.
    """

    def __init__(self, *args):
        super().__init__()
        for position, layer in enumerate(args):
            # Each layer is stored in nn.Module's `_modules` mapping under its
            # position (as a string key).
            self._modules[f"{position}"] = layer

    def forward(self, X):
        """Feed ``X`` through every stored module and return the final output."""
        out = X
        # `_modules` is iterated in insertion order, i.e. the order in which
        # the layers were passed to the constructor.
        for layer in self._modules.values():
            out = layer(out)
        return out

# Build a small MLP with the custom container and run one forward pass.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
X = torch.rand(2, 20)  # random batch: 2 samples, 20 features each
net(X)  # bare last expression so the notebook displays the 2x10 output

tensor([[-0.0087, -0.1957,  0.1385,  0.0419,  0.0254,  0.0824,  0.1099, -0.1397,
         -0.0098, -0.1584],
        [ 0.0094, -0.1174,  0.1932, -0.0154,  0.0370, -0.0551,  0.1226, -0.1001,
          0.0874, -0.1661]], grad_fn=<AddmmBackward0>)

### [**自定义初始化**]

有时，深度学习框架没有提供我们需要的初始化方法。
在下面的例子中，我们使用以下的分布为任意权重参数$w$定义初始化方法：

$$
\begin{aligned}
    w \sim \begin{cases}
        U(5, 10) & \text{ 可能性 } \frac{1}{4} \\
            0    & \text{ 可能性 } \frac{1}{2} \\
        U(-10, -5) & \text{ 可能性 } \frac{1}{4}
    \end{cases}
\end{aligned}
$$


In [5]:
def my_init(m):
    """Custom initializer intended for use with ``nn.Module.apply``.

    For modules whose type is exactly ``nn.Linear``, the weight is drawn from
    U(-10, 10) and every entry with absolute value below 5 is then zeroed.
    The result is U(5, 10) with probability 1/4, 0 with probability 1/2 and
    U(-10, -5) with probability 1/4. Other modules are left unchanged, and
    the bias is not modified.

    Args:
        m: The module to (possibly) initialize.
    """
    if type(m) is not nn.Linear:
        return
    # Report the first registered parameter (its name and shape).
    first_name, first_param = next(iter(m.named_parameters()))
    print("Init", first_name, first_param.shape)
    nn.init.uniform_(m.weight, -10, 10)
    # Boolean mask of the entries that survive; the rest are multiplied by 0.
    keep = m.weight.data.abs() >= 5
    m.weight.data *= keep

# Build a small network and run my_init on it via Module.apply; the printed
# "Init ..." lines come from my_init itself.
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
net.apply(my_init)
# Display the first row of the first layer's weight and its first bias entry.
net[0].weight.data[0], net[0].bias.data[0]

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


(tensor([-9.3765, -0.0000,  5.8128, -0.0000]), tensor(-0.0594))

## [**带参数的层**]

In [6]:
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU: ``relu(X @ weight + bias)``.

    Args:
        in_units: Number of input features.
        units: Number of output features.

    Attributes:
        weight: Learnable ``(in_units, units)`` parameter, standard-normal init.
        bias: Learnable ``(units,)`` parameter, standard-normal init.
    """

    def __init__(self, in_units, units):
        super().__init__()
        # nn.Parameter registers the tensors with the module and tracks
        # gradients by default.
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        """Return ``relu(X @ weight + bias)`` for an input of shape ``(..., in_units)``."""
        # Use the parameters themselves rather than `.data`: `.data` bypasses
        # autograd, so no gradient would ever reach weight or bias and the
        # layer could not be trained.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)
    
# Create the custom layer with 5 input and 3 output units and display its
# weight parameter.
linear = MyLinear(5, 3)
linear.weight

Parameter containing:
tensor([[ 0.2368,  0.1044, -1.6396],
        [-0.6161, -0.9964, -1.0342],
        [-0.9435, -0.7490,  1.7517],
        [-0.5619, -1.6547, -0.4187],
        [-1.0583,  1.3143,  0.1783]], requires_grad=True)

In [7]:
# Run a random batch (2 samples, 5 features) through the custom layer.
linear(torch.rand(2, 5))

tensor([[0.0000, 0.0000, 0.0000],
        [0.0000, 0.8635, 0.0000]])