In [4]:
import torch
from torch import nn

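# Units zeroed by dropout pass no gradient in that step, so the weights attached to them are not updated. Commonly used dropout rates: 0.5, 0.1, 0.9.
# For reproducible runs: fix the random seed and disable cuDNN (its parallel kernels can accumulate floating-point sums in a different order from run to run, so results may differ slightly).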

def dropout_layer(X, dropout):
    """Apply inverted dropout to ``X``.

    Every element is zeroed independently with probability ``dropout``; the
    surviving elements are divided by ``1 - dropout`` so that the expected
    value of the output equals the input.

    Args:
        X: Input tensor (any shape, any device).
        dropout: Drop probability in the closed interval [0, 1].

    Returns:
        ``X`` itself when ``dropout == 0``, an all-zero tensor shaped like
        ``X`` when ``dropout == 1``, otherwise the masked and rescaled tensor.

    Raises:
        AssertionError: If ``dropout`` lies outside [0, 1].
    """
    assert 0 <= dropout <= 1
    # dropout == 1: every unit is dropped, so the result is all zeros.
    if dropout == 1:
        return torch.zeros_like(X)
    # dropout == 0: nothing is dropped, hand the input back untouched.
    if dropout == 0:
        return X
    # Draw the uniform noise on the same device as X. Allocating it with
    # torch.Tensor(X.shape) would always create a CPU tensor and break the
    # multiplication below for inputs living on another device.
    # An element is kept (mask == 1) when its noise sample exceeds `dropout`.
    mask = (torch.rand(X.shape, device=X.device) > dropout).float()
    # Rescale the survivors by 1 / (1 - dropout) to keep the expectation unchanged.
    return mask * X / (1.0 - dropout)

# Demo input: the values 0..15 laid out as a 2x8 float32 matrix.
X = torch.arange(16, dtype=torch.float32).reshape(2, 8)

# Print the input, then dropout at rate 0 (identity), 0.5 (random mask) and 1 (all zeros).
print(
    X,
    dropout_layer(X, 0.0),
    dropout_layer(X, 0.5),
    dropout_layer(X, 1.0),
    sep="\n",
)



tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
tensor([[ 0.,  2.,  4.,  0.,  8.,  0., 12.,  0.],
        [16.,  0.,  0., 22.,  0., 26.,  0.,  0.]])
tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])


In [5]:
import torch
from torch import nn
# Network architecture: input width, output width and the two hidden-layer widths.
num_inputs = 784
num_outputs = 10
num_hiddens1 = 256
num_hiddens2 = 256
# Drop probabilities used after the first and second hidden layer.
# Alternative setting: dropout1, dropout2 = 0.2, 0.5
# Both are 0.0 here, which makes dropout_layer return its input unchanged.
dropout1 = 0.0
dropout2 = 0.0

class Net(nn.Module):
    """MLP with two hidden layers and hand-written dropout after each hidden activation.

    Args:
        num_inputs: Features per sample; inputs are reshaped to ``(-1, num_inputs)``.
        num_outputs: Width of the output layer.
        num_hiddens1: Width of the first hidden layer.
        num_hiddens2: Width of the second hidden layer.
        is_training: Initial mode of the module. Dropout is applied only while
            the module is in training mode.
        dropout1: Drop probability after the first hidden layer. ``None``
            (the default) means the module-level ``dropout1`` is read at
            forward time.
        dropout2: Drop probability after the second hidden layer. ``None``
            (the default) means the module-level ``dropout2`` is read at
            forward time.
    """

    def __init__(self, num_inputs, num_outputs, num_hiddens1, num_hiddens2, is_training=True,
                 dropout1=None, dropout2=None):
        super().__init__()
        self.num_inputs = num_inputs
        self.dropout1 = dropout1
        self.dropout2 = dropout2
        self.lin1 = nn.Linear(num_inputs, num_hiddens1)
        self.lin2 = nn.Linear(num_hiddens1, num_hiddens2)
        self.lin3 = nn.Linear(num_hiddens2, num_outputs)
        self.relu = nn.ReLU()
        # Set the mode through train() once the submodules are registered, so
        # the flag is propagated to the children instead of being written only
        # on this module. bool() keeps any truthy/falsy argument acceptable.
        self.train(bool(is_training))

    def forward(self, X):
        """Return the output-layer activations for the batch ``X``."""
        H1 = self.relu(self.lin1(X.reshape((-1, self.num_inputs))))
        # Dropout acts on the hidden-layer outputs and only during training.
        if self.training:
            rate1 = dropout1 if self.dropout1 is None else self.dropout1
            H1 = dropout_layer(H1, rate1)
        H2 = self.relu(self.lin2(H1))
        if self.training:
            rate2 = dropout2 if self.dropout2 is None else self.dropout2
            H2 = dropout_layer(H2, rate2)
        out = self.lin3(H2)
        return out

# Build the model in training mode from the architecture constants above.
net = Net(
    num_inputs=num_inputs,
    num_outputs=num_outputs,
    num_hiddens1=num_hiddens1,
    num_hiddens2=num_hiddens2,
    is_training=True,
)


In [10]:
import torch
from torch import nn
from ml_utils import *

# Hyperparameters: number of epochs, SGD learning rate, minibatch size.
num_epochs = 20
lr = 0.5
batch_size = 256

# Loss function.
loss = nn.CrossEntropyLoss()

# Training and test data iterators.
# NOTE(review): load_data_fashion_mnist is not defined in this notebook;
# presumably it comes from the `ml_utils` star import -- confirm.
train_iter, test_iter = load_data_fashion_mnist(batch_size)

# Fresh model instance, created in training mode.
net = Net(num_inputs, num_outputs, num_hiddens1, num_hiddens2, is_training=True)

# Optimizer: plain SGD over all model parameters.
trainer = torch.optim.SGD(params=net.parameters(), lr=lr)




In [None]:
# Train the model.
# NOTE(review): train_ch3 is not defined in this notebook; presumably it comes
# from the `ml_utils` star import -- confirm its signature there.
train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)

In [11]:
# Concise dropout implementation built from nn.Dropout.
# Every hidden block is Linear -> ReLU -> Dropout, i.e. dropout acts on the
# activated output of the fully connected layer, the same order Net.forward uses.
# Layer widths reuse the architecture constants instead of repeating the literals;
# the third hidden layer (128 units) has no named constant.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(num_hiddens2, 128),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(128, num_outputs),
)

def init_weights(m):
    """Re-initialise the weight of a linear layer from a normal distribution.

    Meant to be passed to ``nn.Module.apply``. The weight of any ``nn.Linear``
    (including subclasses) is filled in place with samples from
    N(mean=0, std=0.01); the bias is left as it is, and every other module
    type is ignored.

    Args:
        m: The module to initialise.
    """
    # isinstance (rather than an exact type comparison) also covers subclasses
    # of nn.Linear.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0, std=0.01)

# Run init_weights on the modules of net via nn.Module.apply.
net.apply(init_weights)

# New SGD optimizer bound to this network's parameters; `lr` is the learning rate defined in an earlier cell.
trainer = torch.optim.SGD(net.parameters(), lr=lr)
# NOTE(review): train_ch3 is not defined in this notebook; presumably it comes from `ml_utils` -- confirm.
train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)

train_metrics = (1.9759579210281373, 0.217)
test_acc = 0.5208
train_metrics = (0.9412045572598775, 0.6281333333333333)
test_acc = 0.7499
train_metrics = (0.6764810991923015, 0.7425)
test_acc = 0.7802
train_metrics = (0.5937331496874492, 0.7823)
test_acc = 0.8157
train_metrics = (0.5436917445341746, 0.80345)
test_acc = 0.8254
train_metrics = (0.5128901916186015, 0.8185)
test_acc = 0.7864
train_metrics = (0.49498618133862815, 0.8228833333333333)
test_acc = 0.8387
train_metrics = (0.47302289231618244, 0.8306166666666667)
test_acc = 0.8437
train_metrics = (0.4554902512868245, 0.8383833333333334)
test_acc = 0.8382
train_metrics = (0.44825654169718426, 0.8411333333333333)
test_acc = 0.8478
train_metrics = (0.43650185165405275, 0.8459333333333333)
test_acc = 0.8495
train_metrics = (0.43124554282824196, 0.84645)
test_acc = 0.8464
train_metrics = (0.42215665702819827, 0.8512166666666666)
test_acc = 0.8575
train_metrics = (0.41457356435457865, 0.8526333333333334)
test_acc = 0.8572
train_metrics 