In [3]:
%matplotlib inline
import torch
import torch.nn as nn 
import numpy as np 
import d2lzh as d2l

# Randomly set the outputs of some hidden-layer units to 0
def dropout(X, drop_prob):
    """Apply inverted dropout to ``X``.

    Every element is zeroed independently with probability ``drop_prob``;
    the surviving elements are divided by ``1 - drop_prob`` so that the
    expected value of the output equals the input.

    Args:
        X: Input tensor of any shape. It is converted with ``.float()``
            before masking, so integer inputs are accepted.
        drop_prob: Probability of dropping an element, in ``[0, 1]``.

    Returns:
        A float tensor with the same shape as ``X``. For ``drop_prob == 0``
        this is ``X`` unchanged (as float); for ``drop_prob == 1`` it is
        all zeros.

    Raises:
        AssertionError: If ``drop_prob`` lies outside ``[0, 1]``.
    """
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # Everything is dropped; return early to avoid dividing by zero below.
    if keep_prob == 0:
        return torch.zeros_like(X)

    # The mask must come from a *uniform* [0, 1) sample so that
    # P(sample < keep_prob) == keep_prob. A standard-normal sample
    # (torch.randn) would keep ~69% of units at keep_prob=0.5 and would
    # still drop ~16% at keep_prob=1. rand_like also places the mask on the
    # same device as X.
    mask = (torch.rand_like(X) < keep_prob).float()

    return mask * X / keep_prob

# Try dropout on a small 2x8 tensor holding 0..15 with three drop
# probabilities: 0, 0.5 and 1.0 (the last one returns all zeros).
X = torch.arange(16).reshape(2, 8)
print(
    dropout(X, 0),
    dropout(X, 0.5),
    dropout(X, 1.0),
    sep="\n",
)

tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
tensor([[ 0.,  2.,  4.,  0.,  0.,  0., 12., 14.],
        [16.,  0.,  0., 22.,  0., 26.,  0.,  0.]])
tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])


In [5]:
# Layer widths of the three-layer MLP: input -> hidden1 -> hidden2 -> output.
# (784 is presumably a flattened 28x28 Fashion-MNIST image -- confirm against
# the data loader.)
num_inputs, num_outputs = 784, 10
num_hiddens1, num_hiddens2 = 256, 256

# Weights are drawn from N(0, 0.01) with NumPy and converted to float32
# tensors that track gradients.
W1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens1)),
    dtype=torch.float,
    requires_grad=True,
)
W2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens1, num_hiddens2)),
    dtype=torch.float,
    requires_grad=True,
)
W3 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens2, num_outputs)),
    dtype=torch.float,
    requires_grad=True,
)

# Biases start at zero and also track gradients.
b1 = torch.zeros(num_hiddens1, requires_grad=True)
b2 = torch.zeros(num_hiddens2, requires_grad=True)
b3 = torch.zeros(num_outputs, requires_grad=True)

# Flat list of every trainable tensor, in layer order.
params = [W1, b1, W2, b2, W3, b3]

# Drop probabilities applied after the first and second hidden layer.
drop_prob1 = 0.2
drop_prob2 = 0.5

def net(X, is_training=True):
    """Forward pass of the from-scratch MLP with dropout.

    The input is flattened to ``(-1, num_inputs)`` and passed through two
    ReLU hidden layers and a final linear layer. When ``is_training`` is
    true, ``dropout`` is applied to the output of each hidden layer with
    ``drop_prob1`` and ``drop_prob2`` respectively; otherwise the hidden
    activations are used as they are.

    Args:
        X: Input batch; reshaped with ``X.view(-1, num_inputs)``.
        is_training: Whether to apply dropout after the hidden layers.

    Returns:
        The output of the last linear layer (no softmax is applied here).
    """
    hidden1 = torch.relu(X.view(-1, num_inputs) @ W1 + b1)
    if is_training:
        # Dropout follows the first hidden layer.
        hidden1 = dropout(hidden1, drop_prob1)

    hidden2 = torch.relu(hidden1 @ W2 + b2)
    if is_training:
        # Dropout follows the second hidden layer.
        hidden2 = dropout(hidden2, drop_prob2)

    return hidden2 @ W3 + b3

# Training hyper-parameters.
# NOTE(review): lr=100.0 is unusually large; presumably the SGD step inside
# d2l.train_ch3 divides gradients by batch_size while CrossEntropyLoss already
# averages over the batch -- confirm against the d2lzh implementation.
num_epochs = 5
lr = 100.0
batch_size = 256

loss = nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Train the from-scratch model; the raw parameter list and learning rate are
# passed in place of an optimizer.
d2l.train_ch3(
    net,
    train_iter,
    test_iter,
    loss,
    num_epochs,
    batch_size,
    params,
    lr,
)

changeing train
epoch 1,loss 0.0049,train acc 0.518,test_acc 0.725
epoch 2,loss 0.0023,train acc 0.778,test_acc 0.811
epoch 3,loss 0.0020,train acc 0.816,test_acc 0.812
epoch 4,loss 0.0018,train acc 0.833,test_acc 0.835
epoch 5,loss 0.0017,train acc 0.844,test_acc 0.843


In [7]:
# Concise version of the same MLP built from torch.nn modules:
# flatten -> Linear/ReLU/Dropout -> Linear/ReLU/Dropout -> Linear.
# nn.Dropout only drops units while the module is in training mode.
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    # Use num_outputs rather than a literal 10 so the output width stays in
    # sync with the from-scratch model and the other layer sizes.
    nn.Linear(num_hiddens2, num_outputs),
)

# Re-initialise every parameter (weights and biases alike) from N(0, 0.01).
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

# Plain SGD over all parameters of the nn.Sequential model.
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

# The two None arguments fill the positions where the from-scratch run passed
# `params` and `lr`; here the optimizer object is supplied instead.
d2l.train_ch3(
    net,
    train_iter,
    test_iter,
    loss,
    num_epochs,
    batch_size,
    None,
    None,
    optimizer,
)

changeing train
epoch 1,loss 0.0044,train acc 0.571,test_acc 0.727
epoch 2,loss 0.0023,train acc 0.786,test_acc 0.823
epoch 3,loss 0.0019,train acc 0.825,test_acc 0.823
epoch 4,loss 0.0017,train acc 0.838,test_acc 0.843
epoch 5,loss 0.0016,train acc 0.850,test_acc 0.848
