# 深度学习基础5_丢弃法(dropout)

In [1]:
%matplotlib inline
import torch
import torch.nn as nn
import numpy as np
import sys
import d2lzh_pytorch as d2l

## 定义dropout
def dropout(X,drop_prob):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``drop_prob``; the
    surviving elements are divided by ``1 - drop_prob`` so the expected value
    of the output equals the input.

    Args:
        X: Input tensor. It is cast to float32 before masking.
        drop_prob: Probability of dropping an element, in ``[0, 1]``.

    Returns:
        A float32 tensor with the same shape (and device) as ``X``.

    Raises:
        AssertionError: If ``drop_prob`` is outside ``[0, 1]``.
    """
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # Every element is dropped; return early to avoid dividing by zero below.
    if keep_prob == 0:
        return torch.zeros_like(X)
    # Sample the mask on X's own device. torch.rand defaults to the CPU, which
    # would make the multiplication below fail for a tensor on an accelerator.
    mask = (torch.rand(X.shape, device=X.device) < keep_prob).float()

    return mask * X / keep_prob

D:\Anaconda\envs\torch\lib\site-packages\numpy\.libs\libopenblas.JPIJNSWNNAN3CE6LLI5FWSPHUT2VXMTH.gfortran-win_amd64.dll
D:\Anaconda\envs\torch\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll
  stacklevel=1)


In [2]:
# Quick sanity check: 16 evenly spaced values in [0, 1] laid out as a 2x8
# matrix, then passed through dropout with a 50% drop probability.
X = torch.arange(16).div(15).reshape(2, 8)
dropout(X, 0.5)

tensor([[0.0000, 0.1333, 0.2667, 0.4000, 0.0000, 0.0000, 0.0000, 0.0000],
        [1.0667, 0.0000, 0.0000, 0.0000, 1.6000, 0.0000, 0.0000, 0.0000]])

In [3]:
# Layer widths of the two-hidden-layer MLP: input features, output classes,
# and the sizes of the first and second hidden layers.
num_inputs = 784
num_outputs = 10
num_hiddens1 = 256
num_hiddens2 = 256

# Weights are drawn from N(0, 0.01^2) with NumPy and converted to float32 leaf
# tensors that track gradients; biases start at zero.
W1 = torch.from_numpy(np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens1))).float().requires_grad_()
b1 = torch.zeros(num_hiddens1).requires_grad_()
W2 = torch.from_numpy(np.random.normal(loc=0, scale=0.01, size=(num_hiddens1, num_hiddens2))).float().requires_grad_()
b2 = torch.zeros(num_hiddens2).requires_grad_()
W3 = torch.from_numpy(np.random.normal(loc=0, scale=0.01, size=(num_hiddens2, num_outputs))).float().requires_grad_()
b3 = torch.zeros(num_outputs).requires_grad_()

# Flat list of every trainable tensor, in layer order.
params = [W1, b1, W2, b2, W3, b3]

In [4]:
# Drop probabilities applied after the first and second hidden layers.
drop_prob1, drop_prob2 = 0.2, 0.5


def net(X, is_training=True):
    """Forward pass of the from-scratch MLP with dropout after each hidden layer.

    Args:
        X: Input batch; it is reshaped to ``(-1, num_inputs)``.
        is_training: When true, dropout is applied to both hidden
            activations; when false, the activations pass through unchanged.

    Returns:
        The output-layer scores ``hidden2 @ W3 + b3`` (no softmax applied).
    """
    flat = X.view(-1, num_inputs)

    # First fully connected layer + ReLU; dropout only while training.
    hidden1 = torch.relu(torch.matmul(flat, W1) + b1)
    if is_training:
        hidden1 = dropout(hidden1, drop_prob1)

    # Second fully connected layer + ReLU; dropout only while training.
    hidden2 = torch.relu(torch.matmul(hidden1, W2) + b2)
    if is_training:
        hidden2 = dropout(hidden2, drop_prob2)

    return torch.matmul(hidden2, W3) + b3

`isinstance()` 函数用来判断一个对象是否是一个已知的类型，类似 `type()`。

- `isinstance()` 与 `type()` 区别：
    - `type()` 不会认为子类是一种父类类型，不考虑继承关系。
    - `isinstance()` 会认为子类是一种父类类型，考虑继承关系。

如果要判断两个类型是否相同，推荐使用 `isinstance()`。


- **语法:**       
`isinstance(object, classinfo)`

参数:
```
object -- 实例对象。
classinfo -- 可以是直接或间接类名、基本类型或者由它们组成的元组。
```

```python

def evaluate_accuracy(data_iter,net):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches, where ``y`` holds the
            target class index of each sample.
        net: Either a ``torch.nn.Module`` or a plain callable. A module is
            switched to eval mode (which disables dropout) for the duration of
            the evaluation and then restored to the mode it was in before.
            A plain callable that declares an ``is_training`` parameter is
            called with ``is_training=False``.

    Returns:
        The fraction of samples whose arg-max prediction equals the label.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    import inspect

    is_module = isinstance(net, torch.nn.Module)

    # For hand-written models, look at the declared parameters only.
    # ``__code__.co_varnames`` also lists local variables, so a function with
    # a local named ``is_training`` would wrongly receive the keyword.
    forward_kwargs = {}
    if not is_module:
        try:
            declared = inspect.signature(net).parameters
        except (TypeError, ValueError):
            # Callable without an introspectable signature: call it plainly.
            declared = {}
        if "is_training" in declared:
            forward_kwargs["is_training"] = False

    # Remember the caller's mode so evaluation does not silently flip it.
    was_training = is_module and net.training
    if is_module:
        net.eval()

    acc_sum, n = 0.0, 0
    try:
        # No gradients are needed to count correct predictions.
        with torch.no_grad():
            for X, y in data_iter:
                predictions = net(X, **forward_kwargs).argmax(dim=1)
                acc_sum += (predictions == y).float().sum().item()
                n += y.shape[0]
    finally:
        if was_training:
            net.train()
    return acc_sum / n
    
```

In [5]:
# Hyper-parameters for training the from-scratch model.
num_epochs = 5
# NOTE(review): this learning rate looks very large; presumably the SGD step
# inside d2l.train_ch3 divides the gradient by batch_size while
# CrossEntropyLoss already averages over the batch. Confirm against d2lzh_pytorch.
lr = 100.0
batch_size = 256

loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0045, train acc 0.551, test acc 0.722, epoch time 8.1314
epoch 2, loss 0.0023, train acc 0.788, test acc 0.782, epoch time 7.9757
epoch 3, loss 0.0019, train acc 0.822, test acc 0.821, epoch time 7.8871
epoch 4, loss 0.0017, train acc 0.839, test acc 0.833, epoch time 8.5571
epoch 5, loss 0.0016, train acc 0.848, test acc 0.789, epoch time 8.3320


## dropout简洁实现   



In [6]:
# Same architecture as the from-scratch model, built from torch.nn layers.
# nn.Dropout is only active while the module is in training mode, so no
# explicit is_training flag is needed here.
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    # Use the shared num_outputs constant rather than a literal 10 so this
    # model stays consistent with the from-scratch one.
    nn.Linear(num_hiddens2, num_outputs),
)

# Initialise every parameter (weights and biases alike) from N(0, 0.01^2).
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

# The optimizer is passed explicitly, so the params / lr slots are None.
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0044, train acc 0.566, test acc 0.778, epoch time 8.0185
epoch 2, loss 0.0022, train acc 0.789, test acc 0.798, epoch time 8.0809
epoch 3, loss 0.0019, train acc 0.819, test acc 0.793, epoch time 8.1905
epoch 4, loss 0.0017, train acc 0.841, test acc 0.800, epoch time 8.1412
epoch 5, loss 0.0016, train acc 0.849, test acc 0.846, epoch time 8.0834
