In [1]:
import torch
import torchvision
import numpy as np
from torch import nn,optim

In [58]:
def dropout(x,drop_prob):
    """Apply inverted dropout to ``x``.

    Every element is zeroed independently with probability ``drop_prob``;
    the surviving elements are divided by ``1 - drop_prob`` so the expected
    value of the output equals the input.

    Args:
        x: input tensor; it is converted to float before masking.
        drop_prob: probability of zeroing an element, in ``[0, 1]``.

    Returns:
        A float tensor with the same shape as ``x``.

    Raises:
        AssertionError: if ``drop_prob`` lies outside ``[0, 1]``.
    """
    x = x.float()
    keep_prob = 1 - drop_prob

    assert 0<=keep_prob<=1 ,' drop prob error !'
    # Everything is dropped; return early to avoid dividing by zero below.
    if drop_prob == 1:
        return torch.zeros_like(x)

    # The mask must come from a *uniform* distribution on [0, 1) so that
    # P(sample < keep_prob) == keep_prob.  A standard-normal draw
    # (torch.randn) would keep ~69% of the elements at keep_prob=0.5 and
    # would still drop ~16% of them at drop_prob=0.
    mask = (torch.rand(x.shape, device=x.device) < keep_prob).float()
    return x * mask / keep_prob
    
    

In [59]:
# Quick visual check of dropout on a small 2x8 integer tensor.
a = torch.arange(16).view(2, 8)
dropout(a, drop_prob=0.5)

tensor([[ 0.,  2.,  4.,  6.,  8., 10., 12., 14.],
        [16.,  0.,  0.,  0., 24., 26., 28., 30.]])

In [60]:
# Layer sizes of the MLP: input features, output classes, two hidden widths.
num_inputs = 784
num_outputs = 10
num_hiddens1 = 256
num_hiddens2 = 256

In [61]:
# Weights: drawn from N(0, 0.01) with numpy, then wrapped as torch tensors.
# The draw order (w1, w2, w3) is kept so a seeded run yields the same values.
w1 = torch.Tensor(np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens1)))
w2 = torch.Tensor(np.random.normal(loc=0, scale=0.01, size=(num_hiddens1, num_hiddens2)))
w3 = torch.Tensor(np.random.normal(loc=0, scale=0.01, size=(num_hiddens2, num_outputs)))

# Biases: one zero per output unit of the corresponding layer.
b1 = torch.zeros(num_hiddens1, dtype=torch.float32)
b2 = torch.zeros(num_hiddens2, dtype=torch.float32)
b3 = torch.zeros(num_outputs, dtype=torch.float32)

In [62]:
# Collect every trainable tensor and switch on gradient tracking in place.
params = [w1, b1, w2, b2, w3, b3]
for p in params:
    p.requires_grad_(True)

In [63]:
# Dropout probabilities applied after the first and second hidden layers.
drop_prob1 = 0.2
drop_prob2 = 0.5

In [64]:
def net(x,is_training=True):
    """Forward pass of the hand-written two-hidden-layer MLP.

    The input is reshaped to ``(-1, num_inputs)``, passed through two
    affine + ReLU layers (weights ``w1``/``w2``, biases ``b1``/``b2``) and a
    final affine layer (``w3``/``b3``).

    Args:
        x: input batch; it is flattened with ``view(-1, num_inputs)``.
        is_training: when true, ``dropout`` is applied after each hidden
            layer with ``drop_prob1`` and ``drop_prob2`` respectively.

    Returns:
        The output of the last affine layer (no softmax is applied here).
    """
    flat = x.view(-1, num_inputs)

    hidden1 = torch.relu(torch.matmul(flat, w1) + b1)
    if is_training:
        hidden1 = dropout(hidden1, drop_prob1)

    hidden2 = torch.relu(torch.matmul(hidden1, w2) + b2)
    if is_training:
        hidden2 = dropout(hidden2, drop_prob2)

    return torch.matmul(hidden2, w3) + b3

In [65]:
def eval_acc(data_iter,net):
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: iterable yielding ``(x, y)`` batches, where ``y`` holds
            the integer class labels compared against ``argmax(dim=1)`` of
            the network output.
        net: either an ``nn.Module`` or any callable. A module is put into
            eval mode for the whole pass and afterwards restored to the mode
            it was in before the call. A non-module callable whose signature
            has an ``is_training`` parameter is called with
            ``is_training=False``.

    Returns:
        Fraction of correctly classified samples as a float.

    Raises:
        ZeroDivisionError: if ``data_iter`` yields no samples.
    """
    import inspect  # local import keeps this cell self-contained

    is_module = isinstance(net, nn.Module)

    # Inspect the real call signature instead of ``net.__code__``: that
    # attribute only exists on plain functions, and ``co_varnames`` also
    # lists local variables, not just parameters.
    accepts_flag = False
    if not is_module:
        try:
            accepts_flag = 'is_training' in inspect.signature(net).parameters
        except (TypeError, ValueError):
            # Signature not introspectable; call without the flag.
            accepts_flag = False

    # Remember the current mode so evaluation does not silently flip a
    # module that was already in eval mode back to train mode.
    was_training = net.training if is_module else False
    if is_module:
        net.eval()

    acc_sum, n = 0, 0
    try:
        # No gradients are needed for evaluation; skip building the graph.
        with torch.no_grad():
            for x, y in data_iter:
                y_hat = net(x, is_training=False) if accepts_flag else net(x)
                acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
                n += len(y)
    finally:
        if is_module:
            net.train(was_training)
    return acc_sum / n

In [66]:
# Training hyper-parameters for the from-scratch model.
num_epochs = 5
lr = 100.0
batch_size = 256

# Cross-entropy criterion used by both training loops.
loss = torch.nn.CrossEntropyLoss()

In [67]:
def sgd(params,lr,batch_size):
    """Perform one in-place mini-batch SGD step.

    Each tensor in ``params`` is updated through ``.data`` (so the update
    itself is not tracked by autograd) by subtracting
    ``lr * grad / batch_size``.

    Args:
        params: iterable of tensors whose ``.grad`` has been populated.
        lr: learning rate.
        batch_size: divisor applied to the gradient.
    """
    for param in params:
        step = lr * param.grad / batch_size
        param.data -= step

In [68]:
import torchvision 
from torchvision import transforms

In [69]:
batch_size=256

# Raw string: the Windows path contains backslashes followed by letters
# (\s, \m, \D, \F).  In a normal string literal these are invalid escape
# sequences, which Python only tolerates with a deprecation/syntax warning.
# The raw literal has exactly the same value without relying on that.
FASHION_MNIST_ROOT = r'F:\study\ml\DataSet\FashionMNIST'

# Fashion-MNIST train/test splits; images are converted to tensors by
# ToTensor, and the files are downloaded into the root directory if absent.
mnist_train=torchvision.datasets.FashionMNIST(
    root=FASHION_MNIST_ROOT,train=True,
    download=True,transform=transforms.ToTensor())
mnist_test=torchvision.datasets.FashionMNIST(
    root=FASHION_MNIST_ROOT,train=False,
    download=True,transform=transforms.ToTensor())

In [70]:
# Mini-batch iterators over the two splits; both reshuffle every epoch.
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=True)

In [73]:
def train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr):
    """Train ``net`` with the hand-written ``sgd`` and print per-epoch stats.

    For every batch the loss is computed, existing gradients on ``params``
    are zeroed, the loss is back-propagated and ``sgd`` updates ``params``.
    After each epoch the test accuracy is measured with ``eval_acc``.

    The printed "train loss" is the sum of ``loss(y_hat, y).sum().item()``
    over all batches divided by the number of training samples seen; if
    ``loss`` already averages over the batch, that figure is therefore a
    per-batch mean divided by the sample count, not a per-sample loss.

    Args:
        net: callable mapping a batch ``x`` to class scores.
        train_iter: iterable of ``(x, y)`` training batches.
        test_iter: iterable of ``(x, y)`` batches passed to ``eval_acc``.
        loss: callable ``loss(y_hat, y)`` returning a tensor.
        num_epochs: number of passes over ``train_iter``.
        batch_size: forwarded to ``sgd`` as the gradient divisor.
        params: list of tensors updated by ``sgd``.
        lr: learning rate forwarded to ``sgd``.
    """
    for epoch in range(num_epochs):
        loss_total, correct_total, seen = 0, 0, 0
        for x, y in train_iter:
            y_hat = net(x)
            batch_loss = loss(y_hat, y).sum()

            # Gradients only exist after the first backward pass; once they
            # do, clear them so they do not accumulate across batches.
            if params[0].grad is not None:
                for p in params:
                    p.grad.data.zero_()

            batch_loss.backward()
            sgd(params, lr, batch_size)

            loss_total += batch_loss.item()
            correct_total += (y_hat.argmax(dim=1) == y).float().sum().item()
            seen += len(y)

        test_acc = eval_acc(test_iter, net)
        print('epoch %d,train loss %.4f,train acc %.3f,test acc %.3f' % (
            epoch + 1, loss_total / seen, correct_total / seen, test_acc))
            
    

In [74]:
# Train the from-scratch dropout MLP with the hand-written SGD.
train_ch3(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    loss=loss,
    num_epochs=num_epochs,
    batch_size=batch_size,
    params=params,
    lr=lr,
)

epoch 1,train loss 0.0045,train acc 0.554,test acc 0.649
epoch 2,train loss 0.0023,train acc 0.784,test acc 0.742
epoch 3,train loss 0.0019,train acc 0.826,test acc 0.805
epoch 4,train loss 0.0017,train acc 0.841,test acc 0.833
epoch 5,train loss 0.0016,train acc 0.850,test acc 0.840


Dropout using `torch.nn` (concise implementation)

In [97]:
class FlattenLayer(nn.Module):
    """Reshape each input batch to ``(-1, num_features)``.

    Args:
        num_features: size of the last dimension after flattening.
            Defaults to 784, so ``FlattenLayer()`` behaves as before.
    """

    def __init__(self, num_features=784):
        super().__init__()
        self.num_features = num_features

    def forward(self,x):
        """Return ``x`` reshaped to two dimensions ``(-1, num_features)``."""
        # reshape (unlike view) also accepts non-contiguous inputs and is
        # identical to view whenever view would have succeeded.
        return x.reshape(-1, self.num_features)
        
        

In [98]:
# Same architecture as the from-scratch model, built from nn modules:
# flatten -> (linear, ReLU, dropout) x 2 -> linear output layer.
layers = [
    FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, num_outputs),
]
net = nn.Sequential(*layers)

In [99]:
# Re-initialise every parameter of the model (weights and biases alike)
# from a normal distribution with mean 0 and standard deviation 0.01.
for p in net.parameters():
    nn.init.normal_(p, 0, 0.01)

In [100]:
# Plain SGD over all model parameters with learning rate 0.5.
optimizer = optim.SGD(params=net.parameters(), lr=0.5)

In [101]:
def train_ch3_pytorch(net, train_iter, test_iter, loss, num_epochs, batch_size,
                      params, lr, optimizer):
    """Train ``net`` with a torch optimizer and print per-epoch stats.

    For every batch the loss is computed, the optimizer's gradients are
    zeroed, the loss is back-propagated and ``optimizer.step()`` is called.
    After each epoch the test accuracy is measured with ``eval_acc``.

    The printed "train loss" is the sum of ``loss(y_hat, y).sum().item()``
    over all batches divided by the number of training samples seen.

    Args:
        net: model mapping a batch ``x`` to class scores.
        train_iter: iterable of ``(x, y)`` training batches.
        test_iter: iterable of ``(x, y)`` batches passed to ``eval_acc``.
        loss: callable ``loss(y_hat, y)`` returning a tensor.
        num_epochs: number of passes over ``train_iter``.
        batch_size: accepted but not used by this function.
        params: accepted but not used by this function.
        lr: accepted but not used by this function.
        optimizer: object providing ``zero_grad()`` and ``step()``.
    """
    for epoch in range(num_epochs):
        loss_total, correct_total, seen = 0, 0, 0
        for x, y in train_iter:
            y_hat = net(x)
            batch_loss = loss(y_hat, y).sum()

            # Clear old gradients, back-propagate, then update the weights.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            loss_total += batch_loss.item()
            correct_total += (y_hat.argmax(dim=1) == y).float().sum().item()
            seen += len(y)

        test_acc = eval_acc(test_iter, net)
        print('epoch %d,train loss %.4f,train acc %.3f,test acc %.3f' % (
            epoch + 1, loss_total / seen, correct_total / seen, test_acc))
            
    

In [102]:
# Train the nn.Sequential model; params and lr are unused by this loop,
# the optimizer carries both.
train_ch3_pytorch(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    loss=loss,
    num_epochs=num_epochs,
    batch_size=batch_size,
    params=None,
    lr=None,
    optimizer=optimizer,
)

epoch 1,train loss 0.0045,train acc 0.550,test acc 0.756
epoch 2,train loss 0.0022,train acc 0.788,test acc 0.774
epoch 3,train loss 0.0019,train acc 0.823,test acc 0.782
epoch 4,train loss 0.0017,train acc 0.839,test acc 0.839
epoch 5,train loss 0.0016,train acc 0.847,test acc 0.834
