In [57]:
import torch
import torchvision
import numpy as np
from torch import nn,optim

In [6]:
def dropout(x,drop_prob):
    """Apply inverted dropout to ``x``.

    Each element is zeroed independently with probability ``drop_prob``; the
    surviving elements are divided by ``1 - drop_prob`` so that the expected
    value of every element is unchanged.

    Args:
        x: Input tensor; it is converted to float before masking.
        drop_prob: Probability of dropping an element, in ``[0, 1]``.

    Returns:
        A float tensor with the same shape as ``x``.

    Raises:
        AssertionError: If ``drop_prob`` is outside ``[0, 1]``.
    """
    x=x.float()
    assert 0<=drop_prob<=1 , 'drop prob must between 0 and 1'
    keep_prob=1-drop_prob

    # Everything is dropped: return early so we never divide by zero below.
    if keep_prob==0:
        return torch.zeros_like(x)

    # Uniform samples on [0, 1): `u < keep_prob` is true with probability
    # exactly keep_prob. Standard-normal samples (torch.randn) would keep about
    # 69% of the elements for keep_prob=0.5 and make the rescaling biased.
    # rand_like also creates the mask on the same device as x.
    mask=(torch.rand_like(x)<keep_prob).float()
    return x*mask/keep_prob
    
    

In [8]:
# Sanity-check the hand-written dropout on a small 2x8 tensor holding 0..15,
# dropping each element with probability 0.5.
a = torch.arange(0, 16).view(2, 8)
dropout(a, drop_prob=0.5)

tensor([[ 0.,  2.,  4.,  6.,  8., 10., 12., 14.],
        [16.,  0.,  0.,  0., 24., 26., 28., 30.]])

In [26]:
# Layer widths of the MLP: input features, output classes, and the sizes of
# the first and second hidden layers.
num_inputs = 784
num_outputs = 10
num_hiddens1 = 256
num_hiddens2 = 256

In [40]:
# Parameters of the hand-written MLP, grouped per layer. Weights are sampled
# with NumPy from a normal distribution (mean 0, std 0.01) and converted to
# float32 tensors; biases start at zero. The three NumPy draws happen in the
# order w1, w2, w3.
w1 = torch.from_numpy(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1))).float()
b1 = torch.zeros(num_hiddens1, dtype=torch.float32)

w2 = torch.from_numpy(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2))).float()
b2 = torch.zeros(num_hiddens2, dtype=torch.float32)

w3 = torch.from_numpy(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs))).float()
b3 = torch.zeros(num_outputs, dtype=torch.float32)

In [41]:
# Every tensor the training loop updates, in layer order. Gradient tracking
# is switched on in place (requires_grad_ defaults to True).
params = [w1, b1, w2, b2, w3, b3]
for p in params:
    p.requires_grad_()

In [42]:
# Dropout probabilities applied after the first and second hidden layers.
drop_prob1 = 0.2
drop_prob2 = 0.5

In [43]:
def net(x,is_training=True):
    """Forward pass of the hand-written MLP with two hidden layers.

    Args:
        x: Input batch; it is reshaped to ``(-1, num_inputs)``.
        is_training: When true, dropout is applied to the output of each
            hidden layer (``drop_prob1`` first, then ``drop_prob2``).

    Returns:
        The output of the final linear layer; no softmax is applied here.
    """
    out = x.view(-1, num_inputs)
    hidden_layers = ((w1, b1, drop_prob1), (w2, b2, drop_prob2))
    for weight, bias, drop_prob in hidden_layers:
        out = torch.relu(torch.matmul(out, weight) + bias)
        if is_training:
            out = dropout(out, drop_prob)
    return torch.matmul(out, w3) + b3
        

In [61]:
def eval_acc(data_iter,net):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable of ``(x, y)`` batches. ``y`` holds the class
            indices that are compared with ``argmax(dim=1)`` of the output.
        net: Either an ``nn.Module`` or a plain callable. A callable whose
            signature has an ``is_training`` parameter is invoked with
            ``is_training=False``.

    Returns:
        The fraction of correctly classified samples, as a Python float.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    import inspect

    is_module = isinstance(net, nn.Module)
    if is_module:
        # Remember the caller's mode so evaluation does not silently flip a
        # model that was already in eval mode back to training.
        was_training = net.training
        net.eval()
        forward = net
    else:
        # Look at real parameters only; `__code__.co_varnames` also lists
        # local variables and does not exist on every callable.
        try:
            takes_flag = 'is_training' in inspect.signature(net).parameters
        except (TypeError, ValueError):
            takes_flag = False
        if takes_flag:
            forward = lambda batch: net(batch, is_training=False)
        else:
            forward = net

    acc_sum, n = 0.0, 0
    try:
        # Evaluation needs no autograd graph.
        with torch.no_grad():
            for x, y in data_iter:
                acc_sum += (forward(x).argmax(dim=1) == y).float().sum().item()
                n += len(y)
    finally:
        if is_module:
            net.train(was_training)
    return acc_sum / n

In [45]:
# Hyperparameters for the hand-written training loop. The learning rate looks
# large because `sgd` divides every gradient by `batch_size` before stepping.
num_epochs = 5
lr = 100.0
batch_size = 256

In [46]:
# Cross-entropy criterion with the library defaults; it is fed the raw
# outputs of the network and the integer class labels.
loss = nn.CrossEntropyLoss()

In [47]:
def sgd(params,lr,batch_size):
    """Take one in-place mini-batch SGD step.

    Every parameter with a gradient is updated as
    ``p -= lr * p.grad / batch_size``.

    Args:
        params: Iterable of tensors whose ``.grad`` was filled by ``backward()``.
        lr: Learning rate.
        batch_size: Divisor applied to every gradient before the step.
    """
    # Update outside of autograd rather than through the legacy `.data` alias.
    with torch.no_grad():
        for p in params:
            if p.grad is None:
                # No gradient reached this parameter; leave it untouched
                # instead of failing on `lr * None`.
                continue
            p -= lr * p.grad / batch_size
    

In [48]:
import torchvision
from torchvision import transforms

In [49]:
# Fashion-MNIST train and test splits. The files are downloaded into the root
# directory when missing, and every image is converted with ToTensor.
batch_size=256
# Raw string: the Windows path contains backslashes that are not valid escape
# sequences (\s, \m, \D, \F). The resulting text is unchanged, but the literal
# no longer depends on Python tolerating unknown escapes.
_data_root=r'F:\study\ml\DataSet\FashionMNIST'
mnist_train=torchvision.datasets.FashionMNIST(
    root=_data_root,train=True,
    download=True,transform=transforms.ToTensor())
mnist_test=torchvision.datasets.FashionMNIST(
    root=_data_root,train=False,
    download=True,transform=transforms.ToTensor())

In [50]:
# Mini-batch iterators over both splits; each pass reshuffles the samples.
train_iter = torch.utils.data.DataLoader(
    dataset=mnist_train, batch_size=batch_size, shuffle=True)
test_iter = torch.utils.data.DataLoader(
    dataset=mnist_test, batch_size=batch_size, shuffle=True)

In [51]:
def train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr):
    """Train ``net`` with the hand-written ``sgd`` step and print epoch metrics.

    Args:
        net: Callable mapping an input batch to class scores.
        train_iter: Iterable of ``(x, y)`` training batches.
        test_iter: Iterable of ``(x, y)`` batches passed to ``eval_acc``.
        loss: Callable ``loss(y_hat, y)``; its result is reduced with ``.sum()``.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``sgd`` as the gradient divisor.
        params: Tensors updated by ``sgd``.
        lr: Learning rate forwarded to ``sgd``.

    Note:
        The printed training loss is the sum of the per-batch loss values
        divided by the number of samples. That is a per-sample mean only when
        ``loss(...).sum()`` is a per-batch total; with a criterion that already
        averages over the batch, the printed value is smaller by roughly the
        batch size.
    """
    for i in range(num_epochs):
        train_l,train_acc,n=0.0,0.0,0
        for x,y in train_iter:
            y_hat=net(x)
            l=loss(y_hat,y).sum()

            # Clear stale gradients parameter by parameter. Checking only
            # params[0] would crash on, or skip, any parameter whose grad
            # state differs from the first one.
            for p in params:
                if p.grad is not None:
                    p.grad.zero_()
            l.backward()
            sgd(params,lr,batch_size)
            train_l+=l.item()
            # Training accuracy is measured on the same forward pass that
            # produced the loss.
            train_acc+=(y_hat.argmax(dim=1)==y).float().sum().item()
            n+=len(y)
        test_acc=eval_acc(test_iter,net)
        print('epoch %d,train loss %.4f,train acc %.3f,test acc %.3f' % (
        i+1,train_l/n,train_acc/n,test_acc))
            
    

In [52]:
# Train the hand-written dropout MLP with the manual SGD step.
train_ch3(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    loss=loss,
    num_epochs=num_epochs,
    batch_size=batch_size,
    params=params,
    lr=lr,
)

epoch 1,train loss 0.0044,train acc 0.569,test acc 0.667
epoch 2,train loss 0.0022,train acc 0.791,test acc 0.801
epoch 3,train loss 0.0018,train acc 0.826,test acc 0.816
epoch 4,train loss 0.0017,train acc 0.841,test acc 0.843
epoch 5,train loss 0.0016,train acc 0.853,test acc 0.841


Dropout implemented with torch.nn

In [64]:
class FlattenLayer(nn.Module):
    """Reshape every input batch to a 2-D tensor ``(-1, num_features)``.

    Args:
        num_features: Size of the trailing dimension after flattening.
            Defaults to 784, the width this layer was originally hard-coded to.
    """
    def __init__(self,num_features=784):
        super().__init__()
        self.num_features=num_features
    def forward(self,x):
        """Return ``x`` viewed as ``(-1, num_features)`` without copying."""
        return x.view(-1,self.num_features)

In [65]:
# Same architecture as the hand-written model, built from torch.nn layers:
# flatten -> linear -> ReLU -> dropout -> linear -> ReLU -> dropout -> linear.
# Both dropout layers take their probability from the shared constants, so the
# second one no longer depends on nn.Dropout's default value.
net = nn.Sequential(FlattenLayer(), nn.Linear(num_inputs, num_hiddens1),
                   nn.ReLU(), nn.Dropout(drop_prob1),
                   nn.Linear(num_hiddens1, num_hiddens2), nn.ReLU(),
                   nn.Dropout(drop_prob2), nn.Linear(num_hiddens2, num_outputs))

In [66]:
# Re-initialise every parameter of the model (weights and biases alike) from
# a normal distribution with mean 0 and standard deviation 0.01.
for p in net.parameters():
    nn.init.normal_(p, mean=0.0, std=0.01)

In [67]:
# Plain SGD over all model parameters with a learning rate of 0.5.
optimizer = optim.SGD(params=net.parameters(), lr=0.5)

In [68]:
def train_ch3_pytorch(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr,optimizer):
    """Train ``net`` with a ``torch.optim`` optimizer and print epoch metrics.

    Args:
        net: Model mapping an input batch to class scores.
        train_iter: Iterable of ``(x, y)`` training batches.
        test_iter: Iterable of ``(x, y)`` batches passed to ``eval_acc``.
        loss: Callable ``loss(y_hat, y)``; its result is reduced with ``.sum()``.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Not used by this function.
        params: Not used by this function.
        lr: Not used by this function.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the update.
    """
    for epoch in range(1, num_epochs + 1):
        loss_total, correct, seen = 0, 0, 0
        for x, y in train_iter:
            # Start each step from clean gradients.
            optimizer.zero_grad()

            y_hat = net(x)
            batch_loss = loss(y_hat, y).sum()
            batch_loss.backward()
            optimizer.step()

            loss_total += batch_loss.item()
            hits = (y_hat.argmax(dim=1) == y).float().sum()
            correct += hits.item()
            seen += len(y)
        test_acc = eval_acc(test_iter, net)
        summary = (epoch, loss_total / seen, correct / seen, test_acc)
        print('epoch %d,train loss %.4f,train acc %.4f,test acc %.4f' % summary)

In [69]:
# Train the nn.Sequential model; `params` and `lr` are not needed because the
# optimizer already holds the parameters and the learning rate.
train_ch3_pytorch(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    loss=loss,
    num_epochs=num_epochs,
    batch_size=batch_size,
    params=None,
    lr=None,
    optimizer=optimizer,
)

epoch 1,train loss 0.0045,train acc 0.5614,test acc 0.7678
epoch 2,train loss 0.0022,train acc 0.7882,test acc 0.7961
epoch 3,train loss 0.0019,train acc 0.8210,test acc 0.8317
epoch 4,train loss 0.0017,train acc 0.8380,test acc 0.8280
epoch 5,train loss 0.0016,train acc 0.8487,test acc 0.8480
