In [1]:
import torch
import torchvision
import numpy as np
from torch import nn,optim

In [35]:
def dropout(x,drop_prob):
    x=x.float()
    assert 0<=drop_prob<=1
    keep_prob = 1-drop_prob
    
    if keep_prob ==0:
        return torch.zeros_like(x)
    mask=(torch.randn(x.shape) < keep_prob).float()
#     print(mask)
    
    return mask * x / keep_prob


In [8]:
x=torch.arange(16).view(2,8)
dropout(x,0)

tensor([[1., 1., 0., 1., 0., 0., 1., 1.],
        [1., 0., 1., 1., 1., 1., 1., 1.]])


tensor([[ 0.,  1.,  0.,  3.,  0.,  0.,  6.,  7.],
        [ 8.,  0., 10., 11., 12., 13., 14., 15.]])

In [13]:
x=torch.arange(16).view(2,8)
dropout(x,0.25)

tensor([[0., 0., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1.]])


tensor([[ 0.0000,  0.0000,  2.6667,  4.0000,  5.3333,  6.6667,  8.0000,  9.3333],
        [10.6667, 12.0000, 13.3333, 14.6667, 16.0000, 17.3333, 18.6667, 20.0000]])

In [15]:
x=torch.arange(16).view(2,8)
dropout(x,0.7)

tensor([[1., 1., 1., 1., 1., 0., 0., 1.],
        [1., 1., 1., 1., 0., 1., 0., 1.]])


tensor([[ 0.0000,  3.3333,  6.6667, 10.0000, 13.3333,  0.0000,  0.0000, 23.3333],
        [26.6667, 30.0000, 33.3333, 36.6667,  0.0000, 43.3333,  0.0000, 50.0000]])

In [22]:
num_inputs,num_outputs,num_hiddens1,num_hiddens2=784,10,256,256
w1=torch.Tensor(np.random.normal(0,0.01,size=(num_inputs,num_hiddens1)))
w2=torch.Tensor(np.random.normal(0,0.01,size=(num_hiddens1,num_hiddens2)))
w3=torch.Tensor(np.random.normal(0,0.01,size=(num_hiddens2,num_outputs)))

b1=torch.zeros(num_hiddens1)
b2=torch.zeros(num_hiddens2)
b3=torch.zeros(num_outputs)

params=[w1,b1,w2,b2,w3,b3]
for p in params:
    p.requires_grad_(requires_grad=True)

In [36]:
drop_prob1,drop_prob2=0.2,0.5

def net(x,is_training=True):
    x=x.view(-1,num_inputs)
    h1=(torch.matmul(x,w1)+b1).relu()
    if is_training:
        h1=dropout(h1,drop_prob1)
    h2=(torch.matmul(h1,w2)+b2).relu()
    if is_training:
        h2=dropout(h2,drop_prob2)
    return torch.matmul(h2,w3)+b3

In [37]:
def evaluate_acc(data_iter,net):
    acc_sum,n=0,0
    for x, y in data_iter:
        if isinstance(net,torch.nn.Module):
            net.eval()
            acc_sum +=(net(x).argmax(dim=1)==y).float().sum().item()
            net.train()
        else:
            if('is_training' in net.__code__.co_varnames):
                acc_sum +=(net(x,is_training=False).argmax(dim=1)==
                           y).float().sum().item()
            else:
                acc_sum+=(net(x).argmax(dim=1)==y).float().sum().item()
        n+=y.shape[0]
    return acc_sum /n
                

In [38]:
from torchvision import transforms

In [39]:
batch_size=256
mnist_train=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=True,
    download=True,transform=transforms.ToTensor())
mnist_test=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=False,
    download=True,transform=transforms.ToTensor())

In [40]:
def sgd(params,lr,batch_size):
    for p in params:
        p.data -= lr*p.grad/batch_size

In [41]:
def train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,
             params=None,lr=None,optimizer=None):
    for epoch in range(num_epochs):
        train_l,train_acc,n=0,0,0
        for x,y in train_iter:
            y_hat=net(x)
            l=loss(y_hat,y).sum()
            
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for p in params:
                    p.grad.data.zero_()
            
            l.backward()
            if optimizer is None:
                sgd(params,lr,batch_size)
            else:
                optimizer.step()
            
            train_l +=l.item()
            train_acc +=(y_hat.argmax(dim=1)==y).sum().item()
            n+=y.shape[0]
        test_acc=evaluate_acc(test_iter,net)
        print('epoch %d,loss %.4f,train acc %.3f,test acc %.3f' % (
        epoch +1,train_l/n,train_acc/n,test_acc))
            
            
                
                

In [42]:
num_epochs,lr,batch_size=5,100,256
loss=torch.nn.CrossEntropyLoss()
train_iter=torch.utils.data.DataLoader(mnist_train,batch_size,shuffle=True)
test_iter=torch.utils.data.DataLoader(mnist_test,batch_size,shuffle=False)


In [43]:
train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr)

epoch 1,loss 0.0015,train acc 0.859,test acc 0.836
epoch 2,loss 0.0014,train acc 0.865,test acc 0.861
epoch 3,loss 0.0014,train acc 0.868,test acc 0.833
epoch 4,loss 0.0013,train acc 0.873,test acc 0.849
epoch 5,loss 0.0013,train acc 0.880,test acc 0.821
