In [28]:
import torch


def dropout_layer(x,dropout):
    """Apply inverted dropout to ``x``.

    Each element is zeroed independently with probability ``dropout``; the
    surviving elements are divided by ``1 - dropout`` so the expected value
    of the output equals the input.

    Args:
        x: input tensor.
        dropout: drop probability, a number in the closed interval [0, 1].

    Returns:
        A tensor with the same shape as ``x``. ``x`` itself when
        ``dropout == 0`` and an all-zero tensor when ``dropout == 1``.

    Raises:
        AssertionError: if ``dropout`` is outside [0, 1].
    """
    assert 0<=dropout<=1

    # Nothing is dropped: the layer is the identity.
    if dropout==0:
        return x

    # Everything is dropped. Handled separately because the general formula
    # below would divide by zero.
    if dropout==1:
        return torch.zeros_like(x)

    # Draw the mask on the same device as x so the product below is valid
    # for inputs that do not live on the CPU.
    mask=(torch.rand(x.shape,device=x.device)>dropout).float()
    return mask*x/(1.0-dropout)

In [37]:
# Layer widths shared by the networks defined in this notebook:
# input features, two hidden layers, and output logits.
# NOTE(review): 784 is presumably 28*28 flattened Fashion-MNIST pixels and
# 10 the number of classes -- confirm against the dataset.
num_input=784
num_hidden1=256
num_hidden2=256
num_output=10

from torch import nn

class Net(nn.Module):
    """Three-layer perceptron with hand-written dropout after each hidden layer.

    Args:
        dropout: drop probability handed to ``dropout_layer`` for both
            hidden layers.
    """
    def __init__(self,dropout):
        super().__init__()
        self.lin1=nn.Linear(num_input,num_hidden1)
        self.lin2=nn.Linear(num_hidden1,num_hidden2)
        self.lin3=nn.Linear(num_hidden2,num_output)
        self.relu=nn.ReLU()
        self.dropout=dropout

    def forward(self,x):
        """Flatten ``x`` to rows of ``num_input`` features and return the output of ``lin3``."""
        hidden=x.reshape(-1,num_input)
        # Both hidden stages are linear -> ReLU -> (dropout only in training mode).
        for layer in (self.lin1,self.lin2):
            hidden=self.relu(layer(hidden))
            if self.training:
                hidden=dropout_layer(hidden,self.dropout)
        return self.lin3(hidden)

        
    


In [35]:
import torchvision
from torchvision import transforms 
from torch.utils import data

def data_iter(batchsize,resize=None,num_workers=4,root="../data"):
    """Build Fashion-MNIST data loaders for training and testing.

    Args:
        batchsize: number of samples per batch for both loaders.
        resize: optional size passed to ``transforms.Resize``; when falsy the
            images are only converted to tensors.
        num_workers: number of loader worker processes for each loader.
        root: directory the dataset is read from (and downloaded to if it is
            missing).

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader is
        shuffled; evaluation does not depend on sample order, so the test
        loader iterates in dataset order.
    """
    steps=[transforms.ToTensor()]
    if resize:
        # Resize must run on the image, i.e. before the tensor conversion.
        steps.insert(0,transforms.Resize(resize))
    trans=transforms.Compose(steps)
    mnist_train=torchvision.datasets.FashionMNIST(root=root, train=True, transform=trans, download=True)
    mnist_test=torchvision.datasets.FashionMNIST(root=root, train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train,batchsize,shuffle=True,num_workers=num_workers),
            data.DataLoader(mnist_test,batchsize,shuffle=False,num_workers=num_workers))

class Accumulator():
    """Keep running float totals in a fixed number of slots."""
    def __init__(self,n):
        # One zero-initialised total per slot.
        self.data=[0.0 for _ in range(n)]

    def add(self,*args):
        """Add each positional value (converted with ``float``) to the slot at the same position."""
        updated=[]
        for total,value in zip(self.data,args):
            updated.append(total+float(value))
        self.data=updated

    def __getitem__(self,index):
        """Return the current total stored at ``index``."""
        return self.data[index]
    
def accuracy(y_hat,y):
    """Return, as a float, how many rows of ``y_hat`` have their arg-max equal to the entry of ``y``."""
    predicted=y_hat.argmax(dim=1).type(y.dtype)
    hits=(predicted==y).type(y.dtype)
    return float(hits.sum())
    
def train(net,loss,updater,num_epoch,train_iter):
    """Train ``net`` for ``num_epoch`` passes over ``train_iter``.

    After every epoch one line is printed with the summed loss, the value
    accumulated from ``accuracy`` and the number of labels seen.

    Args:
        net: model to optimise; switched to training mode first.
        loss: callable ``loss(y_hat, y)``; its result is averaged for the
            backward pass and summed for the printed statistics.
        updater: optimizer providing ``zero_grad()`` and ``step()``.
        num_epoch: number of passes over the data.
        train_iter: iterable of ``(inputs, labels)`` batches.
    """
    net.train()
    for i in range(num_epoch):
        # Slots: summed loss, value accumulated from accuracy(), label count.
        metric=Accumulator(3)
        for inputs,targets in train_iter:
            outputs=net(inputs)
            batch_loss=loss(outputs,targets)
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
            loss_total=batch_loss.sum()
            hit_count=accuracy(outputs,targets)
            metric.add(loss_total,hit_count,targets.numel())
        print(f'epoch:{i} loss {metric[0]}  y_hat:{metric[1]} y:{metric[2]}')

def pred(net,test_iter):
    """Return the fraction of samples in ``test_iter`` that ``net`` classifies correctly.

    The network is evaluated in eval mode and without gradient tracking, so
    dropout does not randomly zero activations while the accuracy is being
    measured. If ``net`` is an ``nn.Module`` its previous train/eval mode is
    restored before returning.

    Args:
        net: callable mapping a batch of inputs to per-class scores.
        test_iter: iterable of ``(inputs, labels)`` batches.

    Returns:
        Number of correct predictions divided by the number of labels (float).

    Raises:
        ZeroDivisionError: if ``test_iter`` yields no samples.
    """
    # Plain callables have no train/eval mode to toggle.
    is_module=isinstance(net,torch.nn.Module)
    was_training=net.training if is_module else False
    if is_module:
        net.eval()

    correct=0.0
    total=0.0
    try:
        # No gradients are needed for evaluation; skip building the graph.
        with torch.no_grad():
            for x,y in test_iter:
                predin=net(x).argmax(axis=1)
                correct+=float((predin==y).sum())
                total+=float(y.numel())
    finally:
        # Leave the model in the mode the caller had it in.
        if is_module:
            net.train(was_training)
    return correct/total



In [39]:
# Hyper-parameters and data loaders; num_epoch, train_iter and test_iter are
# reused by the later cell that trains net1.
batch_size=10
num_epoch=20
train_iter,test_iter=data_iter(batch_size)

# MLP with the hand-written dropout layer, drop probability 0.5.
net=Net(0.5)
updater=torch.optim.SGD(net.parameters(),lr=0.01)
# reduction='none' keeps one loss value per sample; train() takes the mean
# for the backward pass and the sum for the printed statistics.
loss=nn.CrossEntropyLoss(reduction='none')
train(net,loss,updater,num_epoch,train_iter)
# Accuracy on the test loader.
print(pred(net,test_iter))




epoch:0 loss 51698.996680378914  y_hat:41279.0 y:60000.0
epoch:1 loss 32517.294899210334  y_hat:48513.0 y:60000.0
epoch:2 loss 28871.74287673831  y_hat:49789.0 y:60000.0
epoch:3 loss 27039.62659691274  y_hat:50332.0 y:60000.0
epoch:4 loss 25953.44682379067  y_hat:50630.0 y:60000.0
epoch:5 loss 24963.55790693313  y_hat:50983.0 y:60000.0
epoch:6 loss 24127.25201525539  y_hat:51379.0 y:60000.0
epoch:7 loss 23610.92956962809  y_hat:51418.0 y:60000.0
epoch:8 loss 22993.120662186295  y_hat:51645.0 y:60000.0
epoch:9 loss 22543.337783791125  y_hat:51892.0 y:60000.0
epoch:10 loss 21994.60747434944  y_hat:52081.0 y:60000.0
epoch:11 loss 21525.778445899487  y_hat:52247.0 y:60000.0
epoch:12 loss 21192.96304800175  y_hat:52263.0 y:60000.0
epoch:13 loss 21030.457260012627  y_hat:52405.0 y:60000.0
epoch:14 loss 20771.588043342344  y_hat:52403.0 y:60000.0
epoch:15 loss 20489.450480254367  y_hat:52542.0 y:60000.0
epoch:16 loss 20068.213566461578  y_hat:52707.0 y:60000.0
epoch:17 loss 19879.67597090639 

In [47]:
# Same layer sizes as Net, built from library modules: nn.Dropout takes the
# place of the hand-written dropout_layer. Note the drop probability here is
# 0.8, whereas Net above was created with 0.5.
dropout_num=0.8
net1=nn.Sequential(nn.Flatten(),
                   nn.Linear(num_input,num_hidden1),
                   nn.ReLU(),
                   nn.Dropout(dropout_num),
                   nn.Linear(num_hidden1,num_hidden2),
                   nn.ReLU(),
                   nn.Dropout(dropout_num),
                   nn.Linear(num_hidden2,num_output)
                   )

def init_weight(m):
    """Initialise an ``nn.Linear`` in place; every other module type is left untouched.

    Weights are drawn from a normal distribution with standard deviation
    0.01 and the bias, when the layer has one, is set to zero. Intended to
    be passed to ``Module.apply``.

    Args:
        m: the module to initialise.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight,std=0.01)
    # A layer created with bias=False stores None here, which zeros_ rejects.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
# Run init_weight on every submodule of net1. As the last expression of the
# cell, the returned model is what the notebook displays below.
net1.apply(init_weight)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.8, inplace=False)
  (4): Linear(in_features=256, out_features=256, bias=True)
  (5): ReLU()
  (6): Dropout(p=0.8, inplace=False)
  (7): Linear(in_features=256, out_features=10, bias=True)
)

In [48]:
# Train the nn.Sequential model with the same loss/optimizer settings as Net.
# num_epoch, train_iter and test_iter come from the earlier training cell.
loss1=nn.CrossEntropyLoss(reduction="none")
updater1=torch.optim.SGD(net1.parameters(),lr=0.01)
train(net1,loss1,updater1,num_epoch,train_iter)
# Accuracy on the test loader.
print(pred(net1,test_iter))

epoch:0 loss 85538.64153027534  y_hat:26038.0 y:60000.0
epoch:1 loss 49983.568081855774  y_hat:40914.0 y:60000.0
epoch:2 loss 43787.959906592965  y_hat:43796.0 y:60000.0
epoch:3 loss 40846.84311938286  y_hat:45193.0 y:60000.0
epoch:4 loss 38683.54435905814  y_hat:46072.0 y:60000.0
epoch:5 loss 37678.725042402744  y_hat:46618.0 y:60000.0
epoch:6 loss 36415.48286372423  y_hat:47076.0 y:60000.0
epoch:7 loss 35637.57669147849  y_hat:47421.0 y:60000.0
epoch:8 loss 34905.621699392796  y_hat:47840.0 y:60000.0
epoch:9 loss 34501.57642225921  y_hat:47900.0 y:60000.0
epoch:10 loss 34044.94153620303  y_hat:47910.0 y:60000.0
epoch:11 loss 33803.00543563068  y_hat:48106.0 y:60000.0
epoch:12 loss 32871.379730299115  y_hat:48397.0 y:60000.0
epoch:13 loss 32976.99810536578  y_hat:48380.0 y:60000.0
epoch:14 loss 32574.25745292753  y_hat:48598.0 y:60000.0
epoch:15 loss 32147.38464485854  y_hat:48513.0 y:60000.0
epoch:16 loss 32168.527199551463  y_hat:48758.0 y:60000.0
epoch:17 loss 31626.167307399213  y