In [1]:
import torch
import torchvision
import numpy as np
from torch import nn,optim

In [2]:
a=np.array([[1,2,3],[4,5,6]])
b=np.array([[1,2,3],[4,5,6]])
a*b

array([[ 1,  4,  9],
       [16, 25, 36]])

In [5]:
def dropout(x,drop_prob):
    x=x.float()
    assert 0<=drop_prob<=1, 'drop prob error !'
    keep_prob= 1-drop_prob
    
    if drop_prob==1:
        return torch.zeros_like(x)
#     mask=torch.ones_like(x.shape)
    mask=(torch.randn(x.shape)<keep_prob).float()
    
    return x*mask/keep_prob

In [12]:
x=torch.arange(16).view(2,8)
dropout(x,0)

tensor([[ 0.,  1.,  2.,  3.,  0.,  5.,  0.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])

In [13]:
num_inputs,num_outputs,num_hiddens1,num_hiddens2=784,10,256,256
w1=torch.Tensor(np.random.normal(0,0.01,size=(num_inputs,num_hiddens1)))
w2=torch.Tensor(np.random.normal(0,0.01,size=(num_hiddens1,num_hiddens2)))
w3=torch.Tensor(np.random.normal(0,0.01,size=(num_hiddens2,num_outputs)))
b1=torch.zeros(num_hiddens1,dtype=torch.float32)
b2=torch.zeros(num_hiddens2,dtype=torch.float32)
b3=torch.zeros(num_outputs,dtype=torch.float32)

params=[w1,b1,w2,b2,w3,b3]
for p in params:
    p.requires_grad_(requires_grad=True)

In [14]:
drop_prob1,drop_prob2=0.2,0.5

In [52]:
def net(x,is_training=True):
    x=x.view(-1,num_inputs)
    h1=(torch.matmul(x,w1)+b1).relu()
    if is_training :
        h1=dropout(h1,drop_prob1)
    h2=(torch.matmul(h1,w2)+b2).relu()
    if is_training:
        h2=dropout(h2,drop_prob2).relu()
    return torch.matmul(h2,w3)+b3

In [53]:
def eval_acc(data_iter,net):
    acc_sum,n=0,0
    for x,y in data_iter:
        if isinstance(net,nn.Module):
            net.eval()
            acc_sum +=(net(x).argmax(dim=1)==y).float().sum().item()
            net.train()
        else:
            if ('is_training' in net.__code__.co_varnames):
                acc_sum+=(net(x,is_training=False).argmax(dim=1)==y).float().sum().item()
            else:
                acc+=(net(x).argmax(dim=1)==y).float().sum().item()
        n+=len(y)
    return acc_sum/n
                

In [54]:
num_epochs,lr,batch_size=5,100.0,256
loss=torch.nn.CrossEntropyLoss()

In [55]:
def sgd(params,lr,batch_size):
    for p in params:
        p.data -=lr * p.grad /batch_size

In [56]:
def train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr,optimizer=None):
    for i in range(num_epochs):
        train_l,train_acc,test_acc,n=0,0,0,0
        for x,y in train_iter:
            y_hat=net(x,True)
            l=loss(y_hat,y).sum()
            
            if optimizer is None:
                if params[0].grad is not None:
                    for p in params:
                        p.grad.data.zero_()
            else:
                optimizer.zero_grad()
            
            l.backward()
            
            if optimizer is None:
                sgd(params,lr,batch_size)
            else:
                optimizer.step()
            
            train_l+=l.item()
            train_acc+=(y_hat.argmax(dim=1)==y).float().sum().item()
            n+=len(y)
        test_acc +=eval_acc(test_iter,net)
        print('epoch %d ,train loss %.3f,train acc %.3f,test acc %.3f' % (
        i+1,train_l/n,train_acc/n,test_acc))
            
            

In [57]:
from torchvision import transforms

In [58]:
batch_size=256
mnist_train=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=True,
    download=True,transform=transforms.ToTensor())
mnist_test=torchvision.datasets.FashionMNIST(
    root='F:\study\ml\DataSet\FashionMNIST',train=False,
    download=True,transform=transforms.ToTensor())

In [59]:
train_iter=torch.utils.data.DataLoader(mnist_train,batch_size,shuffle=True)
test_iter=torch.utils.data.DataLoader(mnist_test,batch_size,shuffle=False)

In [60]:
train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr)

epoch 1 ,train loss 0.001,train acc 0.868,test acc 0.807
epoch 2 ,train loss 0.001,train acc 0.871,test acc 0.858
epoch 3 ,train loss 0.001,train acc 0.876,test acc 0.839
epoch 4 ,train loss 0.001,train acc 0.879,test acc 0.861
epoch 5 ,train loss 0.001,train acc 0.881,test acc 0.870


drop out by nn

In [42]:
class FlattenLayer(nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self,x):
        return x.view(x.shape[0],-1)

In [62]:
net=nn.Sequential(
    FlattenLayer(),
    nn.Linear(num_inputs,num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1,num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2,num_outputs)
)

In [63]:
for p in net.parameters():
    print(p)

Parameter containing:
tensor([[-0.0076,  0.0137, -0.0120,  ..., -0.0205, -0.0247, -0.0125],
        [ 0.0238, -0.0040,  0.0188,  ..., -0.0290,  0.0073,  0.0095],
        [-0.0223,  0.0037, -0.0017,  ...,  0.0039, -0.0217, -0.0202],
        ...,
        [ 0.0300, -0.0085, -0.0301,  ...,  0.0297, -0.0014,  0.0265],
        [-0.0091,  0.0345, -0.0289,  ..., -0.0290, -0.0015, -0.0138],
        [-0.0158, -0.0012, -0.0171,  ..., -0.0179, -0.0305,  0.0314]],
       requires_grad=True)
Parameter containing:
tensor([-2.5865e-02, -2.9760e-02, -1.4490e-02,  2.1228e-02,  3.0781e-02,
         1.1187e-02, -5.5964e-03, -1.1453e-02, -2.7980e-02,  3.5621e-03,
        -1.4506e-02,  1.8700e-03, -1.1632e-02, -4.0749e-03, -2.5270e-02,
        -6.8834e-04,  2.1037e-02,  2.8469e-02,  1.5538e-02, -2.4364e-02,
        -2.6169e-02,  9.1143e-03, -1.3708e-02,  3.0184e-02, -2.9191e-02,
         1.6231e-02, -3.5141e-02, -6.8763e-03,  2.8573e-02,  1.1302e-02,
         9.8088e-04,  8.3420e-03,  2.0206e-02, -3.3399e-0

In [45]:
for p in net.parameters():
    nn.init.normal_(p,mean=0,std=0.01)

In [69]:
optimizer=torch.optim.SGD(net.parameters(),lr=0.05)

In [70]:
def train_ch3_pytorch(net,train_iter,test_iter,loss,num_epochs,batch_size,params,lr,optimizer=None):
    for i in range(num_epochs):
        train_l,train_acc,test_acc,n=0,0,0,0
        for x,y in train_iter:
            y_hat=net(x)
            l=loss(y_hat,y).sum()
            
            if optimizer is None:
                if params[0].grad is not None:
                    for p in params:
                        p.grad.data.zero_()
            else:
                optimizer.zero_grad()
            
            l.backward()
            
            if optimizer is None:
                sgd(params,lr,batch_size)
            else:
                optimizer.step()
            
            train_l+=l.item()
            train_acc+=(y_hat.argmax(dim=1)==y).float().sum().item()
            n+=len(y)
        test_acc +=eval_acc(test_iter,net)
        print('epoch %d ,train loss %.3f,train acc %.3f,test acc %.3f' % (
        i+1,train_l/n,train_acc/n,test_acc))
            
            

In [72]:
train_ch3_pytorch(net,train_iter,test_iter,loss,num_epochs,batch_size,params,0.05,optimizer)

epoch 1 ,train loss 0.006,train acc 0.507,test acc 0.665
epoch 2 ,train loss 0.003,train acc 0.714,test acc 0.754
epoch 3 ,train loss 0.003,train acc 0.763,test acc 0.788
epoch 4 ,train loss 0.002,train acc 0.793,test acc 0.797
epoch 5 ,train loss 0.002,train acc 0.807,test acc 0.817
