In [31]:
import torch
from torch import nn

In [32]:
#定义模型
"""
model = nn.Sequential(
    nn.Linear(28 * 28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Dropout(0, 1),
    nn.Linear(64, 10),
)"""

class ResNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(28 * 28, 64)
        self.l2 = nn.Linear(64, 64)
        self.l3 = nn.Linear(64, 10)
        self.do = nn.Dropout(0.1)

    def forward(self,x):
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        do = self.do(h2 + h1)  #dropout
        logits = self.l3(do)
        return logits

model = ResNet().cuda()

In [33]:
#优化器定义
from torch import optim
optimiser = optim.SGD(model.parameters(), lr = 1e-2)

In [34]:
#损失定义
loss = nn.CrossEntropyLoss()

In [35]:
#load data
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader
train_data = datasets.MNIST('data', train = True, transform = transforms.ToTensor() )
train, val = random_split(dataset=train_data, lengths=[55000, 5000])#55000个训练集和5000个验证集（随机划分）
train_loader = DataLoader(train, batch_size=32)
val_loader = DataLoader(val, batch_size=32)


In [36]:
#training loop
nb_epochs = 5
for epoch in range(nb_epochs):
    losses = list()
    accuracies = list()
    model.train()#dropout
    for batch in train_loader:
        x,y = batch  
        
        #x为图像 batch*1*28*28,y为label.
        #将x拉直，batch*(28*28)  
        x = x.view(x.size(0),-1).cuda()

        #1 forward
        logit = model(x)
        # import pdb; pdb.set_trace()    调试方法，添加断点

        #2 compute the objective function
        J = loss(logit,y.cuda())

        #3 cleaning the gradients
        model.zero_grad()
        #optimser.zero_grad()也可以

        #4 accumulate the partial derivation of j
        J.backward()


        # 5 step in the oppposite direction of the gradient
        optimiser.step()
        # with torch.no_grad(): params = params - 学习率*梯度


        losses.append(J.item())
        accuracies.append(y.cuda().eq(logit.detach().argmax(1)) .float().mean())
    print(f'Epoch {epoch + 1}, train loss: {torch. tensor(losses).mean():.2f}')
    print(f'train accuracy:{torch.tensor(accuracies).mean():.2f}')
    
    #valitation loop
    model.eval()#dropout
    losses = list()
    accuracies = list()
    for batch in val_loader:
        x,y = batch  
        
        #x为图像 batch*1*28*28,y为label.
        #将x拉直，batch*(28*28)  
        x = x.view(x.size(0),-1).cuda()

        #1 forward
        with torch.no_grad():
            logit = model(x)  

        #2 compute the objective function
        J = loss(logit,y.cuda())

        losses.append(J.item())
    accuracies.append(y.cuda().eq(logit.detach().argmax(1)).cuda().float().mean())
    print(f'Epoch {epoch + 1}, val loss: {torch. tensor(losses).mean():.2f}')
    print(f'val accuracy:{torch.tensor(accuracies).mean():.2f}')

Epoch 1, train loss: 0.85
train accuracy:0.78
Epoch 1, val loss: 0.38
val accuracy:0.75
Epoch 2, train loss: 0.38
train accuracy:0.89
Epoch 2, val loss: 0.29
val accuracy:0.75
Epoch 3, train loss: 0.31
train accuracy:0.91
Epoch 3, val loss: 0.25
val accuracy:0.75
Epoch 4, train loss: 0.27
train accuracy:0.92
Epoch 4, val loss: 0.22
val accuracy:0.75
Epoch 5, train loss: 0.24
train accuracy:0.93
Epoch 5, val loss: 0.20
val accuracy:0.75
