In [11]:
import torch
from torch.autograd import Variable
import torchvision
import torch.nn as nn
import sys
sys.path.append("..") 
import torch.optim as optim
import d2lzh_pytorch as d2l

In [6]:
class LeNet(nn.Module):
    """LeNet-style CNN: two conv -> sigmoid -> max-pool stages and an MLP head.

    The shape comments below assume a single-channel 28x28 input, which is the
    input size that yields the 16 * 4 * 4 features the classifier expects.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: (1, 28, 28) -> 5x5 conv (width/height shrink by 4) -> (6, 24, 24)
        #          -> 2x2 max-pool -> (6, 12, 12)
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 5),
            nn.Sigmoid(),
            nn.MaxPool2d(2),
        )
        # Stage 2: (6, 12, 12) -> 5x5 conv -> (16, 8, 8) -> 2x2 max-pool -> (16, 4, 4)
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Head: flattened 16 * 4 * 4 = 256 features -> 120 -> 84 -> 10 scores.
        flat_features = 16 * 4 * 4
        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        """Run both conv stages, flatten per sample, and return the class scores."""
        features = self.conv2(self.conv1(x))
        # Keep the batch dimension and collapse everything else into one axis.
        flat = features.view(features.size(0), -1)
        return self.classifier(flat)
    
# Instantiate the network; the bare `net` expression on the last line makes the
# cell display the module's repr (the layer listing shown in the output).
net = LeNet()
net

LeNet(
  (conv1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)

In [7]:
# Mini-batch size handed to the data-loading helper.
batch_size = 256
# load_data_fashion_mnist lives in d2lzh_pytorch (not shown here); its result is
# unpacked into a training iterator and a test iterator — presumably Fashion-MNIST
# DataLoaders yielding (image, label) batches; TODO confirm against the helper.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

In [8]:
def evaluate_accuracy(net, test_iter):
    """Return the fraction of samples in ``test_iter`` that ``net`` classifies correctly.

    Args:
        net: Callable mapping an input batch to per-class scores, with classes
            along dim 1. If it is an ``nn.Module`` it is switched to eval mode
            for the duration of the call and restored to its previous mode
            afterwards.
        test_iter: Iterable of ``(x, y)`` batches, where ``y`` holds the class
            index of each sample in ``x``.

    Returns:
        float: number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: if ``test_iter`` yields no samples.
    """
    is_module = isinstance(net, nn.Module)
    was_training = is_module and net.training
    if is_module:
        # Dropout / BatchNorm behave differently in training mode and would
        # distort the measured accuracy.
        net.eval()
    acc_sum, n = 0., 0
    try:
        # No gradients are needed for evaluation; skip building the autograd graph.
        with torch.no_grad():
            for x, y in test_iter:
                out = net(x)
                acc_sum += (out.argmax(dim=1) == y).sum().item()
                n += y.shape[0]
    finally:
        if is_module:
            # Leave the model in whatever mode the caller had it in.
            net.train(was_training)
    return acc_sum / n

In [9]:
def train(net, train_iter, test_iter, num_epochs, LR):
    """Train ``net`` with Adam and cross-entropy loss, printing metrics every epoch.

    Args:
        net: ``nn.Module`` to optimise in place.
        train_iter: Iterable of ``(x, y)`` training batches.
        test_iter: Iterable of ``(x, y)`` batches passed to ``evaluate_accuracy``
            at the end of every epoch.
        num_epochs: Number of full passes over ``train_iter``.
        LR: Learning rate for the Adam optimizer.

    Returns:
        None. One line per epoch is printed with the mean per-batch loss, the
        training accuracy and the test accuracy.

    Raises:
        ZeroDivisionError: if ``train_iter`` yields no batches.
    """
    loss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=LR)
    for epoch in range(num_epochs):
        # Make sure the model is in training mode regardless of what the
        # previous evaluation pass (or the caller) left it in.
        net.train()
        # n_samples replaces the original local named `sum`, which shadowed the builtin.
        train_l_sum, train_acc_sum, n_samples, batch_count = 0., 0., 0, 0
        for x, y in train_iter:
            out = net(x)
            l = loss(out, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (out.argmax(dim=1) == y).sum().item()
            n_samples += y.shape[0]
            batch_count += 1
            # `l` is already the mean loss of one batch, so dividing the running
            # total by batch_count gives the average per-batch loss.
        test_acc = evaluate_accuracy(net, test_iter)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n_samples, test_acc))


In [12]:
# Hyperparameters for this run: Adam learning rate and number of passes over the data.
lr = 0.001
num_epochs = 5
train(net, train_iter, test_iter, num_epochs=num_epochs, LR=lr)

epoch 1, loss 1.9204, train acc 0.295, test acc 0.578
epoch 2, loss 0.9473, train acc 0.628, test acc 0.682
epoch 3, loss 0.7532, train acc 0.720, test acc 0.731
epoch 4, loss 0.6582, train acc 0.748, test acc 0.746
epoch 5, loss 0.6070, train acc 0.764, test acc 0.764
