In [2]:
import time
import torch
import torch.nn as nn 

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet,self).__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(1,6,5),
            nn.Sigmoid(),
            nn.MaxPool2d(2,2), # kernel_size ,stride = 2
            nn.Conv2d(6,16,5),
            nn.Sigmoid(),
            nn.MaxPool2d(2,2)
        )

        self.fc = nn.Sequential(
            nn.Linear(16 * 4 * 4 ,120),
            nn.Sigmoid(),
            nn.Linear(120,84),
            nn.Sigmoid(),
            nn.Linear(84,10)
        )

    def forward(self,img):
        feature = self.conv(img)

        output = self.fc(feature.view(img.shape[0],-1)) # Flatter
        return output

net = LeNet()
print(net)

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [3]:
import d2lzh as d2l 
import torch

batch_size = 256
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

def train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs):
    net = net.to(device)
    print("training on ",device)

    loss = torch.nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n,start = 0.0,0.0,0,time.time()

        for x,y in train_iter:
            x = x.to(device)
            y = y.to(device)
            y_hat = net(x)
            l = loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()

            n += y.shape[0]
            batch_count += 1
        test_acc = d2l.evaluate_accuracy(test_iter,net)

        print('epoch %d, loss %.4f, train acc %.3f,test acc %.3f,time %.1f' %(epoch + 1,train_l_sum / batch_count,train_acc_sum / n ,test_acc,time.time()-start))

In [4]:
lr,num_epochs = 0.001,5

optimizer = torch.optim.Adam(net.parameters(),lr=lr)

train_ch5(net,train_iter,test_iter,batch_size,optimizer,device,num_epochs)

training on  cuda
epoch 1, loss 1.7925, train acc 0.339,test acc 0.587,time 3.8
epoch 2, loss 0.4724, train acc 0.639,test acc 0.700,time 1.9
epoch 3, loss 0.2546, train acc 0.719,test acc 0.731,time 1.9
epoch 4, loss 0.1700, train acc 0.742,test acc 0.742,time 2.0
epoch 5, loss 0.1242, train acc 0.758,test acc 0.757,time 1.9
