In [1]:
import torch
import numpy as np
import torch.functional as F
import torch.nn as nn

In [25]:
class AlexNet(nn.Module):
    
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv1 = torch.nn.Sequential(   # first layer
            torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(num_features=32),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.conv2 = torch.nn.Sequential(   #second layer
            torch.nn.Conv2d(32, 64, 3, 1, 1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(num_features=64),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.conv3 = torch.nn.Sequential(   #third layer
            torch.nn.Conv2d(64, 128, 3, 1, 1),
            torch.nn.ReLU()
        )
        self.conv4 = torch.nn.Sequential(   #forth layer
            torch.nn.Conv2d(128, 256, 3, 1, 1),
            torch.nn.ReLU()
        )
        self.conv5 = torch.nn.Sequential(   #fifth layer
            torch.nn.Conv2d(256, 256, 3, 1, 1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(num_features=256),
            torch.nn.MaxPool2d(2, 2)
        )
        # dense layer
        self.dense = torch.nn.Sequential(
            torch.nn.Linear(2304, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(1024, 512),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(512, 10),
            # torch.nn.Softmax(dim=10)
        )
    # 正向传播    
    def forward(self, x):
        conv1_out = self.conv1(x)
        conv2_out = self.conv2(conv1_out)
        conv3_out = self.conv3(conv2_out)
        conv4_out = self.conv4(conv3_out)
        conv5_out = self.conv5(conv4_out)
        res = conv5_out.view(-1, 2304)
        out = self.dense(res)
        return out

In [26]:
net = AlexNet()
print(net)

AlexNet(
  (conv1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (conv4): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (conv5): Sequential(
    (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affi

In [10]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms
trainset = datasets.MNIST(root='./mnist', train=True, download=False, transform=transforms.ToTensor())
testset = datasets.MNIST(root='./mnist', train=False, download=False, transform=transforms.ToTensor())

In [29]:
# super params
batch_size = 100
epoch = 20
learning_rate = 0.001

In [13]:
# create data loader
train_loader = torch.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)

In [30]:
import torch.optim as optim

# define train function
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.5)
criterion = torch.nn.CrossEntropyLoss()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net.to(device)
print('start training')
for e in range(epoch):
    runing_loss = 0
    batch_size = batch_size
    
    for idx, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels =inputs.to(device), labels.to(device)
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print('[%d, %5d] loss:%.4f'%(e+1, (idx+1)*100, loss.item()))
print('finish training')

start training
[1, 93800] loss:0.0392
[2, 93800] loss:0.0464
[3, 93800] loss:0.0175
[4, 93800] loss:0.0024
[5, 93800] loss:0.0013
[6, 93800] loss:0.0007
[7, 93800] loss:0.0007
[8, 93800] loss:0.0102
[9, 93800] loss:0.0063
[10, 93800] loss:0.0015
[11, 93800] loss:0.0003
[12, 93800] loss:0.0082
[13, 93800] loss:0.0082
[14, 93800] loss:0.0014
[15, 93800] loss:0.0057
[16, 93800] loss:0.0023
[17, 93800] loss:0.0004
[18, 93800] loss:0.0079
[19, 93800] loss:0.0049
[20, 93800] loss:0.0013
finish training


In [33]:
# save model
torch.save(net, 'MNIST-alexNet.pkl')
net = torch.load('MNIST-alexNet.pkl')
with torch.no_grad():
    correct = 0
    total = 0
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        
        out = net(images)
        _, prediction = torch.max(out.data, 1)
        total += labels.size(0)
        correct += (prediction == labels).sum().item()
        
    print('Accuracy is {}%'.format(100 * correct / total))

Accuracy is 99.13%
