In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:
# Seed PyTorch's global RNG so weight initialisation, DataLoader shuffling and
# dropout masks are repeatable across "Restart Kernel -> Run All" runs.
SEED = 42
torch.manual_seed(SEED)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Preprocessing shared by the train and test datasets: convert each image to a
# tensor, then normalise its single channel.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST mean / std - confirm.
trans = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [None]:
# Training split of MNIST, stored under ../data (download requested) and
# preprocessed with `trans`.
train_data = datasets.MNIST(
    root="../data",
    train=True,
    download=True,
    transform=trans,
)

In [None]:
# Held-out split of MNIST (train=False), stored under ../data and preprocessed
# with the same `trans` pipeline as the training split.
test_data = datasets.MNIST(
    root="../data",
    train=False,
    download=True,
    transform=trans,
)

In [None]:
# Training batches: 100 samples each, reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=100,
    shuffle=True,
)
# Evaluation batches: 1000 samples each, served in dataset order.
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=1000,
    shuffle=False,
)

In [None]:
class Net(nn.Module):
    """Small CNN: two conv stages followed by two fully connected layers.

    The forward pass returns log-probabilities over 10 classes, so it is
    meant to be paired with ``F.nll_loss``.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (1 -> 10 -> 20 channels, 5x5 kernels).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        # Channel-wise dropout applied to the conv2 output.
        self.conv2_drop = nn.Dropout2d()
        # Classifier head; 320 = 20 channels * 4 * 4 for 1x28x28 inputs.
        self.fc1 = nn.Linear(in_features=320, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, x):
        """Map a batch of single-channel images to per-class log-probabilities."""
        # Stage 1: conv -> 2x2 max-pool -> ReLU.
        stage1 = F.relu(F.max_pool2d(self.conv1(x), 2))

        # Stage 2: conv -> channel dropout -> 2x2 max-pool -> ReLU.
        stage2 = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(stage1)), 2))

        # Stage 3: flatten to 320 features per sample, then the hidden layer.
        hidden = F.relu(self.fc1(stage2.view(-1, 320)))

        # Stage 4: dropout (only while self.training is True) and class scores.
        logits = self.fc2(F.dropout(hidden, training=self.training))

        return F.log_softmax(logits, dim=1)

In [None]:
# Build the network, then move its parameters onto the selected device.
model = Net()
model = model.to(device)

In [None]:
# Plain SGD with momentum over every parameter of `model`.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.5,
)
# Alternative that was tried:
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01, betas=[0.5, 0.999])

In [None]:
def train(epoch):
    """Run one optimisation pass over ``train_loader`` and print progress.

    Operates on the notebook-level ``model``, ``optimizer``, ``train_loader``
    and ``device``.

    Args:
        epoch: Epoch number; used only in the progress messages.
    """
    # Explicitly enable training mode so both dropout layers are active even
    # if the model was previously switched to eval mode.
    model.train()

    num_batches = len(train_loader)
    num_samples = len(train_loader.dataset)
    # Report roughly ten times per epoch. Clamp to 1 so a loader with fewer
    # than ten batches cannot produce a modulo-by-zero.
    log_interval = max(1, num_batches // 10)
    # Running count of samples processed; stays correct when the final batch
    # is smaller than the others.
    samples_seen = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # The model emits log-probabilities, so use the negative
        # log-likelihood loss.
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        samples_seen += len(data)
        if (batch_idx + 1) % log_interval == 0:
            print('Train Epoch: {:2d} [{:5d}/{} ({:3.0f}%)]\tLoss: {:.6f}'.format(
                epoch,
                samples_seen,
                num_samples,
                100 * (batch_idx + 1) / num_batches,
                loss.item()))

In [None]:
def test(epoch):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Operates on the notebook-level ``model``, ``test_loader`` and ``device``.
    The model is put in eval mode for the duration of the evaluation so that
    dropout is disabled, and its previous train/eval mode is restored
    afterwards.

    Args:
        epoch: Epoch number; accepted for symmetry with ``train`` but unused.
    """
    test_loss = 0.0
    correct = 0
    num_samples = len(test_loader.dataset)

    # Remember the current mode so evaluation does not leave the model in a
    # different state than it found it.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                # Sum (not average) per-batch losses so the division below
                # yields a true per-sample mean. `reduction='sum'` replaces
                # the deprecated `size_average=False`.
                test_loss += F.nll_loss(output, target, reduction='sum').item()
                # Predicted class = index of the largest log-probability.
                pred = output.argmax(dim=1)
                correct += pred.eq(target.view_as(pred)).sum().item()
    finally:
        model.train(was_training)

    test_loss /= num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))

In [None]:
# Number of full passes over the training set.
NUM_EPOCHS = 10

# Epochs are numbered from 1 so the progress messages read 1..NUM_EPOCHS.
# Each epoch trains on the full training set, then evaluates on the test set.
for epoch in range(1, NUM_EPOCHS + 1):
    train(epoch)
    test(epoch)