In [1]:
import torch
from torch import nn
from torch import optim
from torch.utils import data
from torch.autograd import Variable
from torchvision import datasets
from torchvision import transforms

import numpy as np
from tqdm import tqdm

# Data

In [3]:
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)), # (0, 1) => (-0.5, 0.5) => (-1, 1)

])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)), # (0, 1) => (-0.5, 0.5) => (-1, 1)
])

train_dataset = datasets.MNIST(root='./datasets', train=True, transform=train_transform, download=True)
test_dataset = datasets.MNIST(root='./datasets', train=False, transform=test_transform)

train_dataloader = data.DataLoader(
    dataset=train_dataset, batch_size=100, shuffle=True)
test_dataloader = data.DataLoader(
    dataset=test_dataset, batch_size=100, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


# Model

In [4]:
class NNClassifier(nn.Module):
    def __init__(self):
        super(NNClassifier, self).__init__()
        self.layer_1 = nn.Linear(28 * 28, 200)
        self.layer_2 = nn.Linear(200, 50)
        self.layer_3 = nn.Linear(50, 10)

        self.lrelu = nn.LeakyReLU()

        self.softmax = nn.Softmax()

        self.net = nn.Sequential(
            self.layer_1,  # 784 => 200
            self.lrelu,
            self.layer_2,  # 200 => 50
            self.lrelu,
            self.layer_3,  # 50 => 10
            self.softmax,
        )

    def forward(self, x):
        # [batch_size, 784] => [batch_size, 1]
        return self.net(x)

# Build

In [5]:
classifier = NNClassifier()

loss_fn = nn.CrossEntropyLoss()
# Args:
#     Input: (batch_size, number of classes)
#     Target: (batch_size)

optimizer = optim.SGD(params=classifier.parameters(), lr=1e-3)

# Train

In [None]:
print('Start training!\n')
for epoch in tqdm(range(10)):
    # epoch_loss = average of batch losses
    loss_history = []
    for images, true_labels in train_dataloader:
        # images: [batch_size, 1, 28, 28]
        # true_labels: [batch_size]

        # Tensor -> Variable
        images = Variable(images)
        true_labels = Variable(true_labels)
        
        # Resize (for loss function)
        images = images.view(-1, 28*28) # [batch_size, 1, 28, 28] => [batch_size, 28x28]
        true_labels = true_labels.view(-1) # [batch_size, 1] => [batch_size]

        # [batch_size, 28x28] => [batch_size, 10]
        predicted_labels = classifier(images)


        # Calculate loss
        average_batch_loss = loss_fn(predicted_labels, true_labels) # [1]
        loss_history.append(average_batch_loss.data[0])  # Variable -> Tensor

        # Flush out remaining gradient
        optimizer.zero_grad()

        # Backpropagation
        average_batch_loss.backward()

        # Gradient descent
        optimizer.step()

    if (epoch + 1) % 1 == 0:
        epoch_loss = np.mean(loss_history)
        log_str = 'Epoch {} | loss: {:.3f}\n'.format(epoch + 1, epoch_loss)
        print(log_str)

  0%|          | 0/10 [00:00<?, ?it/s]

Start training!



 10%|█         | 1/10 [00:08<01:16,  8.45s/it]

Epoch 1 | loss: 2.302



 20%|██        | 2/10 [00:17<01:09,  8.67s/it]

Epoch 2 | loss: 2.301



 30%|███       | 3/10 [00:26<01:00,  8.58s/it]

Epoch 3 | loss: 2.300



 40%|████      | 4/10 [00:33<00:49,  8.32s/it]

Epoch 4 | loss: 2.298



 50%|█████     | 5/10 [00:41<00:40,  8.19s/it]

Epoch 5 | loss: 2.297



 60%|██████    | 6/10 [00:49<00:32,  8.05s/it]

Epoch 6 | loss: 2.296



# Evaluation

In [None]:
print('Start Evaluation!\n')
test_loss_history = []
for images, true_labels in tqdm(test_dataloader):
    # images: [batch_size, 1, 28, 28]
    # true_labels: [batch_size]

    # Tensor -> Variable
    images = Variable(images)
    true_labels = Variable(true_labels)

    # Resize (for loss function)
    images = images.view(-1, 28*28) # [batch_size, 1, 28, 28] => [batch_size, 28x28]
    true_labels = true_labels.view(-1) # [batch_size, 1] => [batch_size]

    # [batch_size, 28x28] => [batch_size, 10]
    predicted_labels = classifier(images)


    # Calculate loss
    average_batch_loss = loss_fn(predicted_labels, true_labels) # [1]
    test_loss_history.append(average_batch_loss.data[0])  # Variable -> Tensor

test_loss = np.mean(test_loss_history)
log_str = 'Test loss: {:.3f}\n'.format(test_loss)
print(log_str)