In [1]:
import torch
from torch import nn
from torch import optim
from torch.utils import data
from torch.autograd import Variable
from torchvision import datasets
from torchvision import transforms

import numpy as np
from tqdm import tqdm

# Data

In [2]:
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)), # (0, 1) => (-0.5, 0.5) => (-1, 1)

])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)), # (0, 1) => (-0.5, 0.5) => (-1, 1)
])

train_dataset = datasets.MNIST(root='/Users/jmin/workspace/ml/datasets', train=True, transform=train_transform)
test_dataset = datasets.MNIST(root='/Users/jmin/workspace/ml/datasets', train=False, transform=test_transform)

train_dataloader = data.DataLoader(
    dataset=train_dataset, batch_size=100, shuffle=True)
test_dataloader = data.DataLoader(
    dataset=test_dataset, batch_size=100, shuffle=False)

# Model

In [3]:
class NNClassifier(nn.Module):
    def __init__(self):
        super(NNClassifier, self).__init__()
        self.layer_1 = nn.Linear(28 * 28, 200)
        self.layer_2 = nn.Linear(200, 50)
        self.layer_3 = nn.Linear(50, 10)

        self.lrelu = nn.LeakyReLU()

        self.softmax = nn.Softmax()

        self.net = nn.Sequential(
            self.layer_1,  # 784 => 200
            self.lrelu,
            self.layer_2,  # 200 => 50
            self.lrelu,
            self.layer_3,  # 50 => 10
            self.softmax,
        )

    def forward(self, x):
        # [batch_size, 784] => [batch_size, 1]
        return self.net(x)

# Build

In [1]:
classifier = NNClassifier()

loss_fn = nn.CrossEntropyLoss()
# Args:
#     Input: (batch_size, number of classes)
#     Target: (batch_size)

optimizer = optim.SGD(params=classifier.parameters(), lr=1e-3)

NameError: name 'NNClassifier' is not defined

# Train

In [8]:
print('Start training!\n')
for epoch in tqdm(range(10)):
    # epoch_loss = average of batch losses
    loss_history = []
    for images, true_labels in train_dataloader:
        # images: [batch_size, 1, 28, 28]
        # true_labels: [batch_size]

        # Tensor -> Variable
        images = Variable(images)
        true_labels = Variable(true_labels)
        
        # Resize (for loss function)
        images = images.view(-1, 28*28) # [batch_size, 1, 28, 28] => [batch_size, 28x28]
        true_labels = true_labels.view(-1) # [batch_size, 1] => [batch_size]

        # [batch_size, 28x28] => [batch_size, 10]
        predicted_labels = classifier(images)


        # Calculate loss
        average_batch_loss = loss_fn(predicted_labels, true_labels) # [1]
        loss_history.append(average_batch_loss.data[0])  # Variable -> Tensor

        # Flush out remaining gradient
        optimizer.zero_grad()

        # Backpropagation
        average_batch_loss.backward()

        # Gradient descent
        optimizer.step()

    if (epoch + 1) % 1 == 0:
        epoch_loss = np.mean(loss_history)
        log_str = 'Epoch {} | loss: {:.3f}\n'.format(epoch + 1, epoch_loss)
        print(log_str)

  0%|          | 0/10 [00:00<?, ?it/s]

Start training!



 10%|█         | 1/10 [00:07<01:05,  7.26s/it]

Epoch 1 | loss: 1.707



 20%|██        | 2/10 [00:14<00:57,  7.22s/it]

Epoch 2 | loss: 1.584



 30%|███       | 3/10 [00:21<00:50,  7.26s/it]

Epoch 3 | loss: 1.526



 40%|████      | 4/10 [00:29<00:43,  7.28s/it]

Epoch 4 | loss: 1.513



 50%|█████     | 5/10 [00:36<00:36,  7.25s/it]

Epoch 5 | loss: 1.507



 60%|██████    | 6/10 [00:43<00:28,  7.16s/it]

Epoch 6 | loss: 1.503



 70%|███████   | 7/10 [00:50<00:21,  7.17s/it]

Epoch 7 | loss: 1.498



 80%|████████  | 8/10 [00:57<00:14,  7.12s/it]

Epoch 8 | loss: 1.495



 90%|█████████ | 9/10 [01:04<00:07,  7.14s/it]

Epoch 9 | loss: 1.493



100%|██████████| 10/10 [01:11<00:00,  7.09s/it]

Epoch 10 | loss: 1.491






# Evaluation

In [6]:
print('Start Evaluation!\n')
test_loss_history = []
for images, true_labels in tqdm(test_dataloader):
    # images: [batch_size, 1, 28, 28]
    # true_labels: [batch_size]

    # Tensor -> Variable
    images = Variable(images)
    true_labels = Variable(true_labels)

    # Resize (for loss function)
    images = images.view(-1, 28*28) # [batch_size, 1, 28, 28] => [batch_size, 28x28]
    true_labels = true_labels.view(-1) # [batch_size, 1] => [batch_size]

    # [batch_size, 28x28] => [batch_size, 10]
    predicted_labels = classifier(images)


    # Calculate loss
    average_batch_loss = loss_fn(predicted_labels, true_labels) # [1]
    test_loss_history.append(average_batch_loss.data[0])  # Variable -> Tensor

test_loss = np.mean(test_loss_history)
log_str = 'Test loss: {:.3f}\n'.format(test_loss)
print(log_str)

  0%|          | 0/100 [00:00<?, ?it/s]

Start Evaluation!



100%|██████████| 100/100 [00:00<00:00, 108.28it/s]

Test loss: 2.292




