In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchvision import datasets, transforms

# Load MNIST data
- 0부터 9까지의 $28 \times 28$ 숫자 이미지
- 60,000개의 이미지로 구성

![](https://upload.wikimedia.org/wikipedia/commons/thumb/2/27/MnistExamples.png/440px-MnistExamples.png)

In [2]:
# Load MNIST (initial download)
train_set = datasets.MNIST('../data', train=True, download=True, transform=transforms.ToTensor())
test_set = datasets.MNIST('../data', train=False, download=True, transform=transforms.ToTensor())
print('Number of training samples:', len(train_set))
print('Number of test samples:', len(test_set))

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:06, 1588455.40it/s]                            


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 130052.12it/s]           
  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 2117716.37it/s]                           
0it [00:00, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 49453.98it/s]            


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!
Number of training samples: 60000
Number of test samples: 10000


# PyTorch DataLoaders
![](https://miro.medium.com/max/1400/1*A-cWYNur2lqDEhUF1_gdCw.png)

In [0]:
# Define epochs & mini-batch size (train & test)
num_epochs = 5
batch_size = 256
test_batch_size=1000

In [0]:
# Define data generators
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=test_batch_size)

In [5]:
# Check data shape
x_batch, y_batch = iter(train_loader).__next__()
print('A minibatch of input images has shape:', x_batch.size())
print('A minibatch of targets:', y_batch.size())

A minibatch of input images has shape: torch.Size([256, 1, 28, 28])
A minibatch of targets: torch.Size([256])


# Model Training

## Model definition & configurations

In [0]:
class MultiLayerPerceptron(nn.Module):
    def __init__(self, in_features):
        
        super(MultiLayerPerceptron, self).__init__()
        self.linear1 = nn.Linear(in_features, 256)
        self.linear2 = nn.Linear(256, 128)
        self.linear3 = nn.Linear(128, 10)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        
        out = self.linear1(x)
        out = F.relu(out)
        
        out = self.linear2(out)
        out = F.relu(out)
        
        out = self.linear3(out)
        out = self.softmax(out)
        
        return out

In [0]:
# Instantiate model, loss criterion & optimizer
model = MultiLayerPerceptron(in_features=28 * 28)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [8]:
# Configure device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Model will run on', device)

model = model.to(device)

Model will run on cuda


## Train model 

In [10]:
for epoch in range(num_epochs):
    
    steps_per_epoch = len(train_loader.dataset) // batch_size
    
    for i, (x_batch, y_batch) in enumerate(train_loader):
        
        model.train()  # train mode
        
        # Allocate data on GPU
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        
        # Reshape (28 x 28 -> 784)
        x_batch = x_batch.view(x_batch.size(0), -1)
        
        # Predict
        y_batch_pred = model(x_batch)
        
        # Get loss
        loss = criterion(y_batch_pred, y_batch)
        
        # Update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i + 1) == steps_per_epoch:    
            print('Epoch: {:>02d} | Train loss: {:.5f}'.format(epoch, loss.item()))
        

Epoch: 00 | Train loss: 1.54298
Epoch: 01 | Train loss: 1.52725
Epoch: 02 | Train loss: 1.50819
Epoch: 03 | Train loss: 1.50538
Epoch: 04 | Train loss: 1.49832


## Test model

In [11]:
num_correct = 0
for x_batch, y_batch in test_loader:
    
    with torch.no_grad():
        
        model.eval()
    
        # Allocate data on GPU
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        # Reshape (28 x 28 -> 784)
        x_batch = x_batch.view(x_batch.size(0), -1)

        # Predict
        y_batch_pred = model(x_batch)
        
        # Get accuracy
        num_correct += torch.eq(y_batch, y_batch_pred.argmax(-1)).float().sum().item()
        
test_acc = num_correct / len(test_loader.dataset)
print('Test accuracy: {:.4f}'.format(test_acc))

Test accuracy: 0.9641
