# pytorch를 이용한 MNIST 예제입니다.

먼저 해당 라이브러리 등을 선언해줍니다.

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

그다음, 모델의 구조를 작성합니다.

In [9]:
# model architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5= nn.Linear(64, 32)
        self.fc6 = nn.Linear(32, 10)
    
    def forward(self, x):
        x = x.float()
        h1 = F.relu(self.fc1(x.view(-1, 784)))
        h2 = F.relu(self.fc2(h1))
        h3 = F.relu(self.fc3(h2))
        h4 = F.relu(self.fc4(h3))
        h5 = F.relu(self.fc5(h4))
        h6 = self.fc6(h5)
        return F.log_softmax(h6, dim = 1)

In [10]:
batch_size = 64
test_batch_size = 1000
epochs = 10
lr = 0.01
momentum = 0.5
no_cuda = True  # not use GPU
seed = 1
log_interval = 200

use_cuda = not no_cuda and torch.cuda.is_available()

torch.manual_seed(seed)

device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

In [11]:
transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                  transform=transform),
    batch_size = batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                  transform=transform),
    batch_size = test_batch_size, shuffle=True, **kwargs)

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

In [18]:
def train(log_interval, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader): # data와 target을 64개씩 가져옴
        data, target = data.to(device), target.to(device)     # GPU or CPU인지 설정
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.item()))

In [23]:
def test(log_interval, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction = 'sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            
    test_loss /= len(test_loader.dataset)
    
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
print("done")

done


In [24]:
for batch, (data, target) in enumerate(train_loader):
    print(data, target)
    break

tensor([[[[-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          ...,
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242]]],


        [[[-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          ...,
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242]]],


        [[[-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
          [-0.4242, -0.424

In [25]:
for epoch in range(1, 11):
    train(log_interval, model, device, train_loader, optimizer, epoch)
    test(log_interval, model, device, test_loader)


Test set: Average loss: 0.2828, Accuracy: 9156/10000 (92%)


Test set: Average loss: 0.1798, Accuracy: 9484/10000 (95%)


Test set: Average loss: 0.1324, Accuracy: 9610/10000 (96%)


Test set: Average loss: 0.1414, Accuracy: 9571/10000 (96%)


Test set: Average loss: 0.0953, Accuracy: 9722/10000 (97%)


Test set: Average loss: 0.1127, Accuracy: 9679/10000 (97%)


Test set: Average loss: 0.0882, Accuracy: 9742/10000 (97%)


Test set: Average loss: 0.0906, Accuracy: 9731/10000 (97%)


Test set: Average loss: 0.0902, Accuracy: 9748/10000 (97%)


Test set: Average loss: 0.1098, Accuracy: 9711/10000 (97%)



------------------

In [26]:
# model architecture
class NetCNN(nn.Module):
    def __init__(self):
        # 항상 torch.nn.Module을 상속받고 시작
        super(NetCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size = 5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size= 5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
    
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


다음으로 데이터를 가져와 줍니다.
가져올 데이터는 MNIST 데이터입니다.

In [8]:
# data iterator
train_iter = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                transform=transforms.Compose([transforms.ToTensor()])),
    batch_size=64, shuffle=True)


모델 학습

In [12]:
cnn = NetCNN()

In [16]:
use_cuda = torch.cuda.is_available()

In [19]:
# loss
criterion = nn.CrossEntropyLoss()
# backpropagation method
learning_rate = 1e-3
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate)
# hyper-parameters
num_epochs = 2
num_batches = len(train_iter)

trn_loss_list = []
val_loss_list = []
for epoch in range(num_epochs):
    trn_loss = 0.0
    for i, data in enumerate(train_iter):
        x, label = data
        if use_cuda:
            x = x.cuda()
            label = label.cuda()
        # grad init
        optimizer.zero_grad()
        # forward propagation
        model_output = cnn(x)
        # calculate loss
        loss = criterion(model_output, label)
        # back propagation 
        loss.backward()
        # weight update
        optimizer.step()
        
        # trn_loss summary
        trn_loss += loss.item()
        # del (memory issue)
        del loss
        del model_output
        
        # 학습과정 출력
        if (i+1) % 100 == 0: # every 100 mini-batches
            with torch.no_grad(): # very very very very important!!!
                val_loss = 0.0
                for j, val in enumerate(train_iter):
                    val_x, val_label = val
                    if use_cuda:
                        val_x = val_x.cuda()
                        val_label =val_label.cuda()
                    val_output = cnn(val_x)
                    v_loss = criterion(val_output, val_label)
                    val_loss += v_loss
                       
            print("epoch: {}/{} | step: {}/{} | trn loss: {:.4f} | val loss: {:.4f}".format(
                epoch+1, num_epochs, i+1, num_batches, trn_loss / 100, val_loss / len(train_iter)
            ))            
            
            trn_loss_list.append(trn_loss/100)
            val_loss_list.append(val_loss/len(train_iter))
            trn_loss = 0.0

epoch: 1/2 | step: 100/938 | trn loss: 0.6036 | val loss: 0.5613
epoch: 1/2 | step: 200/938 | trn loss: 0.5215 | val loss: 0.4938
epoch: 1/2 | step: 300/938 | trn loss: 0.4677 | val loss: 0.4429
epoch: 1/2 | step: 400/938 | trn loss: 0.4501 | val loss: 0.4073
epoch: 1/2 | step: 500/938 | trn loss: 0.4074 | val loss: 0.3835
epoch: 1/2 | step: 600/938 | trn loss: 0.3902 | val loss: 0.3562
epoch: 1/2 | step: 700/938 | trn loss: 0.3570 | val loss: 0.3444
epoch: 1/2 | step: 800/938 | trn loss: 0.3574 | val loss: 0.3285
epoch: 1/2 | step: 900/938 | trn loss: 0.3239 | val loss: 0.3216
epoch: 2/2 | step: 100/938 | trn loss: 0.2978 | val loss: 0.3122
epoch: 2/2 | step: 200/938 | trn loss: 0.3032 | val loss: 0.2977
epoch: 2/2 | step: 300/938 | trn loss: 0.3198 | val loss: 0.2884
epoch: 2/2 | step: 400/938 | trn loss: 0.2956 | val loss: 0.2811
epoch: 2/2 | step: 500/938 | trn loss: 0.2764 | val loss: 0.2717
epoch: 2/2 | step: 600/938 | trn loss: 0.2830 | val loss: 0.2650
epoch: 2/2 | step: 700/93