In [32]:
# CNN implementation Overview:
# Input - LAYER1(Conv2d-ReLU-MaxPool) - LAYER2(Conv2d-ReLU-MaxPool) - view() - FullyConnected
# CrossEntropyLoss(Softmax, NLLloss)

# 입력 = 1 * 28 * 28 (c, h, w)
# (Layer 1) Convolution layer = (in_c=1, out_c=32, kernel_size=3, stride=1, padding=1)
# (Layer 1) MaxPool layer = (kernel_size=2, stride=2)
# (Layer 2) Convolution layer = (in_c=32, out_c=64, kernel_size=3, stride=1, padding=1) 
# (Layer 2) MaxPool layer = (kernel_size=2, stride=2)
# view => (batch_size x [7,7,64] => batch_size x [3136])
# Fully_Connect layer => (input=3136, output=10)


In [33]:
# 1. 라이브러리 import

import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.init

In [34]:
# 2. GPU 사용 설정 + 재현성을 위한 seeding

device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(803)
if device == 'cuda':
    torch.cuda.manual_seed_all(803)
    
torch.cuda.is_available()

True

In [35]:
# 3. parameter 설정 : i.e lr, epochs, batch_size, etc.

lr = 0.001
epochs = 15
batch_size = 100

In [36]:
# 4. DataLoader 준비

mnist_train = dsets.MNIST(root='MNIST_data/',
                         train = True,
                         transform = transforms.ToTensor(),
                         download = True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train = False,
                         transform = transforms.ToTensor(),
                         download = True)

data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         drop_last=True)

In [37]:
# 5. 모델 만들기

class CNN(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc1 = nn.Linear(3*3*128, 625)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(625, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out
    
    
model = CNN().to(device)
model

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=1152, out_features=625, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=625, out_features=10, bias=True)
)

In [38]:
# model 테스트

model = CNN().to(device)
value = (torch.Tensor(1,1,28,28)).to(device)
model(value)

tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
       grad_fn=<AddmmBackward>)

In [39]:
# 6. Loss function, optimizer 설정

loss_func = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [40]:
# 7. 모델 학습 및 loss 확인

total_batch = len(data_loader)
print('Learning started.')

for epoch in range(epochs):
    avg_cost = 0
    
    for img,lbl in data_loader:
        img = img.to(device)
        lbl = lbl.to(device)

        optimizer.zero_grad()
        
        prediction = model(img)
        cost = loss_func(prediction, lbl)
        
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('[EPOCH:{}] COST = {}'.format(epoch+1, avg_cost))

print('Done.')

Learning started.
[EPOCH:1] COST = 0.16250336170196533
[EPOCH:2] COST = 0.04107049107551575
[EPOCH:3] COST = 0.029627325013279915
[EPOCH:4] COST = 0.022371141240000725
[EPOCH:5] COST = 0.016381287947297096
[EPOCH:6] COST = 0.015480434522032738
[EPOCH:7] COST = 0.012301549315452576
[EPOCH:8] COST = 0.009106561541557312
[EPOCH:9] COST = 0.010176115669310093
[EPOCH:10] COST = 0.00798829086124897
[EPOCH:11] COST = 0.007309428881853819
[EPOCH:12] COST = 0.007960046641528606
[EPOCH:13] COST = 0.006176525726914406
[EPOCH:14] COST = 0.0043799239210784435
[EPOCH:15] COST = 0.005847914610058069
Done.


In [31]:
# 8. 학습된 모델의 성능 평가

with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_pred = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_pred.float().mean()
    
    print('ACCURACY:', accuracy.item())


ACCURACY: 0.9907999634742737
