In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from torch.utils.data import DataLoader

In [2]:
# Mini-batch size shared by the training and evaluation loaders.
batch_size = 12

# MNIST train/test splits stored under ./datasets (fetched on first use because
# download=True); every sample is passed through ToTensor.
train_data = datasets.MNIST('./datasets', train=True, download=True, transform=transforms.ToTensor())
test_data = datasets.MNIST('./datasets', train=False, download=True, transform=transforms.ToTensor())

# Only the training batches are shuffled: evaluation just counts correct
# predictions, so sample order there is irrelevant and shuffling is wasted work.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [3]:
class MLP_h(nn.Module):
    """Fully connected ReLU classifier for flattened 28x28 inputs.

    The network is a stack of ``nn.Linear`` layers going from ``28 * 28``
    inputs through each width in ``hidden_units`` to 10 outputs. ReLU follows
    every layer except the last one, so ``forward`` returns raw logits.

    Args:
        hidden_units: Widths of the hidden layers, in order. An empty
            sequence yields a single linear layer from input to output.
    """

    def __init__(self, hidden_units=(512, 256, 128)):
        super().__init__()

        self.in_dim = 28 * 28    # flattened MNIST image
        self.out_dim = 10

        # Layer i maps widths[i] -> widths[i + 1]; building from a single list
        # also covers the case of no hidden layers.
        widths = [self.in_dim, *hidden_units, self.out_dim]
        # ModuleList registers every layer so PyTorch knows about its parameters.
        self.l_layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:])]
        )

        self.relu = nn.ReLU()
        # Not applied in forward(); kept so the attribute stays available.
        # dim=1 is the class axis of the (batch, out_dim) logits.
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return logits of shape ``(batch, out_dim)`` for input ``x``.

        ``x`` may have any shape whose elements can be regrouped into rows of
        ``in_dim`` values (e.g. ``(batch, 1, 28, 28)``).
        """
        # reshape (unlike view) also accepts non-contiguous inputs.
        a = x.reshape(-1, self.in_dim)
        # Hidden layers: linear transform followed by ReLU.
        for layer in self.l_layers[:-1]:
            a = self.relu(layer(a))
        # Output layer: no activation, the raw logits are returned.
        return self.l_layers[-1](a)

In [4]:
def test(hidden_units, epochs=10, lr=0.01, log_every=2000):
    """Train an ``MLP_h`` on ``train_loader`` and print its accuracy on ``test_loader``.

    Args:
        hidden_units: Hidden-layer widths passed to ``MLP_h``.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the SGD optimizer.
        log_every: Print the mean training loss each time this many batches
            have been processed within an epoch.

    Returns:
        None. Progress, the correct/total counts and the accuracy are printed.
    """
    print(f"{len(hidden_units)} Layers")
    print(hidden_units)

    model = MLP_h(hidden_units)
    criterion = nn.CrossEntropyLoss()  # cross-entropy loss for multi-class classification
    optimizer = optim.SGD(model.parameters(), lr=lr)

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()  # clear gradients left over from the previous step

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % log_every == 0:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / log_every))
                running_loss = 0.0

    print('Finished Training')

    n_predict = 0
    n_correct = 0

    # Evaluation needs no gradients; no_grad avoids building the autograd graph.
    model.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # index of the largest logit per sample

            n_predict += len(predicted)
            # .item() keeps the running count a plain Python int rather than a tensor.
            n_correct += (labels == predicted).sum().item()

    print(f"{n_correct}/{n_predict}")
    print(f"Accuracy: {n_correct/n_predict:.3f}")

In [7]:
# Train and evaluate an MLP with five hidden layers whose widths halve from 2048 down to 128.
test([2048, 1024, 512, 256, 128])

5 Layers
[2048, 1024, 512, 256, 128]
[1,  2000] loss: 2.279
[1,  4000] loss: 0.991
[2,  2000] loss: 0.321
[2,  4000] loss: 0.232
[3,  2000] loss: 0.146
[3,  4000] loss: 0.128
[4,  2000] loss: 0.094
[4,  4000] loss: 0.084
[5,  2000] loss: 0.062
[5,  4000] loss: 0.061
[6,  2000] loss: 0.039
[6,  4000] loss: 0.044
[7,  2000] loss: 0.031
[7,  4000] loss: 0.030
[8,  2000] loss: 0.020
[8,  4000] loss: 0.024
[9,  2000] loss: 0.016
[9,  4000] loss: 0.018
[10,  2000] loss: 0.015
[10,  4000] loss: 0.014
Finished Training
9769/10000
Accuracy: 0.977
