In [5]:
pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


### 1. MNIST 데이터 로드 및 분할

In [3]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# 전처리 정의
transform = transforms.ToTensor()

# MNIST 데이터셋 로드
mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# 70% 학습, 30% 테스트로 분할
train_size = int(0.7 * len(mnist_dataset))
test_size = len(mnist_dataset) - train_size
train_dataset, test_dataset = random_split(mnist_dataset, [train_size, test_size])

# Dataloader 정의
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# 확인
print(f"전체 데이터 개수: {len(mnist_dataset)}")

print(f"학습 데이터 개수 (70%): {len(train_dataset)}")
print(f"테스트 데이터 개수 (30%): {len(test_dataset)}")

train_ratio = len(train_dataset) / len(mnist_dataset)
test_ratio = len(test_dataset) / len(mnist_dataset)
print(f"학습 비율: {train_ratio:.2f}, 테스트 비율: {test_ratio:.2f}")

전체 데이터 개수: 60000
학습 데이터 개수 (70%): 42000
테스트 데이터 개수 (30%): 18000
학습 비율: 0.70, 테스트 비율: 0.30


### 2. MLP 모델 구현 (Hidden layer 2개, 각 20개 Node 적용)

In [6]:
from torchinfo import summary
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, activation_fn=nn.ReLU, use_batchnorm=False):
        super(MLP, self).__init__()
        layers = []
        layers.append(nn.Flatten())
        layers.append(nn.Linear(28*28, 20))
        if use_batchnorm:
            layers.append(nn.BatchNorm1d(20))
        layers.append(activation_fn())

        layers.append(nn.Linear(20, 20))
        if use_batchnorm:
            layers.append(nn.BatchNorm1d(20))
        layers.append(activation_fn())

        layers.append(nn.Linear(20, 10))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)


# 모델 생성
model = MLP()

# 모델 요약 출력 (입력 크기: 배치크기 1, 1채널, 28x28 이미지)
summary(model, input_size=(1, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
MLP                                      [1, 10]                   --
├─Sequential: 1-1                        [1, 10]                   --
│    └─Flatten: 2-1                      [1, 784]                  --
│    └─Linear: 2-2                       [1, 20]                   15,700
│    └─ReLU: 2-3                         [1, 20]                   --
│    └─Linear: 2-4                       [1, 20]                   420
│    └─ReLU: 2-5                         [1, 20]                   --
│    └─Linear: 2-6                       [1, 10]                   210
Total params: 16,330
Trainable params: 16,330
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.02
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.07
Estimated Total Size (MB): 0.07

### 3. 활성화 함수 테스트 비교 (ReLU vs Sigmoid)

In [8]:
import torch.optim as optim
import torch.nn.functional as F

def train(model, loader, optimizer, criterion):
    model.train()
    total_loss = 0
    for x, y in loader:
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

def test(model, loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for x, y in loader:
            output = model(x)
            pred = output.argmax(dim=1)
            correct += pred.eq(y).sum().item()
    return correct / len(loader.dataset)

# 실험: ReLU와 Sigmoid 비교
results = {}
for activation in [nn.ReLU, nn.Sigmoid]:
    model = MLP(activation_fn=activation)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(5):
        train(model, train_loader, optimizer, criterion)
    acc = test(model, test_loader)
    results[activation.__name__] = acc

print("활성화 함수 비교 테스트 결과 :")
for k, v in results.items():
    print(f"{k} : Accuracy = {v:.4f}")


활성화 함수 비교 테스트 결과 :
ReLU : Accuracy = 0.9409
Sigmoid : Accuracy = 0.9219


### 4. Batch normalization 적용 비교

In [12]:
results_bn = {}
for use_bn in [False, True]:
    model = MLP(activation_fn=nn.ReLU, use_batchnorm=use_bn)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(5):
        train(model, train_loader, optimizer, criterion)
    acc = test(model, test_loader)
    key = "Batch Normalization 적용" if use_bn else "Batch Normalization 미적용"
    results_bn[key] = acc

print("Batch Normalization 적용 비교 테스트 결과 :")
for k, v in results_bn.items():
    print(f"{k} : Accuracy = {v:.4f}")

Batch Normalization 적용 비교 테스트 결과 :
Batch Normalization 미적용 : Accuracy = 0.9387
Batch Normalization 적용 : Accuracy = 0.9554


### 5. 가중치 초기화 방식 비교

In [11]:
def init_weights(model, method='kaiming'):
    for m in model.modules():
        if isinstance(m, nn.Linear):
            if method == 'kaiming':
                nn.init.kaiming_normal_(m.weight)
            elif method == 'xavier':
                nn.init.xavier_normal_(m.weight)
            elif method == 'normal':
                nn.init.normal_(m.weight, mean=0.0, std=0.01)

# 실험: 초기화 방법 비교
results_init = {}
for init in ['카이밍(kaiming)', '제이비어(xavier)', '정규분포(normal)']:
    model = MLP(activation_fn=nn.ReLU)
    init_weights(model, method=init)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(5):
        train(model, train_loader, optimizer, criterion)
    acc = test(model, test_loader)
    results_init[init] = acc

print("가중치 초기화 방식 비교 테스트 결과 :")
for k, v in results_init.items():
    print(f"{k} : Accuracy = {v:.4f}")


가중치 초기화 방식 비교 테스트 결과 :
카이밍(kaiming) : Accuracy = 0.9386
제이비어(xavier) : Accuracy = 0.9397
정규분포(normal) : Accuracy = 0.9392
