In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [19]:
# 1. Load the data and set up preprocessing.
transform = transforms.Compose(
    [
        # Convert to a tensor, rescaling pixel values from [0, 255] to [0.0, 1.0].
        transforms.ToTensor(),
        # Standardize with the MNIST mean / standard deviation.
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST('./data', train=False, transform=transform, download=True)

# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False, num_workers=2)


100%|██████████| 9.91M/9.91M [00:02<00:00, 4.52MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 141kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.40MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 3.81MB/s]


In [21]:
# 2. CNN model definition
class CNNNet(nn.Module):
    """Small convolutional classifier: three 3x3 conv stages, then two linear layers.

    Shape trace for a 1x28x28 input, following the layer arithmetic below:
    conv1 -> 32x26x26, pool1 -> 32x13x13, conv2 -> 64x11x11, pool2 -> 64x5x5,
    conv3 -> 64x3x3, flatten -> 576, fc1 -> 64, fc2 -> 10.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: unpadded 3x3 convolution followed by 2x2 max pooling.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: same pattern, widening to 64 channels.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 3: convolution only, no pooling.
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        # Fully connected head on the flattened 64x3x3 feature map.
        self.fc1 = nn.Linear(in_features=64 * 3 * 3, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)
        # One stateless activation module shared by every layer.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the output of ``fc2`` for batch ``x``; no softmax is applied here."""
        feats = self.pool1(self.relu(self.conv1(x)))
        feats = self.pool2(self.relu(self.conv2(feats)))
        feats = self.relu(self.conv3(feats))
        # Collapse every non-batch dimension into one feature vector per sample.
        flat = feats.view(feats.shape[0], -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

In [23]:
# Seed the global RNG before the model is built so that weight initialization
# (and the default DataLoader shuffling, which draws from the same global RNG)
# is repeatable on a fresh Restart & Run All. Some GPU kernels can still
# introduce small run-to-run differences.
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNNNet().to(device)

In [25]:
# 3. Set up the loss function and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [27]:
# 4. Training function
def train(epoch):
    """Run one optimization pass over ``train_loader`` and report the mean loss.

    Operates on the module-level ``model``, ``train_loader``, ``optimizer``,
    ``criterion`` and ``device``.

    Args:
        epoch: Epoch number; used only as the label in the printed log line.

    Returns:
        The mean training loss per sample over the pass, as a float, or
        ``nan`` when ``train_loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0  # running sum of per-sample losses
    seen = 0        # number of samples processed so far
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # `criterion` is expected to return the batch mean (the default for
        # nn.CrossEntropyLoss). Weight it by the batch size so that a shorter
        # final batch does not skew the epoch average.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        seen += batch_size
    # An empty loader yields nan instead of raising ZeroDivisionError.
    avg_loss = loss_sum / seen if seen else float('nan')
    print(f'[{epoch}] Train Loss: {avg_loss:.4f}')
    return avg_loss

In [29]:
# 5. 평가 함수
def test():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    acc = correct / total
    print(f'  → Test Accuracy: {acc:.4f}')

In [31]:
# 6. Run the actual training, evaluating on the test set after every epoch.
num_epochs = 5
for epoch in range(1, 1 + num_epochs):  # epochs are numbered from 1 in the log
    train(epoch)
    test()

[1] Train Loss: 0.1692
  → Test Accuracy: 0.9827
[2] Train Loss: 0.0475
  → Test Accuracy: 0.9882
[3] Train Loss: 0.0343
  → Test Accuracy: 0.9911
[4] Train Loss: 0.0264
  → Test Accuracy: 0.9903
[5] Train Loss: 0.0220
  → Test Accuracy: 0.9900
