# 5. 케라스와 텐서플로우: PyTorch 버전

## 1. 필수 라이브러리 임포트 (PyTorch)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import math, random
import numpy as np

# Reproducibility: seed every random number generator used in this notebook
# (PyTorch, NumPy and Python's `random`) with the same value.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
if torch.cuda.is_available():
    # Also seed the generators of every visible CUDA device.
    torch.cuda.manual_seed_all(seed)

## 2. GPU 사용 가능 여부 확인

In [2]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('사용 디바이스:', device)
print('CUDA 가능 여부:', torch.cuda.is_available())

사용 디바이스: cpu
CUDA 가능 여부: False


## 3. 기본 선형 층 소개

원래 Keras 예제를 따라 `SimpleDense` 커스텀 클래스를 구현했지만, 입문 단계에서는 PyTorch가 제공하는 `nn.Linear`를 바로 사용하는 편이 훨씬 단순합니다.

In [3]:
# PyTorch의 nn.Linear 직접 구현

class SimpleDense(nn.Module):
    """Minimal fully connected layer whose parameters are created lazily.

    The input width is not passed to the constructor. ``W`` and ``b`` stay
    ``None`` until ``build`` runs, which ``forward`` triggers on its first
    call using the last dimension of the input.

    Args:
        units: Number of output features.
        activation: Optional callable applied to the affine output; ``None``
            leaves the output linear.
    """

    def __init__(self, units, activation=None):
        super().__init__()
        self.units = units
        self.activation = activation
        # Placeholders; replaced by registered nn.Parameter objects in build().
        self.W = None
        self.b = None

    def build(self, in_features, device=None, dtype=None):
        """Create and register the weight and bias parameters.

        Args:
            in_features: Size of the last dimension of the expected input.
            device: Device to allocate the parameters on. ``None`` keeps
                PyTorch's default device (the previous behavior).
            dtype: Parameter dtype. ``None`` keeps PyTorch's default dtype
                (the previous behavior).
        """
        # Small random weights (standard normal scaled by 0.01), zero bias.
        self.W = nn.Parameter(
            torch.randn(in_features, self.units, device=device, dtype=dtype) * 0.01
        )
        self.b = nn.Parameter(torch.zeros(self.units, device=device, dtype=dtype))

    def forward(self, x):
        """Return ``activation(x @ W + b)``, building the parameters if needed."""
        if self.W is None:
            # Parameters do not exist yet, so an earlier ``module.to(device)``
            # could not move them. Allocate them where the input lives so the
            # matmul below does not hit a device (or dtype) mismatch.
            dtype = x.dtype if x.is_floating_point() else None
            self.build(x.shape[-1], device=x.device, dtype=dtype)
        y = x @ self.W + self.b
        if self.activation is not None:
            y = self.activation(y)
        return y

## 4. Sequential 모델 사용

직접 MySequential을 구현하기보다 `nn.Sequential`을 바로 사용하는 것이 초보자에게 더 명확합니다.

In [4]:
# nn.Sequential 직접 구현
class MySequential(nn.Module):
    """Apply a list of modules one after another, like ``nn.Sequential``.

    Args:
        layers: Iterable of modules, applied in the given order.
    """

    def __init__(self, layers):
        super().__init__()
        # nn.ModuleList registers each layer as a submodule of this container.
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for module in self.layers:
            out = module(out)
        return out

## 5. 모델 구성 (가장 단순한 형태)

입력 784 → 은닉 512(ReLU) → 출력 10 (로짓). Softmax는 `CrossEntropyLoss` 내부에서 처리되므로 마지막 층은 활성함수가 없습니다.

In [5]:
model = MySequential(
    [
        # Hidden layer; its input width (784) is detected on the first forward pass.
        SimpleDense(units=512, activation=nn.ReLU()),
        # Output layer producing raw logits (no softmax applied).
        SimpleDense(units=10),
    ]
)
model = model.to(device)

print(model)

# (선택) 더 단순한 대안 (권장):
# model = nn.Sequential(
#     nn.Linear(784, 512),
#     nn.ReLU(),
#     nn.Linear(512, 10)
# ).to(device)

MySequential(
  (layers): ModuleList(
    (0): SimpleDense(
      (activation): ReLU()
    )
    (1): SimpleDense()
  )
)


## 6. 손실함수 / 옵티마이저 / 정확도 함수

`CrossEntropyLoss`는 `LogSoftmax + NLLLoss`를 합친 형태입니다. 라벨은 정수 인덱스(0~9) 형태면 됩니다.

In [6]:
# SimpleDense builds its weights lazily on the first forward pass, so push one
# dummy batch through the model before creating the optimizer; this makes sure
# the parameters exist when model.parameters() is read.
with torch.no_grad():
    _ = model(torch.zeros((1, 784), device=device))  # triggers parameter creation

optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

def accuracy_fn(logits, targets):
    """Return the fraction of rows whose arg-max over dim 1 equals the target.

    Args:
        logits: Tensor of per-class scores; the predicted class is taken
            along dimension 1.
        targets: Tensor of class indices compared element-wise with the
            predictions.

    Returns:
        The mean accuracy as a Python float.
    """
    predicted = logits.argmax(dim=1)
    hits = (predicted == targets).float()
    return hits.mean().item()

## 7. MNIST 데이터 로드

`transforms.ToTensor()` 후 평탄화만 적용. 추가 전처리 불필요.

In [7]:
# Every sample is already flattened to shape (784,) inside the transform pipeline.
transform = transforms.Compose(
    [
        transforms.ToTensor(),  # (1,28,28)
        transforms.Lambda(lambda img: img.view(-1)),  # (784,)
    ]
)
train_dataset_full = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
TEST_SPLIT = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
print('Train full size:', len(train_dataset_full))
print('Test size:', len(TEST_SPLIT))

Train full size: 60000
Test size: 10000


In [8]:
# Display the concrete class of the loaded training dataset object.
type(train_dataset_full)

torchvision.datasets.mnist.MNIST

## 8. 데이터 전처리 간소화

추가 조작 없이 바로 사용. 샘플 텐서 구조만 확인.

In [9]:
# Fetch the first (input, label) pair and print the shape of the input tensor.
x0, y0 = train_dataset_full[0]
print(x0.shape)

torch.Size([784])


In [10]:
# Display the largest and smallest values found in the first sample.
x0.max(), x0.min()

(tensor(1.), tensor(0.))

## 9. 훈련/검증 분리 (random_split)

전체의 30%를 검증셋으로 활용.

In [11]:
from torch.utils.data import random_split

# Hold out 30% of the full training set for validation; the rest is used for training.
val_count = int(0.3 * len(train_dataset_full))
train_count = len(train_dataset_full) - val_count
train_subset, val_subset = random_split(
    train_dataset_full,
    lengths=[train_count, val_count],
)
print(len(train_subset), len(val_subset))

42000 18000


## 10. DataLoader 구성

셔플은 훈련셋에서만. 배치 크기는 128.

In [12]:
BATCH_SIZE = 128
# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(dataset=train_subset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_subset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=TEST_SPLIT, batch_size=BATCH_SIZE)
# Pull a single training batch to confirm the tensor shapes.
xb, yb = next(iter(train_loader))
print('배치 입력 모양:', xb.shape, '배치 라벨 모양:', yb.shape)  # (B,784)

배치 입력 모양: torch.Size([128, 784]) 배치 라벨 모양: torch.Size([128])


## 11. 기록용 history 딕셔너리

In [13]:
# One independent, initially empty list per tracked metric; filled once per epoch.
history = {metric: [] for metric in ('loss', 'accuracy', 'val_loss', 'val_accuracy')}

## 12. 학습 함수 (1 에포크)

In [14]:
def train_one_epoch(loader):
    """Run one optimisation pass over ``loader`` and return its metrics.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``device``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(average loss, accuracy)`` over all samples seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        batch_size = inputs.size(0)

        optimizer.zero_grad()
        outputs = model(inputs)
        batch_loss = criterion(outputs, labels)
        batch_loss.backward()
        optimizer.step()

        correct += (outputs.argmax(dim=1) == labels).sum().item()
        # Scale the batch loss by the batch size so the final division
        # yields a per-sample average.
        loss_sum += batch_loss.item() * batch_size
        seen += batch_size
    return loss_sum / seen, correct / seen

## 13. 검증 함수

In [15]:
def evaluate(loader):
    """Measure loss and accuracy on ``loader`` without updating the model.

    Uses the module-level ``model``, ``criterion`` and ``device``. The model
    is switched to evaluation mode and gradients are disabled for the pass.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(average loss, accuracy)`` over all samples seen.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            batch_size = inputs.size(0)

            outputs = model(inputs)
            batch_loss = criterion(outputs, labels)

            correct += (outputs.argmax(dim=1) == labels).sum().item()
            # Scale the batch loss by the batch size so the final division
            # yields a per-sample average.
            loss_sum += batch_loss.item() * batch_size
            seen += batch_size
    return loss_sum / seen, correct / seen

## 14. 훈련 실행

In [16]:
EPOCHS = 5
for epoch in range(1, EPOCHS + 1):
    # Train on the training split, then measure on the validation split.
    train_loss, train_acc = train_one_epoch(train_loader)
    val_loss, val_acc = evaluate(val_loader)

    # Append this epoch's numbers to the matching history lists.
    epoch_metrics = (
        ('loss', train_loss),
        ('accuracy', train_acc),
        ('val_loss', val_loss),
        ('val_accuracy', val_acc),
    )
    for metric_name, metric_value in epoch_metrics:
        history[metric_name].append(metric_value)

    print(f'Epoch {epoch}: train_loss={train_loss:.4f} train_acc={train_acc:.4f} val_loss={val_loss:.4f} val_acc={val_acc:.4f}')

Epoch 1: train_loss=0.4093 train_acc=0.8915 val_loss=0.2272 val_acc=0.9359
Epoch 2: train_loss=0.1752 train_acc=0.9497 val_loss=0.1482 val_acc=0.9569
Epoch 3: train_loss=0.1171 train_acc=0.9657 val_loss=0.1180 val_acc=0.9649
Epoch 4: train_loss=0.0856 train_acc=0.9752 val_loss=0.1008 val_acc=0.9703
Epoch 5: train_loss=0.0651 train_acc=0.9810 val_loss=0.0986 val_acc=0.9690


## 15. History 출력

In [17]:
# Print the per-epoch metric lists recorded by the training loop.
print(history)

{'loss': [0.4093397729510353, 0.17519165026573907, 0.11710371780111677, 0.0855711860933474, 0.06508333561587192], 'accuracy': [0.891452380952381, 0.9497380952380953, 0.9657142857142857, 0.9752380952380952, 0.9810476190476191], 'val_loss': [0.2272435281806522, 0.1482427603205045, 0.1180462476015091, 0.10083010280132294, 0.09863487127754424], 'val_accuracy': [0.9358888888888889, 0.9568888888888889, 0.9648888888888889, 0.9702777777777778, 0.969]}


## 16. 테스트셋 평가

In [18]:
# Run the evaluation pass on the test loader and report its loss and accuracy.
test_loss, test_acc = evaluate(test_loader)
print('Test loss:', test_loss, 'Test acc:', test_acc)

Test loss: 0.08912140651643276 Test acc: 0.971


## 17. 예측 (샘플 배치)

In [19]:
model.eval()  # evaluation mode for the forward pass below
with torch.no_grad():
    # Take the first batch of the test loader; only the inputs are moved to the device.
    xb, yb = next(iter(test_loader))
    xb = xb.to(device)
    logits = model(xb)
    # Turn the logits into per-class probabilities.
    probs = logits.softmax(dim=1)
print(probs[:2])

tensor([[1.6030e-05, 1.1574e-08, 2.6709e-04, 7.0027e-04, 9.7190e-09, 1.0183e-06,
         1.0000e-11, 9.9889e-01, 2.9591e-05, 1.0066e-04],
        [7.3675e-07, 1.5518e-05, 9.9996e-01, 1.4584e-05, 2.2857e-13, 5.4621e-06,
         2.0750e-07, 1.5682e-12, 1.3118e-06, 1.3611e-13]])


## 18. 예측 vs 실제 라벨 비교

In [20]:
# The predicted class is the index of the highest probability in each row.
pred_labels = torch.argmax(probs, dim=1)
print('예측:', pred_labels[:10].tolist())
print('실제:', yb[:10].tolist())

예측: [7, 2, 1, 0, 4, 1, 4, 9, 5, 9]
실제: [7, 2, 1, 0, 4, 1, 4, 9, 5, 9]


## 19. (선택) 손실식 참고 (교차 엔트로피 수식)

다중 클래스 교차 엔트로피: $L = -\sum_{i=1}^{C} y_i \log(\hat{y}_i)$ (여기서 $C$는 클래스 수, $\hat{y}_i$는 softmax로 얻은 클래스 $i$의 예측 확률, $y_i$는 원-핫 라벨입니다. PyTorch의 `CrossEntropyLoss`는 원-핫 벡터 대신 정수 타깃 인덱스를 받아 내부적으로 동일한 계산을 수행합니다).