# MNIST: the most basic example dataset for deep learning

In [None]:
import torch
import torch.nn as nn
from torch.optim.adam import Adam
from torchvision.datasets.mnist import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data.dataloader import DataLoader

In [7]:
# Prepare the data: MNIST images, 28x28 = 784 pixels each.
# Both splits live under the current directory (download=True lets
# torchvision fetch them) and every image goes through ToTensor().
train_data = MNIST(
    root="./",
    train=True,
    download=True,
    transform=ToTensor(),
)
test_data = MNIST(
    root="./",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [8]:
# Report the number of samples in each split: training first, then test.
for split in (train_data, test_data):
    print(len(split))

60000
10000


In [9]:
# Inspect the first training sample; it is a tuple whose first element is
# the image tensor (the notebook displays the value of this expression).
train_data[0]

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

In [11]:
# Shape of the first image tensor. The recorded output is [1, 28, 28];
# presumably (channels, height, width), i.e. a single 28x28 channel -- confirm
# against the torchvision ToTensor documentation.
train_data[0][0].shape  # [a, b, c] -> a is the size of the first dimension

torch.Size([1, 28, 28])

In [12]:
# DataLoaders hand out the datasets in mini-batches of 32 samples.
# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(
    train_data,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_data,
    batch_size=32,
    shuffle=False,
)

In [13]:
# Model definition.
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Fully connected classifier: 784 inputs -> 100 -> 50 -> 10 outputs,
# with a ReLU activation after each hidden layer.
layers = [
    nn.Linear(784, 100),  # input features -> first hidden layer
    nn.ReLU(),
    nn.Linear(100, 50),   # first hidden layer -> second hidden layer
    nn.ReLU(),
    nn.Linear(50, 10),    # second hidden layer -> 10 output scores
]
model = nn.Sequential(*layers)

# Move the parameters to the selected device; as the last expression of the
# cell this also displays the model summary.
model.to(device)

Sequential(
  (0): Linear(in_features=784, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=50, bias=True)
  (3): ReLU()
  (4): Linear(in_features=50, out_features=10, bias=True)
)

In [15]:
# Loss function and optimizer.
# This is a classification problem, so cross-entropy is used as the loss.
criterion = nn.CrossEntropyLoss()

lr = 1e-3  # learning rate; 1e-3 is scientific notation for 10^-3
optim = Adam(model.parameters(), lr=lr)

In [17]:
# Train the model for 10 epochs over the training loader.
model.train()  # make sure mode-dependent layers (if any are added) train
for epoch in range(10):
    for data, label in train_loader:
        # Flatten each image to a 784-element row to match the first
        # Linear layer, then move inputs and labels to the model's device.
        data = torch.reshape(data, (-1, 784)).to(device)
        label = label.to(device)

        # Forward pass: class scores for the batch, then the loss.
        # (The loss is computed exactly once per step.)
        preds = model(data)
        loss = criterion(preds, label)

        # Backward pass: clear old gradients, backpropagate, update weights.
        optim.zero_grad()
        loss.backward()
        optim.step()

    # NOTE: this is the loss of the last mini-batch of the epoch only,
    # not an epoch average, so it can fluctuate from epoch to epoch.
    print(f'epoch: {epoch+1} | loss:{loss.item():.3f}')


epoch: 1 | loss:0.035
epoch: 2 | loss:0.342
epoch: 3 | loss:0.041
epoch: 4 | loss:0.091
epoch: 5 | loss:0.092
epoch: 6 | loss:0.019
epoch: 7 | loss:0.062
epoch: 8 | loss:0.136
epoch: 9 | loss:0.019
epoch: 10 | loss:0.008


In [23]:
# Evaluate the model: classification accuracy over the test set.
num_corr = 0

# Switch to evaluation mode for inference and remember the previous mode so
# it can be restored afterwards (keeps this cell safe to run on its own).
was_training = model.training
model.eval()

with torch.no_grad():  # gradients are not needed for evaluation
    for data, label in test_loader:
        # Flatten each image to a 784-element row, as done for training.
        data = torch.reshape(data, (-1, 784)).to(device)
        output = model(data)  # (batch_size, x) -> x is the number of outputs

        # Predicted class = index of the largest score along dim 1
        # (the per-sample score axis).
        pred = output.argmax(dim=1)

        # eq() compares predictions with labels element-wise; the labels
        # must be on the same device. The sum counts the correct ones.
        corr = pred.eq(label.to(device)).sum().item()
        num_corr += corr

model.train(was_training)

print(f'Acc: {num_corr / len(test_data)}')

Acc: 0.976
