In [1]:
# 패키지 임포트

import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

In [2]:
# mnist 데이터 다운로드

mnist_train = datasets.MNIST(root="./datasets", train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root="./datasets", train=False, transform=transforms.ToTensor(), download=True)
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=100, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 133784189.58it/s]

Extracting ./datasets/MNIST/raw/train-images-idx3-ubyte.gz to ./datasets/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 42178166.37it/s]


Extracting ./datasets/MNIST/raw/train-labels-idx1-ubyte.gz to ./datasets/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 46487137.17it/s]

Extracting ./datasets/MNIST/raw/t10k-images-idx3-ubyte.gz to ./datasets/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4405765.21it/s]


Extracting ./datasets/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./datasets/MNIST/raw



In [3]:
# 네트워크 정의

input_size = 784 #입력 데이터 크기
hidden_sizes = [128, 64] #perceptrons 개수를 저장해놓은 배열인듯
output_size = 10 #출력 데이터 크키

#모델 쌓기
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.LogSoftmax(dim=1))

In [4]:
#모델 형태 확인
model

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): LogSoftmax(dim=1)
)

In [5]:
# 손실함수와 최적화

criterion = nn.CrossEntropyLoss() #cross entropy loss를 손실함수로 지정
optimizer = torch.optim.SGD(model.parameters(), lr=0.09) #최적화는 SGD(Stochastic Gradient Descent)로 지정하고 learning rate는 0.09

In [6]:
import time

In [7]:
# 훈련
start = time.time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        images = images.view(images.shape[0], -1)

        optimizer.zero_grad() #(https://algopoolja.tistory.com/55)
        #gradient는 update되는 변수이므로 epoch마다 reset

        #모델 계산
        output = model(images)

        #손실 계산
        loss = criterion(output, labels)

        #역전파
        loss.backward() #가중치 조절(https://www.datamaker.io/blog/posts/32)

        #최적화
        optimizer.step()

        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(train_loader)))
end = time.time()
print('실행시간 :',end - start)

Epoch 0 - Training loss: 0.6644900024433931
Epoch 1 - Training loss: 0.2596899145593246
Epoch 2 - Training loss: 0.19038820515697202
Epoch 3 - Training loss: 0.15173312860851487
Epoch 4 - Training loss: 0.12442219671793282
Epoch 5 - Training loss: 0.10644461587071419
Epoch 6 - Training loss: 0.09165847658490141
Epoch 7 - Training loss: 0.07898401497708013
Epoch 8 - Training loss: 0.0691341407271102
Epoch 9 - Training loss: 0.06163923271931708
Epoch 10 - Training loss: 0.05526886996425067
Epoch 11 - Training loss: 0.049182409714752184
Epoch 12 - Training loss: 0.044143928861400734
Epoch 13 - Training loss: 0.03985929042877009
Epoch 14 - Training loss: 0.03509117306365321
실행시간 : 113.47332906723022


In [9]:
model2 = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.LogSoftmax(dim=1))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=0.09) #최적화 방법 변화
#Adam : 현재 그래디언트와 이전 그래디언트의 지수 가중 평균을 이용

In [10]:
# 훈련
start = time.time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        images = images.view(images.shape[0], -1)

        optimizer.zero_grad() #(https://algopoolja.tistory.com/55)
        #gradient는 update되는 변수이므로 epoch마다 reset

        #모델 계산
        output = model2(images)

        #손실 계산
        loss = criterion(output, labels)

        #역전파
        loss.backward() #가중치 조절(https://www.datamaker.io/blog/posts/32)

        #최적화
        optimizer.step()

        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(train_loader)))
end = time.time()
print('실행시간 :',end - start)

Epoch 0 - Training loss: 1.2039816130201022
Epoch 1 - Training loss: 0.9645242633422216
Epoch 2 - Training loss: 1.0206640410423278
Epoch 3 - Training loss: 1.1114861125747362
Epoch 4 - Training loss: 1.0820326706767083
Epoch 5 - Training loss: 1.1292081199089685
Epoch 6 - Training loss: 1.1245449287692706
Epoch 7 - Training loss: 1.1629717356959979
Epoch 8 - Training loss: 1.0710397669672966
Epoch 9 - Training loss: 1.1340247377753259
Epoch 10 - Training loss: 1.0704882286985715
Epoch 11 - Training loss: 1.2911541562279065
Epoch 12 - Training loss: 1.2891352040568989
Epoch 13 - Training loss: 1.325566752354304
Epoch 14 - Training loss: 1.352333592971166
실행시간 : 128.00720763206482


CGD가 가장 기본적이고 빠름

In [11]:
# 테스트
correct = 0
total = len(mnist_test)
with torch.no_grad():
    # Iterate through test set minibatchs
    for images, labels in tqdm(test_loader):
        # Forward pass
        #x = images.view(-1, 28*28)
        x = images.view(images.shape[0], -1)
        y = model(x)

        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

print('Test accuracy: {}'.format(correct/total))


  0%|          | 0/100 [00:00<?, ?it/s]

Test accuracy: 0.9750999808311462
