## **1. 필요한 모듈 선언**
---

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import random

## **2. 시드 고정**
---

In [8]:
# Fix the seed of every random number generator this notebook imports so that
# a fresh "Restart & Run All" is repeatable. Python's built-in `random` module
# is imported at the top of the notebook, so it is seeded here as well.
SEED = 42

torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [9]:
# Pick the compute device: use the GPU when PyTorch reports CUDA as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
    # Seed the random number generators of all CUDA devices.
    torch.cuda.manual_seed_all(777)
else:
    device = 'cpu'

## **3. 데이터셋 로드 & 전처리**
---



#### **(1) 데이터셋 로드**

In [10]:
# MNIST dataset: build the training split (train=True) and then the test split
# (train=False) with otherwise identical arguments. Both read from / download
# into 'MNIST_data/' and convert each image with ToTensor().
mnist_train, mnist_test = (
    dsets.MNIST(root='MNIST_data/',
                train=is_train,
                transform=transforms.ToTensor(),
                download=True)
    for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw
Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!





#### **(2) 데이터로더 생성**

In [12]:
# Data loader over the training split: mini-batches of 100 samples, reshuffled
# every epoch; drop_last=True discards a final batch that is smaller than 100.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=100,
    shuffle=True,
    drop_last=True,
)

## **4. 모델 생성**
---

#### **(1) 모델 생성** 
(현재 모델 구조: 레이어 1개)

In [13]:
# Single fully connected layer mapping 784 input features to 10 outputs,
# placed on the selected device.
model = nn.Linear(in_features=784, out_features=10, bias=True).to(device)

#### **(2) 모델의 weight값 초기화**

In [27]:
# Xavier (Glorot) uniform initialization of the layer's weight matrix, done in
# place. The call is left as the cell's last expression so the initialized
# parameter is displayed.
nn.init.xavier_uniform_(model.weight, gain=1.0)

Parameter containing:
tensor([[ 0.0004,  0.0839, -0.0788,  ..., -0.0654, -0.0063, -0.0428],
        [-0.0259,  0.0172, -0.0214,  ...,  0.0521,  0.0378, -0.0008],
        [-0.0778,  0.0603, -0.0296,  ...,  0.0750,  0.0504, -0.0823],
        ...,
        [ 0.0820,  0.0499,  0.0152,  ...,  0.0536, -0.0087,  0.0197],
        [-0.0527, -0.0178, -0.0654,  ...,  0.0111, -0.0181,  0.0477],
        [ 0.0635, -0.0575, -0.0253,  ..., -0.0859,  0.0258,  0.0657]],
       requires_grad=True)

## **5. 모델 학습**
---

#### **(1) optimizer 및 cost 함수 선언**

In [28]:
# Cost function (loss function): cross-entropy.
# Softmax is computed internally, so the model outputs are passed in as-is.
criterion = nn.CrossEntropyLoss().to(device)

# Optimizer: plain SGD over all model parameters.
learning_rate = 1e-4
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

#### **(2) 모델 학습**

In [26]:
# Number of epochs: how many times the training loop goes over the full
# training data.
nb_epochs = 1_000

In [29]:
# Start training
print('train start !! \n')

# range(nb_epochs + 1) iterates epoch = 0 .. nb_epochs inclusive, i.e. one
# pass more than nb_epochs.
for epoch in range(nb_epochs + 1):
    # Running sum of the per-batch losses for this epoch, kept as a plain
    # Python float (see the .item() note below).
    avg_cost = 0.0

    for x, y in data_loader:
        # Flatten every image to a 784-element vector and move the batch to
        # the selected device.
        x = x.view(-1, 28 * 28).to(device)
        y = y.to(device)

        # Model prediction (forward pass)
        prediction = model(x)

        # Compute the cost (loss)
        cost = criterion(prediction, y)

        # .item() extracts the scalar value. Accumulating the tensor itself
        # would keep autograd history attached to the running sum for every
        # batch of the epoch, which wastes memory.
        avg_cost += cost.item()

        # Update the model using the cost
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

    # Mean loss per batch over this epoch
    avg_cost = avg_cost / len(data_loader)

    # Print a log line every 10 epochs
    if epoch % 10 == 0:
        print('Epoch {:5d}/{} \t Cost(Loss): {:.6f}\n'.format(epoch, nb_epochs, avg_cost))

train start !! 

Epoch     0/1000 	 Cost(Loss): 2.377354

Epoch    10/1000 	 Cost(Loss): 1.826009

Epoch    20/1000 	 Cost(Loss): 1.492453

Epoch    30/1000 	 Cost(Loss): 1.274115

Epoch    40/1000 	 Cost(Loss): 1.124537

Epoch    50/1000 	 Cost(Loss): 1.017135

Epoch    60/1000 	 Cost(Loss): 0.936749

Epoch    70/1000 	 Cost(Loss): 0.874449

Epoch    80/1000 	 Cost(Loss): 0.824752

Epoch    90/1000 	 Cost(Loss): 0.784148

Epoch   100/1000 	 Cost(Loss): 0.750307

Epoch   110/1000 	 Cost(Loss): 0.721628

Epoch   120/1000 	 Cost(Loss): 0.696976

Epoch   130/1000 	 Cost(Loss): 0.675529

Epoch   140/1000 	 Cost(Loss): 0.656674

Epoch   150/1000 	 Cost(Loss): 0.639947

Epoch   160/1000 	 Cost(Loss): 0.624988

Epoch   170/1000 	 Cost(Loss): 0.611518

Epoch   180/1000 	 Cost(Loss): 0.599312

Epoch   190/1000 	 Cost(Loss): 0.588190

Epoch   200/1000 	 Cost(Loss): 0.578004

Epoch   210/1000 	 Cost(Loss): 0.568635

Epoch   220/1000 	 Cost(Loss): 0.559980

Epoch   230/1000 	 Cost(Loss): 0.551957
