In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.utils as utils
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [None]:
# Pick the compute device once: GPU when CUDA is usable, otherwise CPU.
is_cuda = torch.cuda.is_available()
device = torch.device('cuda') if is_cuda else torch.device('cpu')

In [None]:
# Raw MNIST splits, converted to tensors only (no normalization yet).
# download=True fetches the files into root when they are not already there.
train_data = dsets.MNIST(
    root='MNIST_data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_data = dsets.MNIST(
    root='MNIST_data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Report how many samples each split holds.
print('number of training data:', len(train_data))
print('number of test data:', len(test_data))

In [None]:
# Take the first training sample apart into its image tensor and its label.
image, label = train_data[0]
print(f'image shape:{image.shape}')
# Channel 0, row index 6 (the 7th row) of the image.
print(f'7th row of this image:{image[0, 6]}')

In [None]:
# Print the shape of the label only (second element of the sample), matching the
# message. Wrapping in as_tensor makes this work whether the dataset hands back
# a plain Python int or a 0-dim tensor (presumably depends on the torchvision
# version — verify against the installed one).
print('shape of label: ', torch.as_tensor(train_data[0][1]).shape)

In [None]:
from matplotlib import pyplot as plt
# Show the first training image in grayscale with its class as the title.
plt.imshow(image.squeeze().numpy(), cmap='gray')
# int(...) accepts both a plain Python int label and a 0-dim tensor label,
# whereas .item() only exists on tensors.
plt.title('%i' % int(label))
plt.show()

In [None]:
# Standardization: after ToTensor(), Normalize computes (x - 0.5) / 0.5 per channel.
# MNIST images are single-channel (grayscale), so mean/std carry exactly one
# entry each. Three-entry RGB-style tuples cannot be broadcast onto a 1-channel
# tensor in place and raise a RuntimeError in current torchvision.
standardizator = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST dataset, reloaded with the standardizing transform applied to every image.
train_data = dsets.MNIST(root='data/', train=True, transform=standardizator, download=True)
test_data  = dsets.MNIST(root='data/', train=False, transform=standardizator, download=True)

# Re-bind the first sample so later cells inspect the standardized version.
image, label = train_data[0]

In [None]:
# Summarize the current sample: image shape plus one row of pixel values.
print('Image')
print('========================================')
print('shape of this image\t:', image.shape)
print('7\'th row of this image\t:', image[0][6])

# The label may be a plain Python int or a 0-dim tensor (presumably depending on
# the torchvision version — verify). as_tensor/int handle both, whereas
# .shape/.item() exist only on tensors.
print('Label')
print('========================================')
print('shape of label: ', torch.as_tensor(label).shape)
print('label: ', int(label))


In [None]:
import numpy as np 
def imshow(img):
    """Display one image tensor in grayscale.

    Args:
        img: Image tensor; it is rescaled with (img + 1) / 2 before plotting and
            singleton dimensions are squeezed away. Presumably shaped
            (1, H, W) with values in [-1, 1] — confirm against the caller.

    Returns:
        None. The figure is shown via matplotlib.
    """
    # Detach and move to host memory first so .numpy() also works for tensors
    # that track gradients or live on the GPU (same handling as imshow_grid).
    img = img.detach().cpu()
    img = (img + 1) / 2
    np_img = img.squeeze().numpy()
    plt.imshow(np_img, cmap='gray')
    plt.show()

# Grid version of the visualization function, to be used later.
def imshow_grid(img):
    """Tile a batch of image tensors into one grid and display it.

    The batch is detached and copied to the CPU, arranged with
    torchvision's make_grid, rescaled with (x + 1) / 2 and plotted
    channels-last.
    """
    grid = utils.make_grid(img.cpu().detach())
    grid = (grid + 1) / 2
    # matplotlib wants (H, W, C); the grid tensor is laid out as (C, H, W).
    channels_last = grid.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()
    
# Display the current sample image using the single-image helper defined above.
imshow(image)

In [None]:
# Small MLP: 28*28 flattened inputs -> 256 hidden units -> 10 outputs.
mlp = nn.Sequential(
    nn.Linear(in_features=28*28, out_features=256),
    nn.LeakyReLU(negative_slope=0.1),
    nn.Linear(in_features=256, out_features=10),
    # For ease of explanation the network ends in Softmax; the log is taken
    # later, when the loss is computed.
    nn.Softmax(dim=-1),
)
# Module.to returns the module itself, so re-binding keeps the same object.
mlp = mlp.to(device)

In [None]:
# Sanity check: flatten the sample image to a 28*28 vector, run it through the
# network on the chosen device, and print the 10 softmax outputs.
print(mlp(image.to(device).view(28*28)))

In [None]:
import time

def run_epoch(model, train_data, test_data, optimizer, criterion):
    """Train `model` for one pass over `train_data`, then score it on `test_data`.

    Args:
        model: Network to train; it receives inputs reshaped to (-1, 28*28) and
            its output is passed through torch.log before the loss.
        train_data: Iterable of (image, label) pairs used for optimization.
        test_data: Iterable of (image, label) pairs used for evaluation only.
        optimizer: Optimizer that updates the parameters of `model`.
        criterion: Loss called as criterion(log_output, label.view(-1)).

    Returns:
        Tuple (total_test_loss, elapsed_seconds): the sum (not the mean) of the
        per-item test losses, and the wall-clock time of the whole call.
    """
    # Run on whatever device the model lives on instead of relying on a
    # notebook-level global. Parameter-free models fall back to the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    start_time = time.time()
    for img_i, label_i in train_data:

        img_i = img_i.to(device)
        # Labels may arrive as plain Python ints or as tensors; as_tensor
        # accepts both and places the result on the right device.
        label_i = torch.as_tensor(label_i, device=device)

        optimizer.zero_grad()

        # Forward: use the `model` argument (not a global) and call the module
        # itself rather than .forward so registered hooks are honored.
        label_predicted = model(img_i.view(-1, 28*28))
        # Loss computation on the log of the model output
        loss = criterion(torch.log(label_predicted), label_i.view(-1))
        # Backward
        loss.backward()
        # Optimize for img_i
        optimizer.step()

    total_test_loss = 0
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for img_j, label_j in test_data:

            img_j = img_j.to(device)
            label_j = torch.as_tensor(label_j, device=device)

            label_predicted = model(img_j.view(-1, 28*28))
            total_test_loss += criterion(torch.log(label_predicted), label_j.view(-1)).item()

    end_time = time.time()
    return total_test_loss, (end_time - start_time)


# Adam over every parameter of the MLP, paired with NLLLoss; run_epoch applies
# torch.log to the model output before calling the criterion.
optimizer = optim.Adam(params=mlp.parameters(), lr=1e-4)
criterion = nn.NLLLoss()

# Three passes over the data; each pass reports the summed test loss and the
# wall-clock seconds it took.
for epoch in range(3):
    test_loss, response = run_epoch(
        model=mlp,
        train_data=train_data,
        test_data=test_data,
        optimizer=optimizer,
        criterion=criterion,
    )
    print('epoch ', epoch, ': ')
    print('\ttest_loss: ', test_loss)
    print('\tresponse(s): ', response)
