# MNIST
- 28 x 28 image
- 1 channel gray image
- 0 ~ 9 digits

# Reading data

In [1]:
!pip install torchvision



In [2]:
import torch
import torch.nn as nn
import torchvision.datasets as dset
import torchvision.transforms as transforms 

In [4]:
training_epochs = 15
batch_size = 100

mnist_train=dset.MNIST(root='MNIST_data/', train=True, transform=transforms.ToTensor(), download=True)
mnist_test=dset.MNIST(root='MNIST_data/', train=False, transform=transforms.ToTensor(), download=True)

data_loader=torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, drop_last=True)

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
for epoch in range(training_epochs):
    # reshape input image into [batch_size by 784]
    # label is not one-hot encoded
    X=X.view(-1, 28*28).to(device)

# Epoch / Batch size / Iteration
- epoch: 전체 데이터의 학습 횟수
- batch size: 몇 개씩 학습할 지
- iteration: batch 사용 횟수
- ex) 1000개의 train set이 있을 때, 1 epoch에 batch size=500, iteration=2

# Softmax

In [6]:
# MNIST data image of shape 28*28=784
linear=torch.nn.Linear(784, 10, bias=True).to(device) # 이미지크기, 클래스개수

# parameters
training_epochs=15
batch_size=100

# define cost/loss & optimizer
criterion=torch.nn.CrossEntropyLoss().to(device) # Softmax is internally computed.
optimizer=torch.optim.SGD(linear.parameters(), lr=0.1)

for epoch in range(training_epochs):
    avg_cost=0
    total_batch=len(data_loader)
    for X,Y in data_loader:
        # reshpae input image into [batch_size by 784]
        # label is not one-hot encoded
        X=X.view(-1, 28*28).to(device)
        optimizer.zero_grad()
        hypothesis=linear(X)
        cost=criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost+=cost/total_batch
        
    print('Epoch: ', "%04d" % (epoch+1), 'cost=', '{:.9f}'.format(avg_cost))

Epoch:  0001 cost= 0.536887348
Epoch:  0002 cost= 0.359124303
Epoch:  0003 cost= 0.331245780
Epoch:  0004 cost= 0.316526026
Epoch:  0005 cost= 0.307073027
Epoch:  0006 cost= 0.300273120
Epoch:  0007 cost= 0.295047194
Epoch:  0008 cost= 0.290754229
Epoch:  0009 cost= 0.287258238
Epoch:  0010 cost= 0.284556389
Epoch:  0011 cost= 0.281915605
Epoch:  0012 cost= 0.279633641
Epoch:  0013 cost= 0.277664304
Epoch:  0014 cost= 0.276000142
Epoch:  0015 cost= 0.274360329


# Test

In [7]:
# Test the model using test sets
with torch.no_grad():
    X_test=mnist_test.test_data.view(-1, 28*28).float().to(device)
    Y_test=mnist_test.test_labels.to(device)
    
    prediction=linear(X_test)
    correct_prediction=torch.argmax(prediction, 1)==Y_test
    accuracy=correct_prediction.float().mean()
    print('Accuracy: ', accuracy.item())

Accuracy:  0.883400022983551




# Visualization

In [8]:
import matplotlib.pyplot as plt
import random

In [None]:
r=random.randint(0, len(mnist_test) - 1)

X_single_data=mnist_test.test_data[r:r+1].view(-1,28*28).float().to(device)
Y_single_data=mnist_test.test_labels[r:r+1].to(device)

print('Label: ', Y_single_data.item())
single_prediction=linear(X_single_data)
print('Prediction: ', torch.argmax(single_prediction, 1).item())

plt.imshow(mnist_test.test_data[r:r+1].view(28,28),cmap='Greys',interpolation='nearest')
plt.show()



Label:  4
Prediction:  4
