In [1]:
import torch
import torch.nn as nn

#popular datasets packcage,image tranformations for computer
import torchvision
#image transform을 위한 package
import torchvision.transforms as transforms

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#Hyper parameter setting

sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.01


In [4]:
#Load MNIST dataset, tensor로 변환

train_dataset = torchvision.datasets.MNIST(root = '../data/', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root = '../data/', train=False, transform=transforms.ToTensor(), download=True)

#Data loader

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:01, 5382656.77it/s]                             


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw


0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 62516.92it/s]            
0it [00:00, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:01, 1216010.97it/s]                            
0it [00:00, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 18367.87it/s]            

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw
Processing...
Done!





In [16]:
#Recurrent neural network (many-to-one)
#batch_first : 우리의 input data shape가 어떻게 주어지는지를 설명해주는 옵션, 나중에 입력을 할 때 필요한 옵션

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        #Set initial hidden and cell states
        #h0 = [2,* x_size, 128]
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        out, _ = self.lstm(x,(h0, c0))
        
        out = self.fc(out[:, -1, :])
        
        return out
    
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device) 

In [17]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [19]:
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        
        #Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        #Backward and optimize
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if(i + 1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss : {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/2], Step [100/600], Loss : 0.6862
Epoch [1/2], Step [200/600], Loss : 0.3620
Epoch [1/2], Step [300/600], Loss : 0.3390
Epoch [1/2], Step [400/600], Loss : 0.1491
Epoch [1/2], Step [500/600], Loss : 0.1171
Epoch [1/2], Step [600/600], Loss : 0.1390
Epoch [2/2], Step [100/600], Loss : 0.1408
Epoch [2/2], Step [200/600], Loss : 0.0383
Epoch [2/2], Step [300/600], Loss : 0.2000
Epoch [2/2], Step [400/600], Loss : 0.0531
Epoch [2/2], Step [500/600], Loss : 0.0139
Epoch [2/2], Step [600/600], Loss : 0.2013


In [21]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        print('Test Accuracy : {} %'.format(100*correct/total))
        
#SAve model checkpoint
torch.save(model.state_dict(), 'model.ckpt')

Test Accuracy : 98.0 %
Test Accuracy : 98.5 %
Test Accuracy : 97.66666666666667 %
Test Accuracy : 98.0 %
Test Accuracy : 97.8 %
Test Accuracy : 97.66666666666667 %
Test Accuracy : 97.71428571428571 %
Test Accuracy : 97.625 %
Test Accuracy : 97.66666666666667 %
Test Accuracy : 97.0 %
Test Accuracy : 97.0 %
Test Accuracy : 97.0 %
Test Accuracy : 96.6923076923077 %
Test Accuracy : 96.71428571428571 %
Test Accuracy : 96.73333333333333 %
Test Accuracy : 96.75 %
Test Accuracy : 96.76470588235294 %
Test Accuracy : 96.66666666666667 %
Test Accuracy : 96.6842105263158 %
Test Accuracy : 96.65 %
Test Accuracy : 96.66666666666667 %
Test Accuracy : 96.5 %
Test Accuracy : 96.6086956521739 %
Test Accuracy : 96.58333333333333 %
Test Accuracy : 96.44 %
Test Accuracy : 96.46153846153847 %
Test Accuracy : 96.48148148148148 %
Test Accuracy : 96.42857142857143 %
Test Accuracy : 96.41379310344827 %
Test Accuracy : 96.46666666666667 %
Test Accuracy : 96.41935483870968 %
Test Accuracy : 96.46875 %
Test Accura