In [1]:
import torch
import torch.utils.data as Data 
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.autograd import Variable

In [2]:
# ---- Hyper-parameters and run configuration ----
# Seed PyTorch's global RNG before anything random happens.
torch.manual_seed(1)

EPOCH, BATCH_SIZE = 2, 64      # passes over the training loader / samples per mini-batch
TIME_STEP = INPUT_SIZE = 28    # INPUT_SIZE feeds the LSTM's input_size
LR = 1e-2                      # learning rate handed to the optimizer
# The spelling of this flag is kept as-is: the dataset cell refers to it by this name.
DOWNLOAD_MNISIT = False        # passed as `download=` when building the training set

In [3]:
# MNIST training split rooted at ./mnist; ToTensor() is applied to every
# image fetched through the dataset. Whether the files are downloaded first
# is controlled by DOWNLOAD_MNISIT.
train_data = torchvision.datasets.MNIST(
    './mnist',
    train=True,
    download=DOWNLOAD_MNISIT,
    transform=transforms.ToTensor(),
)


In [4]:
# Shuffled mini-batch iterator over the training set.
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE,
                               shuffle=True)

# MNIST test split, read from the same root directory as the training data.
test_data = torchvision.datasets.MNIST(
    root='./mnist',
    train=False,
    transform=transforms.ToTensor()
)

# Evaluation subset: the first 2000 test samples.
# The raw image tensor is read straight off the dataset object, so the
# `transform` above is NOT applied here; the channel axis is added and the
# pixels are divided by 255 by hand (presumably uint8 in 0..255 - verify).
# Slicing happens before the unsqueeze/float cast so only the 2000 samples
# that are kept get converted.
# NOTE(review): newer torchvision releases expose these tensors as `.data` /
# `.targets` and keep `test_data` / `test_labels` only as deprecated aliases;
# switch once the minimum supported torchvision version is confirmed.
test_x = torch.unsqueeze(test_data.test_data[:2000], dim=1).type(torch.FloatTensor) / 255
test_y = test_data.test_labels[:2000]

# Keep the evaluation tensors on the GPU for the whole run.
test_x = test_x.cuda()
test_y = test_y.cuda()

In [5]:
class RNN(nn.Module):
    """LSTM sequence classifier.

    The input sequence is run through an ``nn.LSTM`` built with
    ``batch_first=True``; the LSTM output at the last time step is then
    projected to class scores by a linear layer.

    Args:
        input_size: Number of features per time step. ``None`` (the default)
            falls back to the module-level ``INPUT_SIZE`` constant, which is
            the value that used to be hard-wired here.
        hidden_size: Size of the LSTM hidden state. Defaults to 64.
        num_layers: Number of stacked LSTM layers. Defaults to 1.
        num_classes: Number of output scores per sample. Defaults to 10.
    """

    def __init__(self, input_size=None, hidden_size=64, num_layers=1, num_classes=10):
        super(RNN, self).__init__()
        if input_size is None:
            # Looked up at construction time so RNN() behaves as before.
            input_size = INPUT_SIZE
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.out = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Score a batch of sequences.

        Args:
            x: Tensor of shape (batch, time_step, input_size).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised scores.
        """
        # No initial (h_0, c_0) is supplied; the final states are not needed.
        r_out, _ = self.rnn(x, None)
        # r_out is (batch, time_step, hidden_size); classify from the last step.
        return self.out(r_out[:, -1, :])

# Build the network with its default sizes, move it to the GPU, and print
# its layer summary.
rnn = RNN().cuda()
print(rnn)

RNN(
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear(in_features=64, out_features=10, bias=True)
)


In [6]:
# Sanity check: number of labels in the evaluation subset.
print(test_y.shape[0])

2000


In [8]:
# Train the LSTM classifier with Adam on the cross-entropy loss, reporting
# the current batch loss and the accuracy on the evaluation subset every
# 50 steps.
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

# The evaluation images never change, so reshape them once: every sample
# becomes a sequence of TIME_STEP steps with INPUT_SIZE features each.
test_seq = test_x.view(-1, TIME_STEP, INPUT_SIZE)

for epoch in range(EPOCH):
    for step, (x, b_y) in enumerate(train_loader):
        # Drop the channel axis and move the batch to the GPU.
        b_x = x.view(-1, TIME_STEP, INPUT_SIZE).cuda()
        b_y = b_y.cuda()

        output = rnn(b_x)
        loss = loss_func(output, b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 50 == 0:
            # Evaluation needs no gradients; skipping the autograd graph
            # avoids holding activations for every evaluation sample.
            with torch.no_grad():
                test_output = rnn(test_seq)
            # Index of the highest score per sample = predicted class.
            pred_y = torch.max(test_output, 1)[1]
            # Compare on the device the tensors already live on; only the
            # final count is brought back to Python.
            accuracy = (pred_y == test_y).sum().item() / test_y.size(0)
            print('epoch: ',epoch,'| step: ',step,'| train loss:%.4f'%loss.item(),
                 '| test accuracy:%.3f'%accuracy)

epoch:  0 | step:  0 | train loss:2.2883 | test accuracy:0.102
epoch:  0 | step:  50 | train loss:1.0933 | test accuracy:0.619
epoch:  0 | step:  100 | train loss:0.9901 | test accuracy:0.766
epoch:  0 | step:  150 | train loss:0.5377 | test accuracy:0.786
epoch:  0 | step:  200 | train loss:0.2820 | test accuracy:0.854
epoch:  0 | step:  250 | train loss:0.2429 | test accuracy:0.901
epoch:  0 | step:  300 | train loss:0.4214 | test accuracy:0.905
epoch:  0 | step:  350 | train loss:0.4387 | test accuracy:0.891
epoch:  0 | step:  400 | train loss:0.1261 | test accuracy:0.931
epoch:  0 | step:  450 | train loss:0.1285 | test accuracy:0.933
epoch:  0 | step:  500 | train loss:0.0561 | test accuracy:0.937
epoch:  0 | step:  550 | train loss:0.2149 | test accuracy:0.940
epoch:  0 | step:  600 | train loss:0.1386 | test accuracy:0.944
epoch:  0 | step:  650 | train loss:0.2413 | test accuracy:0.928
epoch:  0 | step:  700 | train loss:0.1298 | test accuracy:0.944
epoch:  0 | step:  750 | tra

In [10]:
# Spot check: predicted vs. true labels for the first ten evaluation images.
# Inference only, so the forward pass runs without autograd bookkeeping.
with torch.no_grad():
    test_output = rnn(test_x[:10].view(-1, TIME_STEP, INPUT_SIZE))
# Index of the highest score per sample = predicted class.
pred_y = torch.max(test_output, 1)[1]
print(pred_y.cpu().numpy(), 'prediction number')
print(test_y[:10].cpu().numpy(), 'real number')

[7 2 1 6 4 1 4 9 6 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number
