In [185]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision.datasets as dset

from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, Dataset

In [186]:
# Pick the compute device once: CUDA when PyTorch reports a usable GPU,
# otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [187]:
# MNIST train / test splits stored under ./MNIST_data/. ToTensor() is applied
# to every image returned by indexing or by a DataLoader.
# download=True fetches the files only when they are not already present in
# the root directory, so the notebook also runs from a fresh checkout instead
# of raising "Dataset not found".
train = dset.MNIST('./MNIST_data/', download=True, train=True, transform=ToTensor())
test = dset.MNIST('./MNIST_data/', download=True, train=False, transform=ToTensor())

In [188]:
# --- Data loading ---
batch_size = 32         # samples per optimisation step

# --- Model shape ---
input_dim = 28          # features per time step (one 28-pixel image row)
hidden_dim = 100        # size of the RNN hidden state
layer_num = 1           # number of stacked RNN layers
output_dim = 10         # number of classes (digits 0-9)

# --- Optimisation ---
learning_rate = 1e-4    # step size handed to the optimizer
num_epoch = 10          # full passes over the training set

In [189]:
class RNNModel(nn.Module):
    """Many-to-one RNN classifier: a ReLU ``nn.RNN`` followed by a linear layer.

    The input sequence is run through the RNN and only the output of the last
    time step is passed to the fully connected layer.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the RNN hidden state.
        num_layers: Number of stacked RNN layers.
        outout_dim: Number of output features of the final linear layer.
            (The spelling is kept as-is because callers pass it by keyword.)
    """

    def __init__(self, input_dim, hidden_dim, num_layers, outout_dim):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.outout_dim = outout_dim

        # batch_first=True: tensors are laid out as (batch, seq, feature).
        # Sub-modules are moved to the notebook-level `device`.
        self.rnn = nn.RNN(input_size=input_dim,
                          hidden_size=hidden_dim,
                          num_layers=num_layers,
                          batch_first=True,
                          nonlinearity='relu').to(device)

        # Size the head from the constructor argument rather than from the
        # notebook-level `output_dim` global, so the argument is honoured.
        self.fc = nn.Linear(in_features=hidden_dim,
                            out_features=outout_dim).to(device)

    def forward(self, x):
        """Return the linear-layer output for the last time step of ``x``.

        Args:
            x: Tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, outout_dim).
        """
        out, _ = self.rnn(x)        # out: (batch, seq_len, hidden_dim)
        last_step = out[:, -1, :]   # keep only the final time step
        return self.fc(last_step)

In [190]:
# Build the classifier from the notebook's hyperparameters.
model = RNNModel(input_dim, hidden_dim, layer_num, outout_dim=output_dim)

In [191]:
# Shuffled mini-batches over the training split; drop_last=True discards a
# final incomplete batch so every batch holds exactly `batch_size` samples.
train_loader = DataLoader(
    train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [192]:
# Cross-entropy loss on the model outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [193]:
%timeit
for epoch in range(num_epoch):
    for idx, sample in enumerate(train_loader):
        x, y = sample
        x = x.squeeze()
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        
        prediction = model(x)
        loss = criterion(prediction, y)
        
        loss.backward()
        optimizer.step()
    
    x = test.data.to(device)
    accuracy = (torch.argmax(model(x.float()), dim=1) == test.targets.to(device)).cpu().float().mean()    
    print(f'Epoch: {epoch}, Loss: {loss}, Acc: {accuracy}')

Epoch: 0, Loss: 1.0409157276153564, Acc: 0.5449000000953674
Epoch: 1, Loss: 0.7193273305892944, Acc: 0.696399986743927
Epoch: 2, Loss: 0.7019522190093994, Acc: 0.7584999799728394
Epoch: 3, Loss: 0.5383251309394836, Acc: 0.7882000207901001
Epoch: 4, Loss: 0.9767993688583374, Acc: 0.7858999967575073
Epoch: 5, Loss: 0.27250832319259644, Acc: 0.8119999766349792
Epoch: 6, Loss: 0.08336420357227325, Acc: 0.815500020980835
Epoch: 7, Loss: 0.285937637090683, Acc: 0.8133000135421753
Epoch: 8, Loss: 0.2850656807422638, Acc: 0.8174999952316284
Epoch: 9, Loss: 0.27429574728012085, Acc: 0.7720999717712402
