In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
import torch.nn.functional as F
from torch import optim

In [8]:
from torchvision import datasets
import torchvision.transforms as transforms

# converting to tensor
transform=transforms.ToTensor()

train_dataset = datasets.MNIST(root='../../data/',
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)                                             
test_dataset = datasets.MNIST(root='../../data/',
                              train=False, 
                              transform=transforms.ToTensor())

In [10]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64, 
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=64, 
                                          shuffle=False)

In [15]:
print(train_dataset.data.shape)
train_dataset.data[1]

torch.Size([60000, 28, 28])


tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,  51, 159, 253, 159,  50,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          48, 238, 252, 252, 252, 237,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   

In [80]:
#model
class DigRNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.LSTM=nn.LSTM(28, 128, 2, batch_first=True)
        self.dropout=nn.Dropout(0.2)
        self.direct=nn.Linear(128, 10)
    def forward(self, x):
        #initial layer
        hidden = torch.zeros(2, x.size(0),128).cuda() 
        input1 = torch.zeros(2, x.size(0), 128).cuda()
        output, hidden= self.LSTM(x, (hidden,input1))
        output=self.dropout(output)
        output=output[:,-1,:]
        output=self.direct(output)
        return output

In [81]:
model=DigRNN().cuda()
print(model)

DigRNN(
  (LSTM): LSTM(28, 128, num_layers=2, batch_first=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (direct): Linear(in_features=128, out_features=10, bias=True)
)


In [82]:
#loss criterion
criterion=nn.CrossEntropyLoss()
#optmizer for updating weigths
optimizer=optim.Adam(model.parameters(), lr=0.003)

In [111]:
epochs=10
for e in range(epochs):
    total_loss=0
    for images, labels in train_loader:
        images=images.view(-1, 28, 28).cuda()
        labels=labels.cuda()
        
        model.train()#to get out of evaluate
        
        optimizer.zero_grad()#to remove previous grad 
        
        output=model(images)
        loss=criterion(output, labels)
        total_loss+=loss.item()
       
        loss.backward()#back propagation
        optimizer.step()
    else:
        model.eval()
        test_correct = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images=images.view(-1,28,28).cuda()
                labels=labels.cuda()
                outputs = model(images)
                top_ps, top_dig= torch.max(outputs.data, 1)
                test_correct += (top_dig == labels).sum().item()


        train_loss = total_loss / len(train_loader.dataset)
        test_loss=0


        print("Epoch: {}/{}.. ".format(e+1, epochs),"Training Loss: {:.5f}.. ".format(train_loss),"Test Accuracy: {:.3f}".format(test_correct / len(test_loader.dataset)))
    

Epoch: 1/10..  Training Loss: 0.00017..  Test Accuracy: 0.990
Epoch: 2/10..  Training Loss: 0.00014..  Test Accuracy: 0.991
Epoch: 3/10..  Training Loss: 0.00014..  Test Accuracy: 0.990
Epoch: 4/10..  Training Loss: 0.00013..  Test Accuracy: 0.989
Epoch: 5/10..  Training Loss: 0.00020..  Test Accuracy: 0.991
Epoch: 6/10..  Training Loss: 0.00020..  Test Accuracy: 0.988
Epoch: 7/10..  Training Loss: 0.00017..  Test Accuracy: 0.989
Epoch: 8/10..  Training Loss: 0.00011..  Test Accuracy: 0.990
Epoch: 9/10..  Training Loss: 0.00014..  Test Accuracy: 0.989
Epoch: 10/10..  Training Loss: 0.00016..  Test Accuracy: 0.988
