# RNN Implementation using PyTorch

In [33]:
import torch 
import torch.nn as nn
import torchvision 
import torchvision.datasets as datasets 
import torchvision.transforms as transforms 
import torch.optim as optim 
import torch.nn.functional as F 
from torch.utils.data import DataLoader


# Set Device 

In [34]:
# Run on the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

# HyperParameters

In [35]:
# Reproducibility: seed PyTorch's global RNG so that weight initialisation and
# DataLoader shuffling start from the same state on every fresh run.
seed = 42
torch.manual_seed(seed)

# Input geometry: each image row (28 pixels) is fed as one time step,
# and an image has 28 rows, hence 28 time steps.
input_size = 28
sequence_length = 28

# Network size
num_layers = 2
hidden_size = 256
num_classes = 10

# Optimisation
learning_rate = 0.001
batch_size = 64
num_epochs = 2

# Load the dataset 

In [36]:
# Training split: downloaded into "content/" on first use and reshuffled every epoch.
train_dataset = datasets.MNIST(root="content/",train=True,transform = transforms.ToTensor(),download=True)
train_loader = DataLoader(dataset=train_dataset,batch_size = batch_size,shuffle=True)

# Test split: only used for evaluation, so shuffling adds nothing; keeping the
# order fixed makes evaluation deterministic and leaves the global RNG untouched.
test_dataset = datasets.MNIST(root="content/",train=False,transform = transforms.ToTensor(),download=True)
test_loader = DataLoader(dataset=test_dataset,batch_size = batch_size,shuffle=False)

# Define Model 

1. To use an RNN-type model on images, we need to convert each image into a sequence of time steps.

2. Each image in the MNIST dataset initially has size 1 x 28 x 28 (channels x height x width).

3. Once the batch dimension is included, a batch of images has dimensions **(Batch_size x 1 x 28 x 28)**.

4. We need to reshape this into time steps: we drop the channel dimension and treat each row of the image as a single time step. Therefore one image can be described as TimeStep x InputSize (28 x 28), and a batch as **(Batch_size x 28 x 28)**.


In [65]:

# Simple traditional RNN 
class RNN_traditional(nn.Module):
  """Vanilla RNN classifier that uses the hidden states of every time step.

  The recurrent outputs of all time steps are flattened and passed through a
  single linear layer, so the sequence length must be known at construction
  time.

  Args:
    input_size: number of features per time step.
    hidden_size: width of the recurrent hidden state.
    num_layers: number of stacked recurrent layers.
    num_classes: number of output classes (logits returned by ``forward``).
    sequence_length: number of time steps in every input sequence. Defaults
      to 28 (one step per MNIST image row) so existing four-argument calls
      keep working without relying on a notebook-level global.
  """
  def __init__(self,input_size,hidden_size,num_layers,num_classes,sequence_length=28):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.sequence_length = sequence_length
    self.rnn = nn.RNN(input_size,hidden_size,num_layers,batch_first=True) #batch is first dimension
    # N x timeseq x time_feature
    self.fc = nn.Linear(hidden_size*sequence_length,num_classes)
  
  def forward(self,x):
    """Return class logits of shape (N, num_classes) for x of shape (N, sequence_length, input_size)."""
    # Allocate the initial hidden state directly on the input's device and dtype,
    # so the module works wherever the caller placed the model and the data.
    h0 = torch.zeros(self.num_layers,x.size(0),self.hidden_size,device=x.device,dtype=x.dtype)
    # Forward prop
    out,_ = self.rnn(x,h0)
    # Flatten (N, seq, hidden) -> (N, seq*hidden) so every time step feeds the classifier.
    out = out.reshape(out.shape[0],-1)
    out = self.fc(out)
    return out
  


In [70]:

# RNN with a GRU unit 
class RNN_GRU(nn.Module):
  """GRU classifier that uses the hidden states of every time step.

  The GRU outputs of all time steps are flattened and passed through a single
  linear layer, so the sequence length must be known at construction time.

  Args:
    input_size: number of features per time step.
    hidden_size: width of the recurrent hidden state.
    num_layers: number of stacked GRU layers.
    num_classes: number of output classes (logits returned by ``forward``).
    sequence_length: number of time steps in every input sequence. Defaults
      to 28 (one step per MNIST image row) so existing four-argument calls
      keep working without relying on a notebook-level global.
  """
  def __init__(self,input_size,hidden_size,num_layers,num_classes,sequence_length=28):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.sequence_length = sequence_length
    self.gru = nn.GRU(input_size,hidden_size,num_layers,batch_first=True) #batch is first dimension
    # N x timeseq x time_feature
    self.fc = nn.Linear(hidden_size*sequence_length,num_classes)
  
  def forward(self,x):
    """Return class logits of shape (N, num_classes) for x of shape (N, sequence_length, input_size)."""
    # Allocate the initial hidden state directly on the input's device and dtype,
    # so the module works wherever the caller placed the model and the data.
    h0 = torch.zeros(self.num_layers,x.size(0),self.hidden_size,device=x.device,dtype=x.dtype)
    # Forward prop
    out,_ = self.gru(x,h0)
    # Flatten (N, seq, hidden) -> (N, seq*hidden) so every time step feeds the classifier.
    out = out.reshape(out.shape[0],-1)
    out = self.fc(out)
    return out

In [82]:

# RNN with a LSTM unit 
class RNN_LSTM(nn.Module):
  """LSTM classifier that predicts from the output of the last time step only.

  Because only the final time step feeds the linear layer, the model does not
  depend on a fixed sequence length.

  Args:
    input_size: number of features per time step.
    hidden_size: width of the hidden and cell states.
    num_layers: number of stacked LSTM layers.
    num_classes: number of output classes (logits returned by ``forward``).
  """
  def __init__(self,input_size,hidden_size,num_layers,num_classes):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size,hidden_size,num_layers,batch_first=True) #batch is first dimension
    # N x timeseq x time_feature
    self.fc = nn.Linear(hidden_size,num_classes)
  
  def forward(self,x):
    """Return class logits of shape (N, num_classes) for x of shape (N, seq_len, input_size)."""
    # Allocate the initial hidden and cell states on the input's device and dtype,
    # so the module works wherever the caller placed the model and the data.
    state_shape = (self.num_layers,x.size(0),self.hidden_size)
    h0 = torch.zeros(state_shape,device=x.device,dtype=x.dtype)
    c0 = torch.zeros(state_shape,device=x.device,dtype=x.dtype)
    # Forward prop
    out,_ = self.lstm(x,(h0,c0))
    # Classify from the last time step's output only.
    out = self.fc(out[:,-1,:])
    return out

In [66]:
# Build the vanilla-RNN classifier with its loss and optimizer.
# Note: this rebinds `model`, `criterion` and `optimizer`, replacing whatever
# an earlier cell assigned to those names.
model = RNN_traditional(
  input_size=input_size,
  hidden_size=hidden_size,
  num_layers=num_layers,
  num_classes=num_classes,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(),lr=learning_rate)

In [71]:
# With the GRU unit
# Note: this rebinds `model`, `criterion` and `optimizer`, replacing whatever
# an earlier cell assigned to those names.
model = RNN_GRU(
  input_size=input_size,
  hidden_size=hidden_size,
  num_layers=num_layers,
  num_classes=num_classes,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(),lr=learning_rate)

In [83]:
# With the LSTM unit
# Note: this rebinds `model`, `criterion` and `optimizer`, replacing whatever
# an earlier cell assigned to those names.
model = RNN_LSTM(
  input_size=input_size,
  hidden_size=hidden_size,
  num_layers=num_layers,
  num_classes=num_classes,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(),lr=learning_rate)

In [84]:
import time 

# Trains whichever network is currently bound to `model`, using the matching
# `criterion` and `optimizer` created alongside it.
# Make sure the network is in training mode, even if an earlier evaluation
# was interrupted before it could switch the mode back.
model.train()

# perf_counter is a monotonic clock meant for measuring intervals;
# time.time() follows the wall clock, which can jump while training runs.
total_time = time.perf_counter()
for epoch in range(num_epochs):
  epoch_time = time.perf_counter()
  print(f"Epoch : {epoch}")
  for data,targets in train_loader:
    # Move the batch to the target device and drop the channel dimension:
    # (N, 1, 28, 28) -> (N, 28, 28), i.e. 28 time steps of 28 features.
    data = data.to(device=device).squeeze(1)
    targets = targets.to(device=device)

    # forward
    scores = model(data)
    # loss value
    loss = criterion(scores,targets)
    # backward prop
    optimizer.zero_grad()  # clear the gradients left over from the previous batch
    loss.backward()

    # gradient descent
    optimizer.step()
  # This is the loss of the last mini-batch of the epoch, not an epoch average.
  print(f"Loss = {loss.item():.4f}")
  print(f"Time : {time.perf_counter()-epoch_time:.3f} sec")
  print("------------------------------------------------")

time_taken = time.perf_counter() - total_time
print(f"Overall Model Execution time : {time_taken:.3f} seconds" )
print(f"Overall Model Execution time : {time_taken/60:.3f} minutes")
print(f"Overall Model Execution time : {time_taken/3600:.3f} hours")




Epoch : 0
Loss = 0.1325
Time : 12.847 sec
------------------------------------------------
Epoch : 1
Loss = 0.0027
Time : 12.652 sec
------------------------------------------------
Overall Model Execution time : 25.501 seconds
Overall Model Execution time : 0.425 minutes
Overall Model Execution time : 0.007 hours


# Accuracy of the model

In [85]:
def checkAccuracy(loader,model):
  """Print the classification accuracy of ``model`` over every batch in ``loader``.

  Args:
    loader: iterable yielding ``(inputs, labels)`` batches. Inputs have their
      dimension 1 squeezed away before being passed to the model. If
      ``loader.dataset`` has a ``train`` attribute it selects the printed
      header; otherwise a generic header is printed.
    model: ``nn.Module`` mapping a batch of inputs to per-class scores of
      shape (N, num_classes).

  Returns:
    None. The header and the accuracy line are printed.
  """
  # Not every dataset exposes a `train` flag, so fall back to a neutral header.
  is_train_split = getattr(getattr(loader,"dataset",None),"train",None)
  if is_train_split is None:
    print("Accuracy on Dataset")
  elif is_train_split:
    print("Accuracy on Training Dataset")
  else:
    print("Accuracy on the Testing Dataset")

  # Evaluate on the device the model lives on rather than on a notebook global.
  first_param = next(model.parameters(),None)
  eval_device = first_param.device if first_param is not None else torch.device("cpu")

  num_correct = 0
  num_samples = 0 
  was_training = model.training
  model.eval()

  try:
    with torch.no_grad():
      for x,y in loader:
        x = x.to(device=eval_device).squeeze(1)
        y = y.to(device=eval_device)
        scores = model(x)
        _,predictions = scores.max(1)
        num_correct+= (predictions==y).sum()
        num_samples+=predictions.size(0)
  finally:
    # Restore the mode the caller had, even if evaluation raised.
    model.train(was_training)

  # Convert once at the end (a 0-dim tensor or the initial int 0) to a plain int.
  num_correct = int(num_correct)
  # An empty loader reports 0.00 instead of dividing by zero.
  accuracy = float(num_correct)/float(num_samples)*100 if num_samples else 0.0
  print(f" Got {num_correct}/ {num_samples} with accuracy : {accuracy:.2f}")

In [69]:
print("RNN traditional")
# The label above is fixed text; the network scored below is whatever
# `model` currently refers to.
for eval_loader in (train_loader,test_loader):
  checkAccuracy(eval_loader,model)

RNN traditional
Accuracy on Training Dataset
 Got 57633/ 60000 with accuracy : 96.06
Accuracy on the Testing Dataset
 Got 9575/ 10000 with accuracy : 95.75


In [73]:
print("RNN with GRU unit ")
# The label above is fixed text; the network scored below is whatever
# `model` currently refers to.
for eval_loader in (train_loader,test_loader):
  checkAccuracy(eval_loader,model)

RNN with GRU unit 
Accuracy on Training Dataset
 Got 59231/ 60000 with accuracy : 98.72
Accuracy on the Testing Dataset
 Got 9840/ 10000 with accuracy : 98.40


In [86]:
print("RNN with LSTM unit ")
# The label above is fixed text; the network scored below is whatever
# `model` currently refers to.
for eval_loader in (train_loader,test_loader):
  checkAccuracy(eval_loader,model)

RNN with LSTM unit 
Accuracy on Training Dataset
 Got 58827/ 60000 with accuracy : 98.05
Accuracy on the Testing Dataset
 Got 9811/ 10000 with accuracy : 98.11
