<a href="https://colab.research.google.com/github/dkhanna511/Pytorch_tutorials_Revision/blob/main/RNN_GRU_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
import torch
import torchvision
import torch.nn as nn ## All neural network modules, nn.Linear, nn.Con2d, BatchNorm, Loss Functions
from torch.utils.data import DataLoader  ## Gives easier data management and creates  minii batches
import torch.nn.functional as F  ## All functions that dont have parameters
import torchvision.datasets as datasets ## Has standard datasets that can we can import in a nice way
import torchvision.transforms as transforms ## Transformations we can perform on our dataset
import torch.optim as optim ## For all optimizartion algoerithms such as SGD, Adam, etc


In [27]:
## Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [28]:
## Hyperparameters (this notebook only targets the MNIST dataset)

## Input geometry: the RNN reads one image row per time step,
## so a 28 x 28 image becomes 28 steps of 28 features each
sequence_length = 28
input_size = 28
num_classes = 10

## Size of the recurrent network
hidden_size = 256
num_layers = 2

## Optimisation settings
batch_size = 64
num_epochs = 2
learning_rate = 1e-3


In [29]:
## Create an RNN

class RNN(nn.Module):
    """Recurrent classifier that consumes a sequence of feature rows.

    A stacked LSTM processes the input sequence; the hidden outputs of
    *every* time step are flattened into one vector per sample and mapped
    to class scores by a single linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden state of each recurrent layer.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of scores produced per sample.
        seq_len: Number of time steps in every input sequence. It fixes the
            input width of the linear layer (``hidden_size * seq_len``).
            When omitted, the notebook-level ``sequence_length``
            hyperparameter is used, which keeps existing
            ``RNN(input_size, hidden_size, num_layers, num_classes)`` calls
            working unchanged.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None):
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback to the notebook hyperparameter.
            seq_len = sequence_length

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as N x time_seq x features.
        # nn.RNN and nn.GRU accept the same constructor arguments, but they
        # take a single initial state ``h0`` instead of the ``(h0, c0)`` pair
        # used in forward().
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        # The classifier sees the hidden outputs of all time steps at once.
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for ``x`` of shape (N, seq_len, input_size)."""
        batch = x.size(0)
        state_shape = (self.num_layers, batch, self.hidden_size)

        # Allocate the initial states directly on the input's device/dtype so
        # the module does not depend on a global ``device`` and avoids a
        # CPU allocation followed by a copy.
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        ## Forward propagation (LSTM needs both the hidden and the cell state)
        out, _ = self.rnn(x, (h0, c0))

        # Flatten (N, seq_len, hidden_size) -> (N, seq_len * hidden_size)
        out = out.reshape(batch, -1)

        return self.fc(out)





In [30]:
### Loading data

## Training split: converted to tensors and downloaded into the root directory
train_dataset = datasets.MNIST(
    root="/dataset",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

## Test split: same root and transform, but no download is requested here
## (presumably it relies on the files fetched for the training split above)
test_dataset = datasets.MNIST(
    root="/dataset",
    train=False,
    transform=transforms.ToTensor(),
    download=False,
)


In [31]:
### Create Dataloaders

## Training batches are reshuffled; the test batches keep the dataset order
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)


In [32]:
### Initialize the model

## Build the network from the hyperparameters and move it to the selected device
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)

In [33]:
### Loss and Optimizer

## Cross-entropy over the class scores, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [34]:
#### Train network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):

        ## MNIST batches arrive as (N, 1, 28, 28) but the RNN needs (N, 28, 28),
        ## so drop the channel axis and move the batch to the selected device
        data = data.squeeze(1).to(device)
        targets = targets.to(device)

        ## Clear the gradients left over from the previous step
        optimizer.zero_grad()

        ## Forward propagation
        predictions = model(data)
        loss = criterion(predictions, targets)

        ## Backward propagation followed by one optimizer (Adam) update
        loss.backward()
        optimizer.step()

    ## The value reported is the loss of the last mini-batch of the epoch
    print("Epochs : {}, Loss : {}".format(epoch, loss))


Epochs : 0, Loss : 0.059881649911403656
Epochs : 1, Loss : 0.0641922652721405


In [35]:
## Checking the accuracy


def check_accuracy(loader, model):
    """Evaluate ``model`` on every batch of ``loader`` and report its accuracy.

    Each batch is moved to the device the model's parameters live on (CPU
    when the model has no parameters), the singleton channel axis is
    squeezed away, and the arg-max of the scores is compared to the labels.

    Args:
        loader: Iterable yielding ``(x, y)`` batches. If it exposes a
            ``dataset`` with a boolean ``train`` attribute, that flag is used
            to label the printed report; otherwise a generic label is used.
        model: ``nn.Module`` mapping a batch to per-class scores ``(N, C)``.

    Returns:
        float: Accuracy in percent. ``0.0`` when the loader yields no samples.

    Side effects:
        Prints the report. The model is switched to eval mode for the
        duration of the check and then restored to whichever mode
        (train/eval) it was in before the call.
    """
    # Not every dataset exposes a ``train`` flag (e.g. TensorDataset, Subset).
    is_train = getattr(getattr(loader, "dataset", None), "train", None)
    if is_train is None:
        print("checking accuracy on data")
    elif is_train:
        print(" checking accuracy on training data")
    else:
        print("checking accuracy on testing data")

    # Follow the model's own placement instead of relying on a global device.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device).squeeze(1)
                y = y.to(device=target_device)

                scores = model(x)
                _, predictions = scores.max(1)

                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if evaluation raised.
        model.train(was_training)

    # Convert the accumulated 0-dim tensor (or the initial 0) to a plain int.
    num_correct = int(num_correct)
    # Guard against an empty loader instead of dividing by zero.
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0

    print(" Got {}/{} with accuracy of {}".format(num_correct, num_samples, accuracy))
    return accuracy

In [36]:
## Report accuracy on the training split first, then on the test split
for loader in (train_loader, test_loader):
    check_accuracy(loader, model)

 checking accuracy on training data
 Got 59266/60000 with accuracy of 98.77666666666667
checking accuracy on testing data
 Got 9871/10000 with accuracy of 98.71
