<a href="https://colab.research.google.com/github/dkhanna511/Pytorch_tutorials_Revision/blob/main/Bi_directional_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import torch
import torchvision
import torch.nn as nn ## All neural network modules, nn.Linear, nn.Con2d, BatchNorm, Loss Functions
from torch.utils.data import DataLoader  ## Gives easier data management and creates  minii batches
import torch.nn.functional as F  ## All functions that dont have parameters
import torchvision.datasets as datasets ## Has standard datasets that can we can import in a nice way
import torchvision.transforms as transforms ## Transformations we can perform on our dataset
import torch.optim as optim ## For all optimizartion algoerithms such as SGD, Adam, etc

In [21]:
device= torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [22]:
## Hyperparameters  (working on MNIST Dataset only for this one)
input_size = 28
sequence_length = 28   ## We're taking one row at a time of the3 images in RNN
num_layers = 2
hidden_size = 256
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 2


In [23]:
## Create an Bi-directional LSTM

class Bidrectional_LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm= nn.LSTM(input_size, hidden_size, num_layers, batch_first = True, bidirectional= True) # The dataset has batches as their axis, we need to say it's True for getting it in correct order of sequence.  N x time_seq x features

        self.fc = nn.Linear(hidden_size * 2, num_classes)


    def forward(self, x):
        h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device)  ## num_layers*2 --> one going forward, one going backward
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        ## Forward propagation

        out, _ = self.lstm(x, (h0, c0)) #### This is if you want to run LSTM

        out = self.fc(out[:, -1, :])

        return out




In [24]:
### Loading data

train_dataset = datasets.MNIST(root = "/dataset", train = True, transform = transforms.ToTensor(), download = True)

test_dataset = datasets.MNIST(root = "/dataset", train = False, transform=transforms.ToTensor(), download = False)

In [25]:
### Create Dataloaders

train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)


In [27]:
### Initialize the model

model =Bidrectional_LSTM(input_size, hidden_size, num_layers, num_classes).to(device)

In [28]:
### Loss and Optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [29]:
#### Train network
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):

        ## Get data to cusa if possible()
        data = data.to(device = device).squeeze(1)   ## MNIST has dataset format (N, 1, 28, 28) but RNN needs (N, 28, 28), so we just squeeze it to match the format
        targets = targets.to(device = device)

        ## Forward propagation

        predictions = model(data)
        loss = criterion(predictions, targets)

        ## Backward propagation
        optimizer.zero_grad()
        loss.backward()

        ## gradient descent or Adam step
        optimizer.step() ## It just makes a step to try to reach a local/global minimum

        print("Epochs : {},  Batch : {}, Loss : {}".format(epoch, batch_idx, loss))


Epochs : 0,  Batch : 0, Loss : 2.3020262718200684
Epochs : 0,  Batch : 1, Loss : 2.2918701171875
Epochs : 0,  Batch : 2, Loss : 2.288234233856201
Epochs : 0,  Batch : 3, Loss : 2.3253841400146484
Epochs : 0,  Batch : 4, Loss : 2.288038492202759
Epochs : 0,  Batch : 5, Loss : 2.306373357772827
Epochs : 0,  Batch : 6, Loss : 2.303168535232544
Epochs : 0,  Batch : 7, Loss : 2.299791097640991
Epochs : 0,  Batch : 8, Loss : 2.2637624740600586
Epochs : 0,  Batch : 9, Loss : 2.2621474266052246
Epochs : 0,  Batch : 10, Loss : 2.295384645462036
Epochs : 0,  Batch : 11, Loss : 2.2773056030273438
Epochs : 0,  Batch : 12, Loss : 2.2246062755584717
Epochs : 0,  Batch : 13, Loss : 2.212144374847412
Epochs : 0,  Batch : 14, Loss : 2.126290798187256
Epochs : 0,  Batch : 15, Loss : 2.119276523590088
Epochs : 0,  Batch : 16, Loss : 2.276202440261841
Epochs : 0,  Batch : 17, Loss : 2.1511006355285645
Epochs : 0,  Batch : 18, Loss : 2.131728410720825
Epochs : 0,  Batch : 19, Loss : 1.9986038208007812
Epoc

In [30]:
## Checking the accuracy


def check_accuracy(loader, model):
    if loader.dataset.train:
        print(" checking accuracy on training data")
    else:
        print("checking accuracy on testing data")


    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device = device).squeeze(1)
            y = y.to(device = device)

            scores = model(x)
            _, predictions = scores.max(1)

            num_correct +=(predictions == y).sum()
            num_samples +=predictions.size(0)

        print(" Got {}/{} with accuracy of {}".format(num_correct, num_samples, float(num_correct)/float(num_samples)*100))


    model.train()

In [31]:
check_accuracy(train_loader, model)
check_accuracy(test_loader, model)

 checking accuracy on training data
 Got 58707/60000 with accuracy of 97.845
checking accuracy on testing data
 Got 9778/10000 with accuracy of 97.78
