<a href="https://colab.research.google.com/github/Vyoma-garg/Natural-Language-Processing/blob/main/Bidirectional_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn  #all the NN network, CNN, Modules, loss functions, activation fns
import torch.optim as optim  #SGD, ADAM
import torch.nn.functional as F  #activation fn, 
from torch.utils.data import DataLoader  #easier data managemnet, creates mini batches of the data
import torchvision.datasets as datasets   #standard datasets MNISt ,etc
import torchvision.transforms as transforms  # transformations 

Create Bidirectional LSTM

In [None]:
class BiLSTM(nn.Module):
    """Bidirectional multi-layer LSTM followed by a linear classification head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per direction in every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output scores produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True -> the LSTM expects input as (batch, seq, features).
        self.bilstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True,
                              bidirectional=True)
        # Forward and backward summaries are concatenated, hence 2 * hidden_size inputs.
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for x of shape (batch, seq, input_size)."""
        # Build the initial states on the input's own device/dtype instead of a
        # notebook-level `device` global, so the module works wherever x lives.
        # First dimension is num_layers * 2 because each layer has two directions.
        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        _, (hidden_state, _) = self.bilstm(x, (h0, c0))

        # hidden_state is ordered (layer0 fwd, layer0 bwd, ..., last fwd, last bwd).
        # The forward direction finishes at the last time step and the backward
        # direction finishes at the first one, so the two final hidden states of the
        # top layer each summarise the whole sequence. Indexing the output sequence
        # at [:, -1, :] would give a backward half that has only seen one time step.
        features = torch.cat((hidden_state[-2], hidden_state[-1]), dim=1)
        return self.fc(features)



Checking 

In [None]:
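#model = BiLSTM(input_size, hidden_size, num_layers, num_classes).to(device)  #needs the device and hyperparameter cells below to have run
#x = torch.randn(64,28,28).to(device)  #randomly generated data: batch size=64, 28 time steps, 28 features per step
#print(x)
#print('\n',model(x).shape,'\n')  #64x10   
#print(model(x)) 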

Setting device

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

Hyperparameters

In [None]:
# Input geometry: each image is fed to the LSTM row by row.
input_size = 28        # features per time step
sequence_length = 28   # time steps per sequence (one row per step)

# Model capacity
num_layers = 2
hidden_size = 256
num_classes = 10

# Optimisation settings
learning_rate = 0.001
batch_size = 64
num_epochs = 1

Loading dataset

In [None]:
# MNIST is downloaded into /content on first use; ToTensor() converts each image to a tensor.
train_dataset = datasets.MNIST(root='/content', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='/content', train=False, transform=transforms.ToTensor(), download=True)

# Shuffle only the training batches. Evaluation does not depend on sample order,
# so the test loader keeps a fixed order for reproducible passes.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting /content/MNIST/raw/train-images-idx3-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting /content/MNIST/raw/train-labels-idx1-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to /content/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting /content/MNIST/raw/t10k-images-idx3-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting /content/MNIST/raw/t10k-labels-idx1-ubyte.gz to /content/MNIST/raw

Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Initialize network

In [None]:
# Build the classifier from the hyperparameters, then move its parameters to the selected device.
model = BiLSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)

Loss and Optimizer

In [None]:
# Adam updates every model parameter; cross-entropy scores the class predictions.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

Train Network

In [None]:
for epoch in range(num_epochs):
    for batch_idx, (images, labels) in enumerate(train_loader):
        # Move the batch to the compute device; squeeze(1) drops the singleton
        # channel axis: [batch, 1, 28, 28] -> [batch, 28, 28].
        images = images.to(device).squeeze(1)
        labels = labels.to(device)

        # Forward pass: class scores and their loss against the labels.
        logits = model(images)
        loss = criterion(logits, labels)

        # Backward pass: clear stale gradients before computing new ones.
        optimizer.zero_grad()
        loss.backward()

        # Apply the optimizer update using the fresh gradients.
        optimizer.step()



Check accuracy

In [None]:
def check_accuracy(loader, model):
    """Print the classification accuracy of `model` over every batch in `loader`.

    Each batch is moved to the device that holds the model's parameters, so no
    notebook-level `device` variable is required. The model is switched to eval
    mode for the pass and its previous train/eval mode is restored afterwards,
    even if evaluation raises.

    Args:
        loader: Iterable of (inputs, labels) batches exposing a `.dataset`
            attribute. Inputs have their dimension 1 squeezed before being
            passed to the model.
        model: Module returning per-class scores of shape (batch, classes).

    Returns:
        None. The result is printed.
    """
    # Not every dataset exposes a `train` flag; use a neutral header in that case.
    is_train = getattr(loader.dataset, 'train', None)
    if is_train is None:
        print('Checking Accuracy')
    elif is_train:
        print('Checking Accuracy on Training Data')
    else:
        print('Checking Accuracy on Test Data')

    # Evaluate on whichever device the model lives on; a model without
    # parameters leaves the batches where they already are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():  # gradients are not needed to measure accuracy
            for x, y in loader:
                if target_device is not None:
                    x = x.to(device=target_device)
                    y = y.to(device=target_device)
                x = x.squeeze(1)

                scores = model(x)
                _, predictions = scores.max(1)  # index of the highest score per sample
                num_correct += int((predictions == y).sum())
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode rather than unconditionally forcing train mode.
        model.train(was_training)

    # An empty loader reports 0.00 instead of raising ZeroDivisionError.
    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
    print(f'Got {num_correct}/ {num_samples} with accuracy {accuracy:.2f} \n')
  

# Report accuracy on the training split first, then on the test split.
for split_loader in (train_loader, test_loader):
    check_accuracy(split_loader, model)



Checking Accuracy on Training Data
Got 57138/ 60000 with accuracy 95.23 

Checking Accuracy on Test Data
Got 9524/ 10000 with accuracy 95.24 

