# BiLSTM implementation

In [2]:
import torch 
import torchvision 
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim 
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Set Device

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper Parameters

In [5]:
input_size = 28 
sequence_length = 28 
num_layers = 2
hidden_size = 256
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 2

# Load the datasets 

In [6]:
train_dataset = datasets.MNIST(root="content/",train=True,transform = transforms.ToTensor(),download=True)
train_loader = DataLoader(dataset=train_dataset,batch_size = batch_size,shuffle=True)

test_dataset = datasets.MNIST(root="content/",train=False,transform = transforms.ToTensor(),download=True)
test_loader = DataLoader(dataset=test_dataset,batch_size = batch_size,shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to content/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting content/MNIST/raw/train-images-idx3-ubyte.gz to content/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to content/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting content/MNIST/raw/train-labels-idx1-ubyte.gz to content/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to content/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting content/MNIST/raw/t10k-images-idx3-ubyte.gz to content/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to content/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting content/MNIST/raw/t10k-labels-idx1-ubyte.gz to content/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# Model 

In [15]:
class BRNN(nn.Module):
  def __init__(self,input_size,hidden_size,num_layers,num_classes):
   super(BRNN,self).__init__()
   self.hidden_size = hidden_size
   self.num_layers = num_layers
   self.lstm = nn.LSTM(input_size,hidden_size,num_layers,batch_first = True,bidirectional = True)
   # Bi-Directional LSTM x2
   self.fc = nn.Linear(hidden_size*2,num_classes)

  def forward(self,x):
    h0 = torch.zeros(self.num_layers*2,x.size(0),self.hidden_size).to(device)    
    c0 = torch.zeros(self.num_layers*2,x.size(0),self.hidden_size).to(device)
    out,_ = self.lstm(x,(h0,c0))
    out = self.fc(out[:,-1,:])
    return out


In [16]:
model = BRNN(input_size,hidden_size,num_layers,num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=learning_rate)

In [17]:
import time 
total_time = time.time()
for epoch in range(num_epochs):
  epoch_time = time.time()
  print(f"Epoch : {epoch}")
  for batch_idx,(data,targets) in enumerate(train_loader):
    # get data to cuda
    data = data.to(device=device).squeeze(1)
    targets = targets.to(device=device)

    #forward
    scores = model(data)
    # loss value
    loss = criterion(scores,targets)
    # backward prop
    optimizer.zero_grad()  # sets previous epoch gradient zero
    loss.backward()

    # gradient descent
    optimizer.step()
  print(f"Loss = {loss:.4f}")
  print(f"Time : {time.time()-epoch_time:.3f} sec")
  print("------------------------------------------------")

time_taken = time.time() - total_time
print(f"Overall Model Execution time : {time_taken:.3f} seconds" )
print(f"Overall Model Execution time : {time_taken/60:.3f} minutes")
print(f"Overall Model Execution time : {time_taken/3600:.3f} hours")


Epoch : 0
Loss = 0.0623
Time : 12.262 sec
------------------------------------------------
Epoch : 1
Loss = 0.0236
Time : 12.463 sec
------------------------------------------------
Overall Model Execution time : 24.726 seconds
Overall Model Execution time : 0.412 minutes
Overall Model Execution time : 0.007 hours


In [18]:
def  checkAccuracy(loader,model):
  if loader.dataset.train:
    print("Accuracy on Training Dataset")
  else:
    print("Accuracy on the Testing Dataset")
  
  num_correct = 0
  num_samples = 0 
  model.eval()

  with torch.no_grad():
    for x,y in loader:
      x = x.to(device=device).squeeze(1)
      y = y.to(device=device)
      scores = model(x)
      _,predictions = scores.max(1)
      num_correct+= (predictions==y).sum()
      num_samples+=predictions.size(0)
    
    print(f" Got {num_correct}/ {num_samples} with accuracy : {float(num_correct)/float(num_samples)*100:.2f}")
  model.train()

In [19]:
checkAccuracy(train_loader,model)
checkAccuracy(test_loader,model)    

Accuracy on Training Dataset
 Got 58736/ 60000 with accuracy : 97.89
Accuracy on the Testing Dataset
 Got 9772/ 10000 with accuracy : 97.72
