<a href="https://colab.research.google.com/github/aishoo1612/Computer-Vision/blob/master/Implementing_LSTM_with_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch 
import torch.nn as nn
import torchvision

from torchvision import transforms, datasets, models

In [6]:
train_dataset = datasets.MNIST(root='./data', 
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = datasets.MNIST(root='./data', 
                           train=False, 
                           transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!




In [11]:
print(train_dataset.train_data.size())
print(test_dataset.test_data.size())

torch.Size([60000, 28, 28])
torch.Size([10000, 28, 28])




In [12]:
#iterable object for our model 


train_loader = torch.utils.data.DataLoader( train_dataset, batch_size= 64, shuffle= True)

test_loader = torch.utils.data.DataLoader( test_dataset, batch_size= 64, shuffle= True)

In [18]:
#creating LSTM model 

class LSTMModel(nn.Module):
  def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
    super(LSTMModel, self).__init__()
    #hidden dimension 
    self.hidden_dim = hidden_dim 

    #layer dimension 
    self.layer_dim = layer_dim 

    #output dimension 
    self.output_dim = output_dim 
    
    #building LSTM 
    #batch_first = True, because data can be out of shape 
    self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)

    #readout layer 
    #first layer has linear function such as : ax +b 
    self.fc = nn.Linear(hidden_dim, output_dim)


#forward function as usual ;)

def forward(self, x):
  #initialize the hidden layer with zeroes :
  hidden_0 = torch.zeroes(self.layer_dim, x.size(0), self.hidden_dim).requires_grad()

  #initializing the cell state with zeroes ;
  cell_0 = torch.zeroes(self.layer_dim, x.size(0), self.hidden_dim).requires_grad()

  #As the images are 28*28, we need to run through it 28 times(unrolling), 
  #Now we have to run through them all at once, because we are using BPTT,
  #it finds the derivatives of the error, and then adjusts weights accordingly 

  out, (hn, cn) = self.lstm(x, (hidden_0.detach(), cell_0.detach()))#.detach(), stops those elements from getting backpropagated again, once after their values have been propogated

  out = self.fc(out[:, -1, :])






  





In [19]:
#Instaniate LSTM :

input_dim = 28 #(each image is of 28*28 in MNIST)
hidden_dim = 100 
layer_dim = 1
output_dim = 10 #0->9)

In [20]:
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

In [22]:
#Calculating loss, 
#since it is a classification problem, we will be using CSE (Cross Entropy Loss as usual): 

criterion = nn.CrossEntropyLoss()

import torch.optim as optim


In [23]:
#optimizer Class :
#parameters = parameters - learning_rate * parameters_gradients

#mini Batch stochiastic gradient descent : 
lr= 0.1 
optimizer = optim.SGD(model.parameters(), lr = 0.1)


In [24]:
len(list(model.parameters()))

6

In [25]:
for i in range(len(list(model.parameters()))):
    print(list(model.parameters())[i].size())

torch.Size([400, 28])
torch.Size([400, 100])
torch.Size([400])
torch.Size([400])
torch.Size([10, 100])
torch.Size([10])


In [26]:
# Number of steps to unroll
seq_dim = 28  

iter = 0
for epoch in range(3000):
    for i, (images, labels) in enumerate(train_loader):
        # Load images as a torch tensor with gradient accumulation abilities
        images = images.view(-1, seq_dim, input_dim).requires_grad_()

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        # outputs.size() --> 100, 10
        outputs = model(images)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for images, labels in test_loader:
                # Resize images
                images = images.view(-1, seq_dim, input_dim)

                # Forward pass only to get logits/output
                outputs = model(images)

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                correct += (predicted == labels).sum()

            accuracy = 100 * correct / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

NotImplementedError: ignored