In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory and echo its full path.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [5]:
import torch
import random
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tqdm.auto import tqdm

In [6]:
 # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
 device_name = "cuda" if torch.cuda.is_available() else "cpu"
 device = torch.device(device_name)

#### Understanding the input and output shapes of an RNN

In [7]:
# Toy dimensions used to probe nn.RNN's input/output shapes.
embedding_size = sequence_length = 28  # features per time step / time steps per sequence
hidden_size = 1000
batch_size = 10000
num_layers = 1
bidirectional = False

# batch_first=True makes the RNN take its input as (batch, sequence, feature).
rnn_kwargs = dict(
    input_size=embedding_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    bidirectional=bidirectional,
    batch_first=True,
)
rnn = nn.RNN(**rnn_kwargs)

In [8]:
# Random input batch plus an all-zero initial hidden state.
# The hidden state needs one slab per (layer, direction) pair.
num_directions = 2 if bidirectional else 1
X = torch.randn(batch_size, sequence_length, embedding_size)
h_0 = torch.zeros(num_directions * num_layers, batch_size, hidden_size)

In [9]:
# Show the shapes of the tensors about to be fed to the RNN.
print(
    f"Input Vector size: - {X.shape}",
    f"Initial Hidden State size: - {h_0.shape}",
    sep="\n",
)

Input Vector size: - torch.Size([10000, 28, 28])
Initial Hidden State size: - torch.Size([1, 10000, 1000])


In [10]:
# nn.RNN returns a pair: the per-time-step outputs and the final hidden state.
rnn_result = rnn(X, h_0)
outputs, h_n = rnn_result

In [11]:
# Show the shapes of what the RNN produced.
print(
    f"Output size: - {outputs.shape}",
    f"Final Hidden State size: - {h_n.shape}",
    sep="\n",
)

Output size: - torch.Size([10000, 28, 1000])
Final Hidden State size: - torch.Size([1, 10000, 1000])


In [12]:
# Drop the leading size-1 axis of the final hidden state for display only.
# The out-of-place squeeze() leaves h_n itself untouched, so re-running the
# earlier shape-printing cell still reports the original 3-D shape.
h_n.squeeze(0).size()

torch.Size([10000, 1000])

#### Implementing the architecture using a unidirectional, single-layer RNN

In [42]:
class RNN(nn.Module):
    """Sequence classifier: a single-layer, unidirectional ``nn.RNN`` followed by a linear head.

    Only the final hidden state of the RNN is used; it is passed through a
    tanh and then projected to ``num_classes`` scores.

    Args:
        batch_size: Stored on the instance; not used by ``forward``.
        embedding_size: Number of features per time step (the RNN's ``input_size``).
        hidden_size: Width of the RNN hidden state.
        num_classes: Number of output scores per example.
    """

    def __init__(self,batch_size, embedding_size, hidden_size, num_classes):
        super().__init__()

        # Keep the configuration around for inspection.
        self.batch_size = batch_size
        self.embedding_size = embedding_size  # a.k.a. input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes

        # Recurrent encoder; batch_first=True means input is (batch, sequence, feature).
        self.rnn = nn.RNN(
            input_size=embedding_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        # Classification head on top of the final hidden state.
        self.linear = nn.Linear(in_features=hidden_size, out_features=num_classes)

        # Activation applied to the final hidden state before the head.
        # NOTE(review): nn.RNN's default nonlinearity is already tanh, so this
        # is a second tanh on the same state -- confirm that is intended.
        self.tanh = nn.Tanh()

    def forward(self, X):
        """Return class scores for a batch of sequences.

        Args:
            X: Input batch. A size-1 axis at position 1 (if present) is removed
                before the RNN, so both (batch, 1, sequence, feature) and
                (batch, sequence, feature) inputs are accepted.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised scores.
        """
        sequences = X.squeeze(1)
        # nn.RNN returns (all_outputs, final_hidden); only the latter is needed.
        final_hidden = self.rnn(sequences)[1]
        activated = self.tanh(final_hidden)
        # Remove the leading single-layer axis: (1, batch, hidden) -> (batch, hidden).
        scores = self.linear(activated.squeeze(0))
        return scores

#### Hyperparameters

In [43]:
# Settings for the training run.
in_channels = 1        # not referenced by the cells shown in this notebook
num_classes = 10
learning_rate = 1e-3
batch_size = 10_000    # examples per mini-batch
num_epochs = 100

#### Loading Data (can also be loaded from the Digit Recognizer dataset, but that requires some preprocessing)

In [44]:
# Fetch the MNIST train and test splits into the Kaggle working directory,
# converting every image to a tensor on access.
mnist_options = dict(root="/kaggle/working/", download=True)
train_dataset = datasets.MNIST(train=True, transform=transforms.ToTensor(), **mnist_options)
test_dataset = datasets.MNIST(train=False, transform=transforms.ToTensor(), **mnist_options)

In [45]:
# Mini-batch iterators (DataLoaders) over the two splits.
# Training batches are reshuffled every epoch. The test split is read in a
# fixed order: shuffling cannot change the measured accuracy and only makes
# evaluation order nondeterministic.
train_dataloader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
test_dataloader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

#### Creating an instance of the model

In [46]:
# Each time step receives 28 features and the hidden state is 1000 wide.
# The number of output scores comes from the num_classes hyperparameter rather
# than a second hard-coded 10, so the two cannot drift apart.
model = RNN(batch_size = batch_size, embedding_size = 28, hidden_size = 1000, num_classes = num_classes)

In [47]:
# Cross-entropy on the raw scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
model = model.to(device)
# Enter training mode explicitly: evaluate() leaves the model in eval mode, so
# re-running this cell after an evaluation would otherwise keep training in it.
model.train()
# NOTE: range(num_epochs + 1) performs num_epochs + 1 passes (epochs 0..num_epochs)
# so that the last pass lands on the every-10th-epoch log line below.
for epoch in tqdm(range(num_epochs + 1)):
    epoch_loss = 0
    for batch in train_dataloader:
        # Work on local names instead of overwriting entries of the batch list.
        images = batch[0].to(device)
        labels = batch[1].to(device)
        # Call the module itself (not .forward) so nn.Module's hooks are honoured.
        inference = model(images)

        loss = criterion(inference, labels)

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
        epoch_loss += loss.item()
    if epoch % 10 == 0:
        # Mean per-batch loss for this epoch, logged every 10th epoch.
        print(f"epoch:- {epoch:03d} loss :- {epoch_loss/len(train_dataloader)}")

  0%|          | 0/101 [00:00<?, ?it/s]

epoch:- 000 loss :- 2.4832040270169577
epoch:- 010 loss :- 2.3023281494776406
epoch:- 020 loss :- 2.3016357819239297
epoch:- 040 loss :- 1.618962029616038
epoch:- 050 loss :- 1.2499530911445618
epoch:- 060 loss :- 0.911984254916509
epoch:- 070 loss :- 0.6202460825443268
epoch:- 080 loss :- 0.6948716839154562
epoch:- 090 loss :- 0.32425135374069214
epoch:- 100 loss :- 0.24893589317798615


In [51]:
def evaluate(model, dataloader):
    """Print the classification accuracy of ``model`` over ``dataloader``.

    Args:
        model: ``nn.Module`` that maps a batch of inputs to one row of class
            scores per example.
        dataloader: Iterable of batches where ``batch[0]`` holds the inputs
            and ``batch[1]`` the integer class labels.

    Returns:
        None. The result is printed as ``Accuracy:- <value>``; ``nan`` is
        printed when the dataloader yields no examples.

    Side effects:
        Switches ``model`` to eval mode and leaves it there.
    """
    model.eval()

    # Send each batch to wherever the model's weights live instead of relying
    # on a global device variable. A model without parameters leaves the
    # batches where they already are.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    num_correct = 0
    total_example = 0
    with torch.no_grad():
        for batch in dataloader:
            inputs, labels = batch[0], batch[1]
            if target_device is not None:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)

            # Call the module itself (not .forward) so nn.Module's hooks are honoured.
            inference = model(inputs)
            # The index of the largest score is the predicted class, so no softmax is needed.
            y_pred = inference.argmax(dim = 1)
            # .item() keeps the counter a Python int; dividing a tensor count
            # yields a float32 and prints e.g. 0.9299999475479126 instead of 0.93.
            num_correct += (y_pred == labels).sum().item()
            total_example += inference.shape[0]

    accuracy = num_correct / total_example if total_example else float("nan")
    print(f"Accuracy:- {accuracy}")
            

In [52]:
# Accuracy on the held-out test split
evaluate(model = model, dataloader = test_dataloader)

Accuracy:- 0.9299999475479126


In [53]:
# Accuracy on the training split, for comparison with the test figure
evaluate(model = model, dataloader = train_dataloader)

Accuracy:- 0.9287833571434021
