In [None]:
# !pip install wandb --upgrade
# !pip install keras

In [1]:
# Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [138]:
# Importing relevant libraries
import numpy as np
import torch
# import tensorflow as tf
import keras
import matplotlib.pyplot as plt
# from keras.datasets import mnist

from pprint import pprint

In [111]:
# from torchvision import datasets
# from torchvision.transforms import ToTensor
# from torch.utils.data import DataLoader
from torch import optim
import torch.nn as nn

In [112]:
import time

In [137]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda
GeForce 920M
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [114]:
# Directories to store the models and plots
direc_main = './drive/MyDrive/Semester_7/Advanced_Signal_Processing/A4_RNN/'
direc_pics = './drive/MyDrive/Semester_7/Advanced_Signal_Processing/A4_RNN/pics/'

# Data Loading

## From Keras

In [115]:
def loadMNIST(return_images=False, is_val_split=False, val_samples=10000, seed_value=1):
    '''
    ################
    Function which returns a dictionary containing the shuffled version of the MNIST dataset.
    Arguments:
    return_images   [bool]  Whether we need to return images            (default: False)
                                True: returns images
                                False: returns flattened vectors
    is_val_split    [bool]  Whether validation split needs to be done   (default: False)
                                True: returns train(60K-val_samples), validation(val_samples) and test(10K)
                                False: returns train(60K) and test(10K)
    val_samples     [int]   Number of validation samples to take out of the training set of 60K samples     (default: 10K)
    seed            [int]   Seed value for the numpy random shuffling   (default: 1)
    ########
    Return:
    Dictionary containing the numpy arrays corresponding to train, test and val(if is_val_split==True)
                    dict:   train:  X
                                    y
                            test:   X
                                    y
                            val:    X           (if is_val_split == True)
                                    y           (if is_val_split == True)
    ################
    '''

    # Loading MNIST data onto numpy arrays
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    # Shuffling data
    np.random.seed(seed_value)
    train_shuffler = np.random.permutation(60000)
    x_train, y_train = x_train[train_shuffler], y_train[train_shuffler]
    test_shuffler = np.random.permutation(10000)
    x_test, y_test = x_test[test_shuffler], y_test[test_shuffler]

    # Normalizing the input data
    x_train = np.array(x_train/255.0, dtype='float32')
    x_test = np.array(x_test/255.0, dtype='float32')

    ## Splitting data appropriately
    # Number of training samples 
    train_samples = 60000 - val_samples

    if is_val_split == True:
        # Splitting the train set into the new train and val sets
        x_train, x_val = x_train[:train_samples], x_train[train_samples:]
        y_train, y_val = y_train[:train_samples], y_train[train_samples:]

        # Flattening the 28x28 images into vectors and then returning
        if (return_images==False):
            return {
                'train':{
                    'X': x_train.reshape([train_samples, 784]),
                    'Y': y_train.reshape([train_samples])
                },
                'val':{
                    'X': x_val.reshape([val_samples, 784]),
                    'Y': y_val.reshape([val_samples])
                },
                'test':{
                    'X': x_test.reshape([10000, 784]),
                    'Y': y_test.reshape([10000])
                }
            }
        # Returning the 28x28 images as is
        else :
            return {
                'train':{
                    'X': x_train,
                    'Y': y_train
                },
                'val':{
                    'X': x_val,
                    'Y': y_val
                },
                'test':{
                    'X': x_test,
                    'Y': y_test
                }
            }
    # Training set is not split
    else:
        # Flattening the images and then returning
        if (return_images==False):
            return {
                'train':{
                    'X': x_train.reshape([60000, 784]),
                    'Y': y_train.reshape([60000])
                },
                'test':{
                    'X': x_test.reshape([10000, 784]),
                    'Y': y_test.reshape([10000])
                }
            }
        # Returning the images as is
        else :
            return {
                'train':{
                    'X': x_train,
                    'Y': y_train
                },
                'test':{
                    'X': x_test,
                    'Y': y_test
                }
            }

In [119]:
def makeOneHot(data):
    """
    Function to make the numeric labels into one-hot representation.
    Arguments:
    data            Dictionary as generated by loadMNIST()

    Returns:
    Similar dictionary as data but with one-hot arrays for the labels
    """
    # Initializing the dict
    data_mod = {}
    for key in data:
        data_mod[key] = {}
        data_mod[key]['X'] = data[key]['X']
        y = np.zeros((data[key]['Y'].shape[0], 10))
        y[range(data[key]['Y'].shape[0]), data[key]['Y']] = 1
        data_mod[key]['Y'] = y

    return data_mod

In [120]:
# Loading data as flattened vectors with a validation set
val_samples = 10000
data = loadMNIST(return_images=True, is_val_split=True, val_samples=val_samples, seed_value=1)
data_oh = makeOneHot(data)

print(data_oh.keys())
print(data_oh['train']['X'][0,0].shape)
print(data_oh['train']['Y'].shape)
print(data_oh['test']['X'].shape)
print(data_oh['test']['Y'].shape)
print(data_oh['val']['X'].shape)
print(data_oh['val']['Y'].shape)

print(data['test']['Y'].shape)
print(data['val']['Y'].shape)
print(data['val']['Y'].shape)
print(data_oh['val']['Y'][:8])
print(data['val']['Y'][:8])


dict_keys(['train', 'val', 'test'])
(28,)
(50000, 10)
(10000, 28, 28)
(10000, 10)
(10000, 28, 28)
(10000, 10)
(10000,)
(10000,)
(10000,)
[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]]
[2 0 6 1 9 0 2 8]


In [122]:
# Initializing pytorch dataset from the numpy datasets we have so far
data_torch = {
    'train': {
        'X': torch.from_numpy(data['train']['X']),
        'Y': torch.from_numpy(data['train']['Y'].astype(np.int64))
    },
    'test': {
        'X': torch.from_numpy(data['test']['X']),
        'Y': torch.from_numpy(data['test']['Y'].astype(np.int64))
    },
    'val':{
        'X': torch.from_numpy(data['val']['X']),
        'Y': torch.from_numpy(data['val']['Y'].astype(np.int64))
    }
}

train_data = torch.utils.data.TensorDataset(data_torch['train']['X'], data_torch['train']['Y'])
test_data = torch.utils.data.TensorDataset(data_torch['test']['X'], data_torch['test']['Y'])
val_data = torch.utils.data.TensorDataset(data_torch['val']['X'], data_torch['val']['Y'])

In [None]:
# Plotting 25 random data points from the dataset to get an idea of the dataset
prefix = direc_pics + 'dset_'
figure = plt.figure(figsize=(8, 8))
cols, rows = 5, 5
for i in range(1, cols * rows + 1):
    sample_idx = torch.randint(len(train_data), size=(1,)).item()
    img, label = train_data[sample_idx]
    figure.add_subplot(rows, cols, i)
    plt.title('Number {}'.format(label), fontsize=9)
    plt.axis("off")
    plt.imshow(torch.reshape(img, (28,28)), cmap="gray")
plt.savefig(prefix + '25point.png',bbox_inches='tight')
plt.show()

In [123]:
# Train has 600 batches of size 100. Test and Val are just 1 batch.
loaders = {
    'train' : torch.utils.data.DataLoader(train_data, 
                                          batch_size=100, 
                                          shuffle=True, 
                                          num_workers=1),
    
    'test'  : torch.utils.data.DataLoader(test_data, 
                                          batch_size=10000, 
                                          shuffle=True, 
                                          num_workers=1),

    'val'   : torch.utils.data.DataLoader(val_data, 
                                          batch_size=val_samples, 
                                          shuffle=True, 
                                          num_workers=1),
}

# Training and Testing Functions

In [124]:
def train_loop(loaders, model, loss_fn, optimizer, interval=75):
    '''
    Function to train the model and log required information
    Arguments:
    loaders                 dict containing DataLoader objects for the data
    model                   The neural network we want to train
    loss_fn                 The loss function we are trying to minimize
    optimizer               Optimizer that we will use
    interval                Interval between logging of loss & calculating test metrics [default: 75]
    Returns:    Dict containing lists of training losses and test losses.
    '''
    dataloader = loaders['train']
    size = len(dataloader.dataset)
    losses = []
    losses_test = []
    acc_test = []


    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        temp = model(X)
        loss = loss_fn(input=temp['out'], target=y)
        losses.append(loss.item())

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % interval == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"Loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            temp1 = test_loop(loaders, model, loss_fn, option='val')
            losses_test.append(temp1['loss'])
            acc_test.append(temp1['acc'])
        
    return {
        'losses': losses,
        'losses_test': losses_test,
        'acc_test': acc_test
    }


def test_loop(loaders, model, loss_fn, option='val'):
    '''
    Function to calculate loss and accuracy of the model on the test set.
    Arguments:
    loaders                 dict containing DataLoader objects for the data
    model                   The neural network we want to test
    loss_fn                 The loss function we are trying to minimize in training
    option                  String to indicate which dataset to use for testing ['val', 'test']
    Returns:    Dict containing loss and accuracy of the model on the test dataset
    '''
    dataloader = loaders[option]
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            temp = model(X)
            test_loss += loss_fn(input=temp['out'], target=y).item()
            correct += (temp['preds'] == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"{'Validation' if option=='val' else 'Test'} Metrics: \nAccuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    return {
        'loss': test_loss,
        'acc': 100*correct
    }


In [125]:
def train_loop_lite(loaders, model, loss_fn, optimizer, interval=75):
    '''
    Function to train the model and log required information. This is just a lighter version without all the loss logging.
    Arguments:
    loaders                 dict containing DataLoader objects for the data
    model                   The neural network we want to train
    loss_fn                 The loss function we are trying to minimize
    optimizer               Optimizer that we will use
    interval                Interval between logging of loss & calculating test metrics [default: 75]
    Returns:    None
    '''
    dataloader = loaders['train']
    size = len(dataloader.dataset)

    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        temp = model(X)
        loss = loss_fn(input=temp['out'], target=y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % interval == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"Loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            test_loop_lite(loaders, model, loss_fn, option='val')


def test_loop_lite(loaders, model, loss_fn, option='val'):
    '''
    Function to calculate loss and accuracy of the model on the test set. This is just a lighter version without all the loss logging.
    Arguments:
    loaders                 dict containing DataLoader objects for the data
    model                   The neural network we want to test
    loss_fn                 The loss function we are trying to minimize in training
    option                  String to indicate which dataset to use for testing ['val', 'test']
    Returns:    None
    '''
    dataloader = loaders[option]
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            temp = model(X)
            test_loss += loss_fn(input=temp['out'], target=y).item()
            correct += (temp['preds'] == y).type(torch.float).sum().item()

    correct /= size
    test_loss /= num_batches
    print(f"Test Metrics: \nAccuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


In [127]:
def plotGraphs(losses, losses_test, acc_test, interval, prefix='', saving=True):
    '''
    Function to do all the relevant plotting of losses vs iterations
    Arguments:
    losses          List containing loss on the training set every iteration
    losses_test     List containing loss on the test set every 'interval' iterations
    acc_test        Accuracy of prediction on the test set every 'interval' iterations
    interval        Number of iterations between test set evaluations during training
    prefix          Prefix to be added to plot names when saving     (default: '')
    saving          Boolean to decide whether we want to save the plots     (default: True)
    '''
    # Number of iterations carried out during training
    num_iters = len(losses)

    plt.figure()
    plt.plot(np.arange(num_iters), losses)
    plt.grid()
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.title('Train Loss Plot')
    if saving:
        plt.savefig(prefix + '_train_loss.png',bbox_inches='tight')
    plt.show()

    plt.figure()
    plt.plot(list(np.arange(1, num_iters-1, interval)) + [num_iters], losses_test)
    plt.grid()
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.title('Test Loss Plot')
    if saving:
        plt.savefig(prefix + '_test_loss.png',bbox_inches='tight')
    plt.show()

    plt.figure()
    plt.plot(list(np.arange(1, num_iters-1, interval)) + [num_iters], acc_test)
    plt.grid()
    plt.xlabel('Iterations')
    plt.ylabel('Accuracy')
    plt.title('Test Accuracy Plot')
    if saving:
        plt.savefig(prefix + '_test_acc.png',bbox_inches='tight')
    plt.show()


# MNIST using RNN


## Vanilla RNN

In [128]:
class VanillaRNN(nn.Module):
    def __init__(self, n_neurons=128, num_layers=1, n_steps=28, n_inputs=28, n_outputs=10):
        super(VanillaRNN, self).__init__()

        self.n_neurons = n_neurons
        self.num_layers = num_layers
        self.n_steps = n_steps
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.rnn = nn.RNN(
            input_size=self.n_inputs,
            hidden_size=self.n_neurons,
            num_layers=self.num_layers,
            batch_first=True
        )
        self.FC = nn.Linear(self.n_neurons, self.n_outputs)


    def forward(self, X):
        states, _ = self.rnn(X)
        fcout = self.FC(states[:,-1,:])
        out = nn.Softmax(dim=1)(fcout)
        preds = torch.argmax(out, dim=1)

        return {
			'in': X,
			'out': out,
			'preds': preds,
            'fcout': fcout,
			'states':states
		}

In [129]:
# Grid Search of Parameters
layers_list = [1,2]
neurons_list = [32,64]
metricsD = {}
for layer in layers_list:
    metricsD[layer] = {}
    for neuron in neurons_list:
        metricsD[layer][neuron] = {}

# Dictionary of models
modelsD = {}
for layer in layers_list:
    modelsD[layer] = {}
    for neuron in neurons_list:
        model = VanillaRNN(n_neurons=neuron, num_layers=layer)
        modelsD[layer][neuron] = model

# RNN1 = VanillaRNN(n_neurons=128, num_layers=1)
# print(RNN1)

In [130]:
# Cross Entropy Loss
loss_fn = nn.CrossEntropyLoss()
# Number of Epochs
num_epochs = 1

In [131]:
for layer in modelsD:
    for neuron in modelsD[layer]:
        model = modelsD[layer][neuron]
        print('\n&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
        print(f'CONFIGURATION: num_layers = {layer}, hidden_size = {neuron}')

        # Adam optimizer
        optimizer = optim.Adam(model.parameters())

        # Measuring time taken for the training process
        losses = []
        losses_test = []
        acc_test = []
        interval = 50
        start = time.time()
        for i in range(num_epochs):
            print(f"Epoch {i+1}\n-------------------------------")
            
            # Training the network for this epoch
            temp = train_loop(loaders, model, loss_fn, optimizer, interval)
            losses += temp['losses']
            losses_test += temp['losses_test']
            acc_test += temp['acc_test']

        # Testing on the validation set
        print('\n-----------------------------------------')
        print('Performance on the Validation set...')
        temp = test_loop(loaders, model, loss_fn, option='val')
        losses_test.append(temp['loss'])
        acc_test.append(temp['acc'])

        # Testing on the test set
        print('Performance on the Test set...')
        temp = test_loop(loaders, model, loss_fn, option='test')

        # Logging the losses and accuracies obtained
        metricsD[layer][neuron]['l'] = losses
        metricsD[layer][neuron]['lt'] = losses_test
        metricsD[layer][neuron]['at'] = acc_test

        end = time.time()
        print('-----------------------------------------')
        print('-----------------------------------------')
        print('Time taken for the training: {0:.5f} seconds'.format(end-start))
        print('-----------------------------------------')



&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

CONFIGURATION: num_layers = 1, hidden_size = 32
Epoch 1
-------------------------------
Loss: 2.302307  [    0/50000]
Validation Metrics: 
Accuracy: 11.6%, Avg loss: 2.302351 

Loss: 2.283096  [ 5000/50000]
Validation Metrics: 
Accuracy: 20.8%, Avg loss: 2.281933 

Loss: 2.169864  [10000/50000]
Validation Metrics: 
Accuracy: 33.4%, Avg loss: 2.171128 

Loss: 2.096137  [15000/50000]
Validation Metrics: 
Accuracy: 37.6%, Avg loss: 2.110651 

Loss: 2.128790  [20000/50000]
Validation Metrics: 
Accuracy: 38.0%, Avg loss: 2.087060 

Loss: 2.080314  [25000/50000]
Validation Metrics: 
Accuracy: 39.4%, Avg loss: 2.070442 

Loss: 2.001274  [30000/50000]
Validation Metrics: 
Accuracy: 44.0%, Avg loss: 2.042150 

Loss: 1.995963  [35000/50000]
Validation Metrics: 
Accuracy: 46.3%, Avg loss: 2.024042 

Loss: 2.010761  [40000/50000]
Validation Metrics: 
Accuracy: 42.5%, Avg loss: 2.056270 

Loss: 2.056288  [45000/50000]
Validation Metrics: 
Accur

In [None]:
# Plotting the relevant graphs and plots
saving = False
dataloader = loaders['test']
for layer in modelsD:
    for neuron in modelsD[layer]:
        model = modelsD[layer][neuron]
        prefix = direc_pics + 'mnist_RNN_l{}_n{}'.format(layer, neuron)
        plotGraphs(
            losses = metricsD[layer][neuron]['l'],
            losses_test = metricsD[layer][neuron]['lt'],
            acc_test = metricsD[layer][neuron]['at'],
            interval = interval,
            prefix = prefix,
            saving = saving
        )

        # Plotting few pictures with true and predicted labels
        with torch.no_grad():
            for X, y in dataloader:
                temp = model(X)
            
            figure = plt.figure(figsize=(10, 10))
            cols, rows = 5, 5
            for i in range(1, cols * rows + 1):
                sample_idx = torch.randint(5000, size=(1,)).item()
                img, label = X[sample_idx], y[sample_idx]
                figure.add_subplot(rows, cols, i)
                plt.title('Truth: {} | Pred: {}'.format(label, temp['preds'][sample_idx]), fontsize=9)
                plt.axis("off")
                plt.imshow(img.squeeze(), cmap="gray")
            if saving:
                plt.savefig(prefix + '_samples.png',bbox_inches='tight')
            plt.show()


In [None]:
# Saving the trained network
for layer in modelsD:
    for neuron in modelsD[layer]:
        torch.save(modelsD[layer][neuron], direc_main+'mnist_RNN_l{}_n{}.pth'.format(layer, neuron))


In [None]:
# The exact same above has to be done with regularization on a new set of models


## GRU

In [None]:
class GRU(nn.Module):
    def __init__(self, n_neurons=128, num_layers=1, n_steps=28, n_inputs=28, n_outputs=10):
        super(GRU, self).__init__()
        
        self.n_neurons = n_neurons
        self.num_layers = num_layers
        self.n_steps = n_steps
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.gru = nn.GRU(
            input_size=self.n_inputs,
            hidden_size=self.n_neurons,
            num_layers=self.num_layers,
            batch_first=True
        )
        self.FC = nn.Linear(self.n_neurons, self.n_outputs)
        
    def forward(self, X):
        states, _ = self.gru(X)
        fcout = self.FC(states[:,-1,:])
        out = nn.Softmax(dim=1)(fcout)
        preds = torch.argmax(out, dim=1)

        return {
			'in': X,
			'out': out,
			'preds': preds,
            'fcout': fcout,
			'states':states
		}


In [None]:
# Grid Search of Parameters
layers_list = [1,2]
neurons_list = [32,64]
metricsD = {}
for layer in layers_list:
    metricsD[layer] = {}
    for neuron in neurons_list:
        metricsD[layer][neuron] = {}

# Dictionary of models
modelsD = {}
for layer in layers_list:
    modelsD[layer] = {}
    for neuron in neurons_list:
        model = GRU(n_neurons=neuron, num_layers=layer)
        modelsD[layer][neuron] = model


In [None]:
# Cross Entropy Loss
loss_fn = nn.CrossEntropyLoss()
# Number of Epochs
num_epochs = 1

In [None]:
for layer in modelsD:
    for neuron in modelsD[layer]:
        model = modelsD[layer][neuron]
        print('\n&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
        print(f'CONFIGURATION: num_layers = {layer}, hidden_size = {neuron}')

        # Adam optimizer
        optimizer = optim.Adam(model.parameters())

        # Measuring time taken for the training process
        losses = []
        losses_test = []
        acc_test = []
        interval = 50
        start = time.time()
        for i in range(num_epochs):
            print(f"Epoch {i+1}\n-------------------------------")
            
            # Training the network for this epoch
            temp = train_loop(loaders, model, loss_fn, optimizer, interval)
            losses += temp['losses']
            losses_test += temp['losses_test']
            acc_test += temp['acc_test']

        # Testing on the validation set
        print('\n-----------------------------------------')
        print('Performance on the Validation set...')
        temp = test_loop(loaders, model, loss_fn, option='val')
        losses_test.append(temp['loss'])
        acc_test.append(temp['acc'])

        # Testing on the test set
        print('Performance on the Test set...')
        temp = test_loop(loaders, model, loss_fn, option='test')

        # Logging the losses and accuracies obtained
        metricsD[layer][neuron]['l'] = losses
        metricsD[layer][neuron]['lt'] = losses_test
        metricsD[layer][neuron]['at'] = acc_test

        end = time.time()
        print('-----------------------------------------')
        print('-----------------------------------------')
        print('Time taken for the training: {0:.5f} seconds'.format(end-start))
        print('-----------------------------------------')


In [None]:
# Plotting the relevant graphs and plots
saving = False
dataloader = loaders['test']
for layer in modelsD:
    for neuron in modelsD[layer]:
        model = modelsD[layer][neuron]
        prefix = direc_pics + 'mnist_GRU_l{}_n{}'.format(layer, neuron)
        plotGraphs(
            losses = metricsD[layer][neuron]['l'],
            losses_test = metricsD[layer][neuron]['lt'],
            acc_test = metricsD[layer][neuron]['at'],
            interval = interval,
            prefix = prefix,
            saving = saving
        )

        # Plotting few pictures with true and predicted labels
        with torch.no_grad():
            for X, y in dataloader:
                temp = model(X)
            
            figure = plt.figure(figsize=(10, 10))
            cols, rows = 5, 5
            for i in range(1, cols * rows + 1):
                sample_idx = torch.randint(5000, size=(1,)).item()
                img, label = X[sample_idx], y[sample_idx]
                figure.add_subplot(rows, cols, i)
                plt.title('Truth: {} | Pred: {}'.format(label, temp['preds'][sample_idx]), fontsize=9)
                plt.axis("off")
                plt.imshow(img.squeeze(), cmap="gray")
            if saving:
                plt.savefig(prefix + '_samples.png',bbox_inches='tight')
            plt.show()


In [None]:
# Saving the trained network
for layer in modelsD:
    for neuron in modelsD[layer]:
        torch.save(modelsD[layer][neuron], direc_main+'mnist_GRU_l{}_n{}.pth'.format(layer, neuron))


In [139]:
# The exact same above has to be done with regularization on a new set of models



## Bidirectional LSTM

In [None]:
class GRU(nn.Module):
    def __init__(self, n_neurons=128, num_layers=1, n_steps=28, n_inputs=28, n_outputs=10):
        super(GRU, self).__init__()
        
        self.n_neurons = n_neurons
        self.num_layers = num_layers
        self.n_steps = n_steps
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.gru = nn.GRU(
            input_size=self.n_inputs,
            hidden_size=self.n_neurons,
            num_layers=self.num_layers,
            batch_first=True
        )
        self.FC = nn.Linear(self.n_neurons, self.n_outputs)
        
    def forward(self, X):
        states, _ = self.gru(X)
        fcout = self.FC(states[:,-1,:])
        out = nn.Softmax(dim=1)(fcout)
        preds = torch.argmax(out, dim=1)

        return {
			'in': X,
			'out': out,
			'preds': preds,
            'fcout': fcout,
			'states':states
		}

In [None]:
# Grid Search of Parameters
layers_list = [1,2]
neurons_list = [32,64]
metricsD = {}
for layer in layers_list:
    metricsD[layer] = {}
    for neuron in neurons_list:
        metricsD[layer][neuron] = {}

# Dictionary of models
modelsD = {}
for layer in layers_list:
    modelsD[layer] = {}
    for neuron in neurons_list:
        model = GRU(n_neurons=neuron, num_layers=layer)
        modelsD[layer][neuron] = model


In [None]:
# Cross Entropy Loss
loss_fn = nn.CrossEntropyLoss()
# Number of Epochs
num_epochs = 1

In [None]:
for layer in modelsD:
    for neuron in modelsD[layer]:
        model = modelsD[layer][neuron]
        print('\n&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
        print(f'CONFIGURATION: num_layers = {layer}, hidden_size = {neuron}')

        # Adam optimizer
        optimizer = optim.Adam(model.parameters())

        # Measuring time taken for the training process
        losses = []
        losses_test = []
        acc_test = []
        interval = 50
        start = time.time()
        for i in range(num_epochs):
            print(f"Epoch {i+1}\n-------------------------------")
            
            # Training the network for this epoch
            temp = train_loop(loaders, model, loss_fn, optimizer, interval)
            losses += temp['losses']
            losses_test += temp['losses_test']
            acc_test += temp['acc_test']

        # Testing on the validation set
        print('\n-----------------------------------------')
        print('Performance on the Validation set...')
        temp = test_loop(loaders, model, loss_fn, option='val')
        losses_test.append(temp['loss'])
        acc_test.append(temp['acc'])

        # Testing on the test set
        print('Performance on the Test set...')
        temp = test_loop(loaders, model, loss_fn, option='test')

        # Logging the losses and accuracies obtained
        metricsD[layer][neuron]['l'] = losses
        metricsD[layer][neuron]['lt'] = losses_test
        metricsD[layer][neuron]['at'] = acc_test

        end = time.time()
        print('-----------------------------------------')
        print('-----------------------------------------')
        print('Time taken for the training: {0:.5f} seconds'.format(end-start))
        print('-----------------------------------------')


In [None]:
# Plotting the relevant graphs and plots
saving = False
dataloader = loaders['test']
for layer in modelsD:
    for neuron in modelsD[layer]:
        model = modelsD[layer][neuron]
        prefix = direc_pics + 'mnist_GRU_l{}_n{}'.format(layer, neuron)
        plotGraphs(
            losses = metricsD[layer][neuron]['l'],
            losses_test = metricsD[layer][neuron]['lt'],
            acc_test = metricsD[layer][neuron]['at'],
            interval = interval,
            prefix = prefix,
            saving = saving
        )

        # Plotting few pictures with true and predicted labels
        with torch.no_grad():
            for X, y in dataloader:
                temp = model(X)
            
            figure = plt.figure(figsize=(10, 10))
            cols, rows = 5, 5
            for i in range(1, cols * rows + 1):
                sample_idx = torch.randint(5000, size=(1,)).item()
                img, label = X[sample_idx], y[sample_idx]
                figure.add_subplot(rows, cols, i)
                plt.title('Truth: {} | Pred: {}'.format(label, temp['preds'][sample_idx]), fontsize=9)
                plt.axis("off")
                plt.imshow(img.squeeze(), cmap="gray")
            if saving:
                plt.savefig(prefix + '_samples.png',bbox_inches='tight')
            plt.show()


In [None]:
# Saving the trained network
for layer in modelsD:
    for neuron in modelsD[layer]:
        torch.save(modelsD[layer][neuron], direc_main+'mnist_GRU_l{}_n{}.pth'.format(layer, neuron))


In [None]:
# The exact same above has to be done with regularization on a new set of models
