# Assignment 4: RNNs

In [11]:
# this is used to include model diagram in this file. It is not related to the implementation
from IPython.display import Image

# torch modules
import torch
import torch.nn as nn
from torch.utils.data import DataLoader 

# Dataset from torchvision
import torchvision
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
import torch.optim as optim

# Only needed for plotting (optional; plots are not required by the assignment)
import matplotlib.pyplot as plt
import numpy as np

First and foremost, we load the MNIST dataset and break it down into mini-batches.

In [12]:
# mini-batch size shared by the training and test loaders
batch_size = 50

# preprocessing applied to every image: convert it to a tensor
transform = transforms.Compose([transforms.ToTensor()])

# training split (downloaded to ./data on first use), served in shuffled mini-batches
trainset = MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

# test split, served in its stored order
testset = MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

We are going to implement the following RNN for MNIST classification via various recurrent units.

In [13]:
# Show the model diagram. RNN.png must be available in the notebook's working
# directory; if it is missing, displaying the image fails with FileNotFoundError.
Image("RNN.png")

FileNotFoundError: No such file or directory: 'RNN.png'

FileNotFoundError: No such file or directory: 'RNN.png'

<IPython.core.display.Image object>

## Question 1: Implementing Basic RNN
We implement the RNN using a basic recurrent unit.

### Building our Model 

In [14]:
class myRNN(nn.Module):
    """Sequence classifier: a single-layer ``nn.RNN`` followed by a linear read-out.

    Every sample is treated as a sequence of 28 steps with 28 features each
    (one image row per step). The recurrent output at the last step is mapped
    to 10 class scores.

    Args:
        batch_size: Default batch size used by ``initial_state()`` when it is
            called without arguments. ``forward`` sizes its initial state from
            the input instead, so batches of any size are accepted.
    """

    def __init__(self, batch_size):
        super().__init__()
        self.h_size = 150  # size of hidden state as mentioned in the problem
        self.time = 28     # MNIST images are 28x28, so sequence length is 28
        self.x_size = 28   # each time step has 28 features (one row of pixels)
        self.y_size = 10   # 10 output classes (digits 0-9)

        # default batch size for initial_state() when none is given
        self.batch_size = batch_size

        # batch_first=True: inputs/outputs are laid out as (batch, time, features)
        self.RecUnit = nn.RNN(self.x_size, self.h_size, batch_first=True)

        self.output_layer = nn.Linear(self.h_size, self.y_size)

    def initial_state(self, batch_size=None, device=None, dtype=None):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: Number of sequences in the batch; defaults to the
                value passed to the constructor.
            device: Device on which to allocate the state (torch default if None).
            dtype: Data type of the state (torch default if None).

        Returns:
            Tensor of shape ``(1, batch_size, h_size)``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        return torch.zeros(1, batch_size, self.h_size, device=device, dtype=dtype)

    def forward(self, x):
        """Compute class scores for a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, time, x_size)``.

        Returns:
            Tensor of shape ``(batch, y_size)`` with unnormalised class scores.
        """
        # Size the initial state from the actual input so that a partial last
        # batch or a different batch size does not cause a shape mismatch, and
        # allocate it next to the input so the model also works off-CPU.
        h0 = self.initial_state(x.size(0), device=x.device, dtype=x.dtype)
        output, hn = self.RecUnit(x, h0)
        # keep only the recurrent output at the final time step
        last_output = output[:, -1, :]
        # map the final hidden representation to class scores
        y = self.output_layer(last_output)
        return y

### Write Accuracy Function

In [15]:
def accuracy(z_out, labels):
    """Return the percentage of correctly classified samples in a mini-batch.

    Args:
        z_out: Tensor of per-class scores, one row per sample.
        labels: Tensor of target class indices, one per sample.

    Returns:
        A 0-dimensional tensor holding the accuracy in percent.
    """
    # predicted class = index of the largest score in each row
    predicted = z_out.argmax(dim=1)
    # 1.0 where the prediction matches the label, 0.0 elsewhere
    hits = predicted.eq(labels).float()
    return hits.mean() * 100.0

### Implement Training Loop

In [16]:
def train(model, loss_function, num_epochs):
    """Train ``model`` on the module-level ``trainloader`` with Adam.

    After every epoch the mean loss and mean accuracy over the epoch's
    mini-batches are printed.

    Args:
        model: Network that maps a ``(batch, 28, 28)`` tensor to class scores.
        loss_function: Callable ``loss_function(outputs, labels)`` returning a
            scalar tensor that supports ``backward()``.
        num_epochs: Number of passes over ``trainloader``.

    Raises:
        ValueError: If ``trainloader`` yields no batches during an epoch.
    """
    optimizer = optim.Adam(model.parameters())

    # make sure training-mode behaviour is active
    model.train()

    for epoch in range(num_epochs):
        train_loss = 0.0
        train_accuracy = 0.0
        num_batches = 0

        # training loop
        # (no per-batch state reset is needed here: the models visible in this
        # notebook build a fresh zero initial state inside forward())
        for num_batches, (inputs, labels) in enumerate(trainloader, start=1):
            # drop the channel dimension: each image becomes 28 rows of 28 pixels
            inputs = inputs.view(inputs.size(0), 28, 28)

            # forward pass and loss
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, labels)

            # backward pass and parameter update
            loss.backward()
            optimizer.step()

            # accumulate plain Python floats so no tensors are kept alive
            train_loss += loss.item()
            train_accuracy += float(accuracy(outputs, labels))

        if num_batches == 0:
            # avoid a confusing NameError / division by zero on an empty loader
            raise ValueError("trainloader produced no batches")

        print('Epoch:%d|Loss:%.4f|Accuracy:%.2f'
              % (epoch + 1, train_loss / num_batches, train_accuracy / num_batches))
    return

And now we can train the model.

In [17]:
# Instantiate the basic-RNN model; batch_size is the mini-batch size used by the data loaders
model = myRNN(batch_size)
# Cross-entropy loss over the 10 output class scores
loss_function = nn.CrossEntropyLoss()
# Train for 10 epochs; train() prints the mean loss and accuracy after each epoch
train(model, loss_function, num_epochs=10)

Epoch:1|Loss:0.7224|Accuracy:76.55
Epoch:2|Loss:0.3034|Accuracy:91.18
Epoch:3|Loss:0.2241|Accuracy:93.44
Epoch:4|Loss:0.1874|Accuracy:94.65
Epoch:5|Loss:0.1707|Accuracy:95.17
Epoch:6|Loss:0.1501|Accuracy:95.78
Epoch:7|Loss:0.1415|Accuracy:96.01
Epoch:8|Loss:0.1455|Accuracy:95.88
Epoch:9|Loss:0.1300|Accuracy:96.31
Epoch:10|Loss:0.1227|Accuracy:96.51


## Question 2: Implementing RNN with Gated Unit
We now replace the basic recurrent unit with a GRU.

### Building RNN with GRU

In [18]:
class myGatedRNN(nn.Module):
    """Sequence classifier: a single-layer ``nn.GRU`` followed by a linear read-out.

    Every sample is treated as a sequence of 28 steps with 28 features each
    (one image row per step). The recurrent output at the last step is mapped
    to 10 class scores.

    Args:
        batch_size: Default batch size used by ``initial_state()`` when it is
            called without arguments. ``forward`` sizes its initial state from
            the input instead, so batches of any size are accepted.
    """

    def __init__(self, batch_size):
        super().__init__()
        self.h_size = 150  # size of the hidden state
        self.time = 28     # sequence length: one step per image row
        self.x_size = 28   # features per step: pixels in one row
        self.y_size = 10   # number of output classes

        # default batch size for initial_state() when none is given
        self.batch_size = batch_size

        # batch_first=True: inputs/outputs are laid out as (batch, time, features)
        self.RecUnit = nn.GRU(self.x_size, self.h_size, batch_first=True)

        self.output_layer = nn.Linear(self.h_size, self.y_size)

    def initial_state(self, batch_size=None, device=None, dtype=None):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: Number of sequences in the batch; defaults to the
                value passed to the constructor.
            device: Device on which to allocate the state (torch default if None).
            dtype: Data type of the state (torch default if None).

        Returns:
            Tensor of shape ``(1, batch_size, h_size)``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        return torch.zeros(1, batch_size, self.h_size, device=device, dtype=dtype)

    def forward(self, x):
        """Compute class scores for a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, time, x_size)``.

        Returns:
            Tensor of shape ``(batch, y_size)`` with unnormalised class scores.
        """
        # Size the initial state from the actual input so that a partial last
        # batch or a different batch size does not cause a shape mismatch, and
        # allocate it next to the input so the model also works off-CPU.
        h0 = self.initial_state(x.size(0), device=x.device, dtype=x.dtype)
        output, hn = self.RecUnit(x, h0)
        # keep only the recurrent output at the final time step
        last_output = output[:, -1, :]
        # map the final hidden representation to class scores
        y = self.output_layer(last_output)
        return y

### Train Gated RNN

In [19]:
# Instantiate the GRU-based model; this rebinds `model`, replacing the network trained earlier
model = myGatedRNN(batch_size)
# Cross-entropy loss over the 10 output class scores
loss_function = nn.CrossEntropyLoss()
# Train for 10 epochs; train() prints the mean loss and accuracy after each epoch
train(model, loss_function, num_epochs=10)

Epoch:1|Loss:0.4712|Accuracy:84.46
Epoch:2|Loss:0.1240|Accuracy:96.30
Epoch:3|Loss:0.0789|Accuracy:97.68
Epoch:4|Loss:0.0620|Accuracy:98.12
Epoch:5|Loss:0.0473|Accuracy:98.54
Epoch:6|Loss:0.0393|Accuracy:98.78
Epoch:7|Loss:0.0344|Accuracy:98.96
Epoch:8|Loss:0.0296|Accuracy:99.07
Epoch:9|Loss:0.0250|Accuracy:99.23
Epoch:10|Loss:0.0210|Accuracy:99.36


## Question 3: Implementing RNN with LSTM
We now replace the basic recurrent unit with an LSTM.

### Building RNN with LSTM

In [20]:
class myLSTM(nn.Module):
    """Sequence classifier: a single-layer ``nn.LSTM`` followed by a linear read-out.

    Every sample is treated as a sequence of 28 steps with 28 features each
    (one image row per step). The recurrent output at the last step is mapped
    to 10 class scores.

    Args:
        batch_size: Default batch size used by ``initial_state()`` when it is
            called without arguments. ``forward`` sizes its initial state from
            the input instead, so batches of any size are accepted.
    """

    def __init__(self, batch_size):
        super().__init__()
        self.h_size = 150  # size of the hidden and cell states
        self.time = 28     # sequence length: one step per image row
        self.x_size = 28   # features per step: pixels in one row
        self.y_size = 10   # number of output classes

        # default batch size for initial_state() when none is given
        self.batch_size = batch_size

        # batch_first=True: inputs/outputs are laid out as (batch, time, features)
        self.RecUnit = nn.LSTM(self.x_size, self.h_size, batch_first=True)

        self.output_layer = nn.Linear(self.h_size, self.y_size)

    def initial_state(self, batch_size=None, device=None, dtype=None):
        """Return all-zero initial hidden and cell states.

        Args:
            batch_size: Number of sequences in the batch; defaults to the
                value passed to the constructor.
            device: Device on which to allocate the states (torch default if None).
            dtype: Data type of the states (torch default if None).

        Returns:
            Tuple ``(h0, c0)``, each of shape ``(1, batch_size, h_size)``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        # LSTM needs both a hidden state and a cell state
        h0 = torch.zeros(1, batch_size, self.h_size, device=device, dtype=dtype)
        c0 = torch.zeros(1, batch_size, self.h_size, device=device, dtype=dtype)
        return h0, c0

    def forward(self, x):
        """Compute class scores for a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, time, x_size)``.

        Returns:
            Tensor of shape ``(batch, y_size)`` with unnormalised class scores.
        """
        # Size the initial states from the actual input so that a partial last
        # batch or a different batch size does not cause a shape mismatch, and
        # allocate them next to the input so the model also works off-CPU.
        h0, c0 = self.initial_state(x.size(0), device=x.device, dtype=x.dtype)
        output, (hn, cn) = self.RecUnit(x, (h0, c0))
        # keep only the recurrent output at the final time step
        last_output = output[:, -1, :]
        # map the final hidden representation to class scores
        y = self.output_layer(last_output)
        return y

### Train LSTM

In [21]:
# Instantiate the LSTM-based model; this rebinds `model`, replacing the network trained earlier
model = myLSTM(batch_size)
# Cross-entropy loss over the 10 output class scores
loss_function = nn.CrossEntropyLoss()
# Train for 10 epochs; train() prints the mean loss and accuracy after each epoch
train(model, loss_function, num_epochs=10)

Epoch:1|Loss:0.4949|Accuracy:84.00
Epoch:2|Loss:0.1493|Accuracy:95.56
Epoch:3|Loss:0.0973|Accuracy:97.06
Epoch:4|Loss:0.0748|Accuracy:97.76
Epoch:5|Loss:0.0599|Accuracy:98.21
Epoch:6|Loss:0.0507|Accuracy:98.44
Epoch:7|Loss:0.0429|Accuracy:98.68
Epoch:8|Loss:0.0358|Accuracy:98.92
Epoch:9|Loss:0.0321|Accuracy:99.00
Epoch:10|Loss:0.0271|Accuracy:99.18
