# Import Libraries to Use

In [None]:
import collections
import csv
import os
import pickle
import re
from time import time

import nltk
import numpy as np
import pylab
import tensorflow as tf
from nltk.tokenize import word_tokenize
from tensorflow.keras import Model, layers
# Download the 'punkt' resource from nltk (presumably needed by the
# word_tokenize calls further down - TODO confirm for the installed nltk version)
nltk.download('punkt')

# Fix TensorFlow's global random seed so repeated runs start from the same state
seed = 10
tf.random.set_seed(seed)

# Create folder to store results.
# makedirs also creates the missing './results' parent (os.mkdir would raise
# FileNotFoundError) and exist_ok=True makes re-running the cell harmless.
os.makedirs('./results/partb', exist_ok=True)

# Set Variables

In [None]:
# --- Optimisation settings ---
batch_size = 128      # mini-batch size handed to tf.data's .batch()
no_epochs = 250       # number of passes over the training set
lr = 0.01             # learning rate given to the optimisers

# --- Data ---
train_file = 'train_medium.csv'
test_file = 'test_medium.csv'
MAX_DOCUMENT_LENGTH = 100   # every document is cut/padded to this many tokens
one_hot_size = 256          # depth used when one-hot encoding character ids
MAX_LABEL = 15              # number of units in the final dense layer

# --- Convolutional architecture ---
N_FILTERS = 10
FILTER_SHAPE1 = [20, one_hot_size]   # first kernel spans the whole one-hot axis
FILTER_SHAPE2 = [20, 1]
POOLING_WINDOW, POOLING_STRIDE = 4, 2

# Read Data with Character

In [None]:
def vocabulary(strings):
    """Build a character vocabulary from an iterable of strings.

    Returns:
        (vocab_size, char_to_ix): the number of distinct characters and a
        dict mapping each character to its rank in sorted order.
    """
    alphabet = sorted(set(''.join(strings)))
    char_to_ix = dict(zip(alphabet, range(len(alphabet))))
    return len(alphabet), char_to_ix

def preprocess(strings, char_to_ix, MAX_LENGTH):
    """Encode strings as a fixed-width matrix of character ids.

    Every string is lower-cased, cut to MAX_LENGTH characters, right-padded
    with spaces up to MAX_LENGTH and mapped through char_to_ix.

    Args:
        strings: iterable of str.
        char_to_ix: dict mapping a single character to its integer id.
        MAX_LENGTH: number of columns in the result.

    Returns:
        np.ndarray of dtype int64 with shape (len(strings), MAX_LENGTH).

    Raises:
        KeyError: if a lower-cased character, or the space used for padding,
            is missing from char_to_ix.
    """
    rows = []
    for text in strings:
        # Slice + ljust truncates and pads in one step (the original
        # truncation only rebound a loop variable and had no effect).
        chars = text.lower()[:MAX_LENGTH].ljust(MAX_LENGTH)
        rows.append([char_to_ix[ch] for ch in chars])
    # The reshape keeps the (0, MAX_LENGTH) shape for empty input.
    return np.array(rows, dtype=np.int64).reshape(len(rows), MAX_LENGTH)

def read_data_chars(train_file, test_file):
    """Load both CSV splits and encode every document as character ids.

    Each CSV row must hold the integer label in column 0 and the text in
    column 1. The text is filtered with a regex, then encoded by the
    character-level `preprocess` defined above (note that a later cell
    re-binds the name `preprocess` to a word-level version).

    Args:
        train_file: path of the training CSV.
        test_file: path of the test CSV.

    Returns:
        (x_train, y_train, x_test, y_test) as tf.int64 tensors; the x tensors
        have MAX_DOCUMENT_LENGTH columns.
    """
    # Only the first '^' negates the class; the inner ones are literal, so
    # '^' is kept alongside letters, digits, ',', '.', apostrophe and space.
    cop = re.compile("[^a-z^A-Z^0-9^,^.^' ']")

    def load_split(path):
        """Return (filtered texts, integer labels) read from one CSV file."""
        texts, labels = [], []
        # newline='' is what the csv module asks for when reading files
        with open(path, encoding='utf-8', newline='') as filex:
            for row in csv.reader(filex):
                texts.append(cop.sub("", row[1]))
                labels.append(int(row[0]))
        return texts, labels

    x_train, y_train = load_split(train_file)
    x_test, y_test = load_split(test_file)

    # preprocess() lower-cases the text and pads with spaces, so the
    # vocabulary must be built from lower-cased text and must contain ' ';
    # otherwise a corpus with e.g. 'Q' but no 'q' raises KeyError.
    lowered = [text.lower() for text in x_train + x_test]
    _, char_to_ix = vocabulary(lowered + [' '])

    x_train = tf.constant(
        preprocess(x_train, char_to_ix, MAX_DOCUMENT_LENGTH), dtype=tf.int64)
    y_train = tf.constant(np.array(y_train), dtype=tf.int64)
    x_test = tf.constant(
        preprocess(x_test, char_to_ix, MAX_DOCUMENT_LENGTH), dtype=tf.int64)
    y_test = tf.constant(np.array(y_test), dtype=tf.int64)

    return x_train, y_train, x_test, y_test

# Load the character-encoded train/test splits
x_train_c, y_train_c, x_test_c, y_test_c = read_data_chars(train_file, test_file)

# Wrap them in `tf.data` pipelines: the training set is shuffled, both are batched
train_ds_c = tf.data.Dataset.from_tensor_slices((x_train_c, y_train_c))
train_ds_c = train_ds_c.shuffle(10000).batch(batch_size)
test_ds_c = tf.data.Dataset.from_tensor_slices((x_test_c, y_test_c))
test_ds_c = test_ds_c.batch(batch_size)

# Read Data with Words

In [None]:
def clean_str(text):
    """Normalise raw text before tokenisation.

    Characters outside the allowed set become spaces, runs of whitespace
    collapse to a single space, and the result is stripped and lower-cased.
    """
    replaced = re.sub(r"[^A-Za-z0-9(),!?\'\`\"]", " ", text)
    collapsed = re.sub(r"\s{2,}", " ", replaced)
    return collapsed.strip().lower()


def build_word_dict(contents):
    """Map every token found in `contents` to an integer id.

    Ids 0, 1 and 2 are reserved for "<pad>", "<unk>" and "<eos>"; the
    remaining tokens receive ids in the order returned by
    Counter.most_common() (most frequent first).
    """
    tokens = [
        token
        for document in contents
        for token in word_tokenize(clean_str(document))
    ]

    word_dict = {"<pad>": 0, "<unk>": 1, "<eos>": 2}
    for token, _count in collections.Counter(tokens).most_common():
        word_dict[token] = len(word_dict)
    return word_dict


def preprocess(contents, word_dict, document_max_len):
    """Turn documents into fixed-length lists of word ids.

    Each document is cleaned and tokenised, unknown words map to "<unk>",
    an "<eos>" id is appended, and the list is then cut to
    `document_max_len` (which may drop the "<eos>") and right-padded with
    "<pad>".

    Returns:
        A list with one list of `document_max_len` ids per document.
    """
    encoded = []
    for document in contents:
        ids = [
            word_dict.get(token, word_dict["<unk>"])
            for token in word_tokenize(clean_str(document))
        ]
        ids.append(word_dict["<eos>"])
        ids = ids[:document_max_len]
        ids = ids + (document_max_len - len(ids)) * [word_dict["<pad>"]]
        encoded.append(ids)
    return encoded


def read_data_words(train_file, test_file):
    """Load both CSV splits and encode every document as word ids.

    Each CSV row must hold the integer label in column 0 and the text in
    column 1. The word dictionary is built from the train and test texts
    together.

    Args:
        train_file: path of the training CSV.
        test_file: path of the test CSV.

    Returns:
        (x_train, y_train, x_test, y_test, vocab_size): four tf.int64
        tensors plus the largest word id present in either split + 1.
    """
    # Only the first '^' negates the class; the inner ones are literal, so
    # '^' is kept alongside letters, digits, ',', '.', apostrophe and space.
    cop = re.compile("[^a-z^A-Z^0-9^,^.^' ']")

    def load_split(path):
        """Return (filtered texts, integer labels) read from one CSV file."""
        texts, labels = [], []
        # newline='' is what the csv module asks for when reading files
        with open(path, encoding='utf-8', newline='') as filex:
            for row in csv.reader(filex):
                texts.append(cop.sub("", row[1]))
                labels.append(int(row[0]))
        return texts, labels

    x_train, y_train = load_split(train_file)
    x_test, y_test = load_split(test_file)

    word_dict = build_word_dict(x_train + x_test)
    # preprocess() already cuts/pads each row to MAX_DOCUMENT_LENGTH, so no
    # further slicing is needed before building the tensors.
    x_train = tf.constant(
        preprocess(x_train, word_dict, MAX_DOCUMENT_LENGTH), dtype=tf.int64)
    y_train = tf.constant(np.array(y_train), dtype=tf.int64)
    x_test = tf.constant(
        preprocess(x_test, word_dict, MAX_DOCUMENT_LENGTH), dtype=tf.int64)
    y_test = tf.constant(np.array(y_test), dtype=tf.int64)

    vocab_size = tf.get_static_value(tf.reduce_max(x_train))
    vocab_size = max(vocab_size, tf.get_static_value(tf.reduce_max(x_test))) + 1
    return x_train, y_train, x_test, y_test, vocab_size


# Load the word-encoded splits together with the vocabulary size
x_train, y_train, x_test, y_test, vocab_size = read_data_words(train_file, test_file)

# Wrap them in `tf.data` pipelines: the training set is shuffled, both are batched
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.shuffle(10000).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.batch(batch_size)

# Build Model

## CharCNN Model

In [None]:
# Output folder for the CharCNN runs; makedirs also creates missing parents
# and exist_ok=True makes re-running the cell harmless.
os.makedirs('./results/partb/char_cnn', exist_ok=True)

In [None]:
# Build model
tf.keras.backend.set_floatx('float32')


class CharCNN(Model):
    """Character-level CNN classifier: one-hot -> 2 x (conv, max-pool) -> softmax.

    Args:
        vocab_size: depth of the one-hot encoding applied to the character ids.
        use_dropout: when True, `call` applies dropout to the flattened
            features before the dense layer.
    """

    def __init__(self, vocab_size=256, use_dropout=False):
        super(CharCNN, self).__init__()
        self.vocab_size = vocab_size
        self.use_dropout = use_dropout
        # 1st convolution and pooling layers
        self.conv1 = layers.Conv2D(N_FILTERS, FILTER_SHAPE1, padding='VALID', activation='relu', use_bias=True)
        self.pool1 = layers.MaxPool2D(POOLING_WINDOW, POOLING_STRIDE, padding='SAME')
        # 2nd convolution and pooling layers
        self.conv2 = layers.Conv2D(N_FILTERS, FILTER_SHAPE2, padding='VALID', activation='relu', use_bias=True)
        self.pool2 = layers.MaxPool2D(POOLING_WINDOW, POOLING_STRIDE, padding='SAME')
        self.flatten = layers.Flatten()
        # Softmax output: the model returns class probabilities, not raw logits
        self.dense = layers.Dense(MAX_LABEL, activation='softmax')

    def call(self, x, drop_rate=0.5):
        """Return class probabilities for a batch of character ids.

        Args:
            x: integer ids, [n_samples, n_features].
            drop_rate: dropout rate, only used when `use_dropout` is True.
        """
        # Use the constructor's vocab_size as the one-hot depth (previously
        # the global one_hot_size was read and the argument was ignored).
        x = tf.one_hot(x, self.vocab_size)  # [n_samples, n_features, vocab_size]
        x = x[..., tf.newaxis]              # add a trailing channel axis of size 1
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.flatten(x)
        if self.use_dropout:
            x = tf.nn.dropout(x, drop_rate)
        probs = self.dense(x)
        return probs


char_cnn_model = CharCNN(256, False)
char_cnn_dropout = CharCNN(256, True)

### Optimiser and Loss function

In [None]:
# Optimiser and loss used by train_step / test_step
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

# Running metrics: they accumulate over every batch they are called on until
# reset_states() is invoked (done at the start of each epoch).
train_loss, test_loss = (
    tf.keras.metrics.Mean(name=tag) for tag in ('train_loss', 'test_loss'))
train_accuracy, test_accuracy = (
    tf.keras.metrics.SparseCategoricalAccuracy(name=tag)
    for tag in ('train_accuracy', 'test_accuracy'))

### Training and Test Functions

In [None]:
# Training function
def train_step(model, x, label, drop_rate):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: the model to train; called as model(x, drop_rate).
        x: batch of inputs.
        label: batch of integer class labels.
        drop_rate: dropout rate forwarded to the model.
    """
    with tf.GradientTape() as tape:
        out = model(x, drop_rate)
        loss = loss_object(label, out)
    # Differentiate and apply the update outside the tape context so the
    # tape does not keep recording those operations.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    # Use the `label` argument (the original read a global named `labels`)
    train_accuracy(label, out)

# Testing function
def test_step(model, x, label, drop_rate=0):
    """Evaluate one batch and fold its loss and accuracy into the test metrics."""
    predictions = model(x, drop_rate)
    test_loss(loss_object(label, predictions))
    test_accuracy(label, predictions)

### Run without dropout

In [None]:
# CharCNN without dropout: per-epoch histories (also read by the plotting cell)
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # The metrics accumulate across calls, so clear them before every epoch
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    # Only the training pass is timed
    start_time = time()
    for images, labels in train_ds_c:
        train_step(char_cnn_model, images, labels, drop_rate=0.5)
    end_time = time()
    timings.append(end_time - start_time)

    for images, labels in test_ds_c:
        test_step(char_cnn_model, images, labels, drop_rate=0)

    epoch_stats = (train_loss.result(), train_accuracy.result(),
                   test_loss.result(), test_accuracy.result())
    for history, value in zip((train_ec, train_acc, test_ec, test_acc), epoch_stats):
        history.append(value)
    print(template.format(epoch + 1, *epoch_stats))

# save results
char_cnn_res = dict(train_ec=train_ec, train_acc=train_acc,
                    test_ec=test_ec, test_acc=test_acc, timings=timings)
filename = './results/partb/char_cnn/char_cnn_base'
with open(filename, 'wb') as file_pi:
    pickle.dump(char_cnn_res, file_pi)

In [None]:
# Plot train/test accuracy (left) and entropy cost (right) per epoch
epoch_axis = np.arange(no_epochs)
pylab.figure(figsize=(12, 4))

pylab.subplot(1, 2, 1)
for series, tag in ((test_acc, 'test'), (train_acc, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Accuracy for CharCNN')
pylab.xlabel('epochs')
pylab.ylabel('accuracy')
pylab.legend(loc='lower right')

pylab.subplot(1, 2, 2)
for series, tag in ((test_ec, 'test'), (train_ec, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Loss for CharCNN')
pylab.xlabel('epochs')
pylab.ylabel('entropy cost')
pylab.legend(loc='upper right')

# Relative path, like every other output (the original '/results/...' pointed
# at the filesystem root).
pylab.savefig('./results/partb/char_cnn/char_cnn_base.pdf')

### Run with Dropout

In [None]:
# CharCNN with dropout: per-epoch histories (also read by the plotting cell)
# NOTE(review): `optimizer` is the same instance already used for
# char_cnn_model - confirm that sharing it between models is intended.
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # The metrics accumulate across calls, so clear them before every epoch
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    # Only the training pass is timed
    start_time = time()
    for images, labels in train_ds_c:
        train_step(char_cnn_dropout, images, labels, drop_rate=0.5)
    end_time = time()
    timings.append(end_time - start_time)

    # Evaluation passes drop_rate=0
    for images, labels in test_ds_c:
        test_step(char_cnn_dropout, images, labels, drop_rate=0)

    epoch_stats = (train_loss.result(), train_accuracy.result(),
                   test_loss.result(), test_accuracy.result())
    for history, value in zip((train_ec, train_acc, test_ec, test_acc), epoch_stats):
        history.append(value)
    print(template.format(epoch + 1, *epoch_stats))

# save results
char_cnn_dropout_res = dict(train_ec=train_ec, train_acc=train_acc,
                            test_ec=test_ec, test_acc=test_acc, timings=timings)
filename = './results/partb/char_cnn/char_cnn_dropout'
with open(filename, 'wb') as file_pi:
    pickle.dump(char_cnn_dropout_res, file_pi)

In [None]:
# Plot train/test accuracy (left) and entropy cost (right) per epoch
epoch_axis = np.arange(no_epochs)
pylab.figure(figsize=(12, 4))

pylab.subplot(1, 2, 1)
for series, tag in ((test_acc, 'test'), (train_acc, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Accuracy for CharCNN')
pylab.xlabel('epochs')
pylab.ylabel('accuracy')
pylab.legend(loc='lower right')

pylab.subplot(1, 2, 2)
for series, tag in ((test_ec, 'test'), (train_ec, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Loss for CharCNN')
pylab.xlabel('epochs')
pylab.ylabel('entropy cost')
pylab.legend(loc='upper right')

pylab.savefig('./results/partb/char_cnn/char_cnn_dropout.pdf')

## WordCNN

In [None]:
# Hyper-parameters for the WordCNN section
MAX_DOCUMENT_LENGTH = 100
N_FILTERS = 10
EMBEDDING_SIZE = 20
FILTER_SHAPE1 = [20, EMBEDDING_SIZE]   # first kernel spans the whole embedding axis
FILTER_SHAPE2 = [20, 1]
POOLING_WINDOW = 4
POOLING_STRIDE = 2
MAX_LABEL = 15

batch_size = 128
no_epochs = 250
lr = 0.01

# Output folder for the WordCNN runs; makedirs also creates missing parents
# and exist_ok=True makes re-running the cell harmless.
os.makedirs('./results/partb/word_cnn', exist_ok=True)

### Build Model

In [None]:
# Build model
tf.keras.backend.set_floatx('float32')


class WordCNN(Model):
    """Word-level CNN classifier: embedding -> 2 x (conv, max-pool) -> softmax.

    Args:
        vocab_size: number of entries in the embedding table.
        use_dropout: when True, `call` applies dropout to the flattened
            features before the dense layer.
    """

    def __init__(self, vocab_size, use_dropout=False):
        super().__init__()
        self.vocab_size = vocab_size
        self.use_dropout = use_dropout
        self.embedding = layers.Embedding(
            vocab_size, EMBEDDING_SIZE, input_length=MAX_DOCUMENT_LENGTH)

        # Both convolutions share everything except their kernel shape
        conv_options = dict(padding='VALID', activation='relu', use_bias=True)
        self.conv1 = layers.Conv2D(N_FILTERS, FILTER_SHAPE1, **conv_options)
        self.pool1 = layers.MaxPool2D(POOLING_WINDOW, POOLING_STRIDE, padding='SAME')
        self.conv2 = layers.Conv2D(N_FILTERS, FILTER_SHAPE2, **conv_options)
        self.pool2 = layers.MaxPool2D(POOLING_WINDOW, POOLING_STRIDE, padding='SAME')

        self.flatten = layers.Flatten()
        self.dense = layers.Dense(MAX_LABEL, activation='softmax')

    def call(self, x, drop_rate=0.5):
        """Return the softmax output for a batch of word ids [n_samples, features]."""
        # Embed, then append a trailing channel axis of size 1 for Conv2D
        x = self.embedding(x)[..., tf.newaxis]
        for layer in (self.conv1, self.pool1, self.conv2, self.pool2, self.flatten):
            x = layer(x)
        if self.use_dropout:
            x = tf.nn.dropout(x, drop_rate)
        return self.dense(x)


word_cnn_model = WordCNN(vocab_size, False)
word_cnn_dropout = WordCNN(vocab_size, True)

### Optimiser, Loss, Train, Test Functions

In [None]:
# Optimiser and loss used by train_step / test_step
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

# Running metrics: they accumulate over every batch they are called on until
# reset_states() is invoked (done at the start of each epoch).
train_loss, test_loss = (
    tf.keras.metrics.Mean(name=tag) for tag in ('train_loss', 'test_loss'))
train_accuracy, test_accuracy = (
    tf.keras.metrics.SparseCategoricalAccuracy(name=tag)
    for tag in ('train_accuracy', 'test_accuracy'))

In [None]:
# Training function
def train_step(model, x, label, drop_rate):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: the model to train; called as model(x, drop_rate).
        x: batch of inputs.
        label: batch of integer class labels.
        drop_rate: dropout rate forwarded to the model.
    """
    with tf.GradientTape() as tape:
        out = model(x, drop_rate)
        loss = loss_object(label, out)
    # Differentiate and apply the update outside the tape context so the
    # tape does not keep recording those operations.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    # Use the `label` argument (the original read a global named `labels`)
    train_accuracy(label, out)

# Testing function
def test_step(model, x, label, drop_rate=0):
    """Evaluate one batch and fold its loss and accuracy into the test metrics."""
    predictions = model(x, drop_rate)
    test_loss(loss_object(label, predictions))
    test_accuracy(label, predictions)

### Train without dropout

In [None]:
# WordCNN without dropout: per-epoch histories (also read by the plotting cell)
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # The metrics accumulate across calls, so clear them before every epoch
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    # Only the training pass is timed
    start_time = time()
    for images, labels in train_ds:
        train_step(word_cnn_model, images, labels, drop_rate=0.5)
    end_time = time()
    timings.append(end_time - start_time)

    for images, labels in test_ds:
        test_step(word_cnn_model, images, labels, drop_rate=0)

    epoch_stats = (train_loss.result(), train_accuracy.result(),
                   test_loss.result(), test_accuracy.result())
    for history, value in zip((train_ec, train_acc, test_ec, test_acc), epoch_stats):
        history.append(value)
    print(template.format(epoch + 1, *epoch_stats))

# save results
word_cnn_res = dict(train_ec=train_ec, train_acc=train_acc,
                    test_ec=test_ec, test_acc=test_acc, timings=timings)

filename = './results/partb/word_cnn/word_cnn_base'
with open(filename, 'wb') as file_pi:
    pickle.dump(word_cnn_res, file_pi)

In [None]:
# Plot train/test accuracy (left) and entropy cost (right) per epoch
epoch_axis = np.arange(no_epochs)
pylab.figure(figsize=(12, 4))

pylab.subplot(1, 2, 1)
for series, tag in ((test_acc, 'test'), (train_acc, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Accuracy for WordCNN')
pylab.xlabel('epochs')
pylab.ylabel('accuracy')
pylab.legend(loc='lower right')

pylab.subplot(1, 2, 2)
for series, tag in ((test_ec, 'test'), (train_ec, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Loss for WordCNN')
pylab.xlabel('epochs')
pylab.ylabel('entropy cost')
pylab.legend(loc='upper right')

pylab.savefig('./results/partb/word_cnn/word_cnn_base.pdf')

### Train with dropout

In [None]:
# WordCNN with dropout: per-epoch histories (also read by the plotting cell)
# NOTE(review): `optimizer` is the same instance already used for
# word_cnn_model - confirm that sharing it between models is intended.
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # The metrics accumulate across calls, so clear them before every epoch
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    # Only the training pass is timed
    start_time = time()
    for images, labels in train_ds:
        train_step(word_cnn_dropout, images, labels, drop_rate=0.5)
    end_time = time()
    timings.append(end_time - start_time)

    # Evaluation passes drop_rate=0
    for images, labels in test_ds:
        test_step(word_cnn_dropout, images, labels, drop_rate=0)

    epoch_stats = (train_loss.result(), train_accuracy.result(),
                   test_loss.result(), test_accuracy.result())
    for history, value in zip((train_ec, train_acc, test_ec, test_acc), epoch_stats):
        history.append(value)
    print(template.format(epoch + 1, *epoch_stats))

# save results
word_cnn_dropout_res = dict(train_ec=train_ec, train_acc=train_acc,
                            test_ec=test_ec, test_acc=test_acc, timings=timings)

# Fixed path: the original '.results/...' was missing the '/' after the dot,
# so it pointed at a non-existent '.results' directory.
filename = './results/partb/word_cnn/word_cnn_dropout'
with open(filename, 'wb') as file_pi:
    pickle.dump(word_cnn_dropout_res, file_pi)

In [None]:
# Plot train/test accuracy (left) and entropy cost (right) per epoch
epoch_axis = np.arange(no_epochs)
pylab.figure(figsize=(12, 4))

pylab.subplot(1, 2, 1)
for series, tag in ((test_acc, 'test'), (train_acc, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Accuracy for WordCNN')
pylab.xlabel('epochs')
pylab.ylabel('accuracy')
pylab.legend(loc='lower right')

pylab.subplot(1, 2, 2)
for series, tag in ((test_ec, 'test'), (train_ec, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Loss for WordCNN')
pylab.xlabel('epochs')
pylab.ylabel('entropy cost')
pylab.legend(loc='upper right')

# Fixed path: the original '.results/...' was missing the '/' after the dot
pylab.savefig('./results/partb/word_cnn/word_cnn_dropout.pdf')

## WordRNN

In [None]:
# Hyper-parameters for the WordRNN section
MAX_DOCUMENT_LENGTH = 100
HIDDEN_SIZE = 20
EMBEDDING_SIZE = 20

# Output folder for the WordRNN runs; makedirs also creates missing parents
# and exist_ok=True makes re-running the cell harmless.
os.makedirs('./results/partb/word_rnn', exist_ok=True)

In [None]:
# Build model
tf.keras.backend.set_floatx('float32')


class WordRNN(Model):
    """Word-level recurrent classifier: embedding -> RNN layer(s) -> softmax.

    Args:
        vocab_size: number of entries in the embedding table.
        hidden_dim: number of units in every recurrent cell.
        use_dropout: when True, `call` applies dropout to the final encoding.
        cell_type: 'vanilla' (SimpleRNNCell), 'gru' (GRUCell) or 'lstm' (LSTMCell).
        stacked: when True, two recurrent layers of `cell_type` are chained
            (previously only supported for 'gru').

    Raises:
        ValueError: if `cell_type` is not one of the supported names
            (previously the model was built without any RNN layer and
            failed later inside `call`).
    """

    def __init__(self, vocab_size, hidden_dim=10, use_dropout=False, cell_type='gru', stacked=False):
        super(WordRNN, self).__init__()
        # Hyperparameters
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.use_dropout = use_dropout
        self.cell_type = cell_type
        self.stacked = stacked
        self.embedding = layers.Embedding(vocab_size, EMBEDDING_SIZE, input_length=MAX_DOCUMENT_LENGTH)

        # Recurrent cell selection
        cell_classes = {
            'vanilla': tf.keras.layers.SimpleRNNCell,
            'gru': tf.keras.layers.GRUCell,
            'lstm': tf.keras.layers.LSTMCell,
        }
        if cell_type not in cell_classes:
            raise ValueError(
                "cell_type must be one of {}, got {!r}".format(sorted(cell_classes), cell_type))
        make_cell = cell_classes[cell_type]

        if stacked:
            # The first layer returns the whole sequence so the second can consume it
            self.rnn1 = layers.RNN(make_cell(self.hidden_dim),
                                   unroll=True, return_sequences=True)
            self.rnn2 = layers.RNN(make_cell(self.hidden_dim),
                                   unroll=True)
        else:
            self.rnn = layers.RNN(make_cell(self.hidden_dim),
                                  unroll=True)

        # Softmax output: the model returns class probabilities, not raw logits
        self.dense = layers.Dense(MAX_LABEL, activation='softmax')

    def call(self, x, drop_rate):
        """Return class probabilities for a batch of word ids.

        Args:
            x: integer word ids, [n_samples, features].
            drop_rate: dropout rate, only used when `use_dropout` is True.
        """
        embedding = self.embedding(x)   # [n_samples, features, embedding_size(20)]
        if self.stacked:
            encoding = self.rnn2(self.rnn1(embedding))
        else:
            encoding = self.rnn(embedding)

        if self.use_dropout:
            encoding = tf.nn.dropout(encoding, drop_rate)
        return self.dense(encoding)


word_rnn_model = WordRNN(vocab_size, HIDDEN_SIZE, False)
word_rnn_dropout = WordRNN(vocab_size, HIDDEN_SIZE, True)
word_rnn_vanilla = WordRNN(vocab_size, HIDDEN_SIZE, False, cell_type='vanilla')
word_rnn_lstm = WordRNN(vocab_size, HIDDEN_SIZE, False, cell_type='lstm')
word_rnn_stacked = WordRNN(vocab_size, HIDDEN_SIZE, False, stacked=True)
word_rnn_gradclip = WordRNN(vocab_size, HIDDEN_SIZE, False)

### Functions

In [None]:
# Loss and optimiser used by train_step / test_step
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Running metrics: they accumulate over every batch they are called on until
# reset_states() is invoked (done at the start of each epoch).
train_loss, test_loss = (
    tf.keras.metrics.Mean(name=tag) for tag in ('train_loss', 'test_loss'))
train_accuracy, test_accuracy = (
    tf.keras.metrics.SparseCategoricalAccuracy(name=tag)
    for tag in ('train_accuracy', 'test_accuracy'))

In [None]:
# Training function
def train_step(model, x, label, drop_rate):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: the model to train; called as model(x, drop_rate).
        x: batch of inputs.
        label: batch of integer class labels.
        drop_rate: dropout rate forwarded to the model.
    """
    with tf.GradientTape() as tape:
        out = model(x, drop_rate)
        loss = loss_object(label, out)
    # Differentiate and apply the update outside the tape context so the
    # tape does not keep recording those operations.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    # Use the `label` argument (the original read a global named `labels`)
    train_accuracy(label, out)

# Testing function
def test_step(model, x, label, drop_rate=0):
    """Evaluate one batch and fold its loss and accuracy into the test metrics."""
    predictions = model(x, drop_rate)
    test_loss(loss_object(label, predictions))
    test_accuracy(label, predictions)

### Train without dropout

In [None]:
# WordRNN without dropout: per-epoch histories (also read by the plotting cell)
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # The metrics accumulate across calls, so clear them before every epoch
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    # Only the training pass is timed
    start_time = time()
    for images, labels in train_ds:
        train_step(word_rnn_model, images, labels, drop_rate=0.5)
    end_time = time()
    timings.append(end_time - start_time)

    for images, labels in test_ds:
        test_step(word_rnn_model, images, labels, drop_rate=0)

    epoch_stats = (train_loss.result(), train_accuracy.result(),
                   test_loss.result(), test_accuracy.result())
    for history, value in zip((train_ec, train_acc, test_ec, test_acc), epoch_stats):
        history.append(value)
    print(template.format(epoch + 1, *epoch_stats))

# save results
word_rnn_res = dict(train_ec=train_ec, train_acc=train_acc,
                    test_ec=test_ec, test_acc=test_acc, timings=timings)

# Fixed path: the original 'word_rnnword_rnn_base' was missing the '/' and
# wrote the file next to, rather than inside, the word_rnn folder.
filename = './results/partb/word_rnn/word_rnn_base'
with open(filename, 'wb') as file_pi:
    pickle.dump(word_rnn_res, file_pi)

In [None]:
# Plot train/test accuracy (left) and entropy cost (right) per epoch
epoch_axis = np.arange(no_epochs)
pylab.figure(figsize=(12, 4))

pylab.subplot(1, 2, 1)
for series, tag in ((test_acc, 'test'), (train_acc, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Accuracy for WordRNN')
pylab.xlabel('epochs')
pylab.ylabel('accuracy')
pylab.legend(loc='lower right')

pylab.subplot(1, 2, 2)
for series, tag in ((test_ec, 'test'), (train_ec, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Loss for WordRNN')
pylab.xlabel('epochs')
pylab.ylabel('entropy cost')
pylab.legend(loc='upper right')

pylab.savefig('./results/partb/word_rnn/word_rnn_base.pdf')

### Train with dropout

In [None]:
# WordRNN with dropout: per-epoch histories (also read by the plotting cell)
# NOTE(review): `optimizer` is the same Adam instance already used for
# word_rnn_model - confirm that sharing it between models is intended.
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # The metrics accumulate across calls, so clear them before every epoch
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    # Only the training pass is timed
    start_time = time()
    for images, labels in train_ds:
        train_step(word_rnn_dropout, images, labels, drop_rate=0.5)
    end_time = time()
    timings.append(end_time - start_time)

    # Evaluation passes drop_rate=0
    for images, labels in test_ds:
        test_step(word_rnn_dropout, images, labels, drop_rate=0)

    epoch_stats = (train_loss.result(), train_accuracy.result(),
                   test_loss.result(), test_accuracy.result())
    for history, value in zip((train_ec, train_acc, test_ec, test_acc), epoch_stats):
        history.append(value)
    print(template.format(epoch + 1, *epoch_stats))

# save results
word_rnn_dropout_res = dict(train_ec=train_ec, train_acc=train_acc,
                            test_ec=test_ec, test_acc=test_acc, timings=timings)

filename = './results/partb/word_rnn/word_rnn_dropout'
with open(filename, 'wb') as file_pi:
    pickle.dump(word_rnn_dropout_res, file_pi)

In [None]:
# Plot train/test accuracy (left) and entropy cost (right) per epoch
epoch_axis = np.arange(no_epochs)
pylab.figure(figsize=(12, 4))

pylab.subplot(1, 2, 1)
for series, tag in ((test_acc, 'test'), (train_acc, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Accuracy for WordRNN')
pylab.xlabel('epochs')
pylab.ylabel('accuracy')
pylab.legend(loc='lower right')

pylab.subplot(1, 2, 2)
for series, tag in ((test_ec, 'test'), (train_ec, 'train')):
    pylab.plot(epoch_axis, series, label=tag)
pylab.title('Model Loss for WordRNN')
pylab.xlabel('epochs')
pylab.ylabel('entropy cost')
pylab.legend(loc='upper right')

pylab.savefig('./results/partb/word_rnn/word_rnn_dropout.pdf')

## CharRNN

In [None]:
# Hyper-parameters for the CharRNN section
MAX_DOCUMENT_LENGTH = 100
HIDDEN_SIZE = 20

# Output folder for the CharRNN runs; makedirs also creates missing parents
# and exist_ok=True makes re-running the cell harmless.
os.makedirs('./results/partb/char_rnn', exist_ok=True)

In [None]:
# Build model
tf.keras.backend.set_floatx('float32')


class CharRNN(Model):
    """Character-level recurrent classifier: one-hot -> RNN layer(s) -> softmax.

    Args:
        vocab_size: depth of the one-hot encoding applied to the character ids.
        hidden_dim: number of units in every recurrent cell.
        use_dropout: when True, `call` applies dropout to the final encoding.
        cell_type: 'vanilla' (SimpleRNNCell), 'gru' (GRUCell) or 'lstm' (LSTMCell).
        stacked: when True, two recurrent layers of `cell_type` are chained
            (previously only supported for 'gru').

    Raises:
        ValueError: if `cell_type` is not one of the supported names
            (previously the model was built without any RNN layer and
            failed later inside `call`).
    """

    def __init__(self, vocab_size=256, hidden_dim=10, use_dropout=False, cell_type='gru', stacked=False):
        super(CharRNN, self).__init__()
        # Hyperparameters
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.use_dropout = use_dropout
        self.cell_type = cell_type
        self.stacked = stacked

        # Recurrent cell selection
        cell_classes = {
            'vanilla': tf.keras.layers.SimpleRNNCell,
            'gru': tf.keras.layers.GRUCell,
            'lstm': tf.keras.layers.LSTMCell,
        }
        if cell_type not in cell_classes:
            raise ValueError(
                "cell_type must be one of {}, got {!r}".format(sorted(cell_classes), cell_type))
        make_cell = cell_classes[cell_type]

        if stacked:
            # The first layer returns the whole sequence so the second can consume it
            self.rnn1 = layers.RNN(make_cell(self.hidden_dim),
                                   unroll=True, return_sequences=True)
            self.rnn2 = layers.RNN(make_cell(self.hidden_dim),
                                   unroll=True)
        else:
            self.rnn = layers.RNN(make_cell(self.hidden_dim),
                                  unroll=True)

        # Softmax output: the model returns class probabilities, not raw logits
        self.dense = layers.Dense(MAX_LABEL, activation='softmax')

    def call(self, x, drop_rate):
        """Return class probabilities for a batch of character ids.

        Args:
            x: integer character ids.
            drop_rate: dropout rate, only used when `use_dropout` is True.
        """
        # Use the constructor's vocab_size as the one-hot depth (previously
        # the global one_hot_size was read and the argument was ignored).
        x = tf.one_hot(x, self.vocab_size)
        if self.stacked:
            encoding = self.rnn2(self.rnn1(x))
        else:
            encoding = self.rnn(x)
        if self.use_dropout:
            encoding = tf.nn.dropout(encoding, drop_rate)
        return self.dense(encoding)


char_rnn_model = CharRNN(256, HIDDEN_SIZE, False)
char_rnn_dropout = CharRNN(256, HIDDEN_SIZE, True)
char_rnn_vanilla = CharRNN(256, HIDDEN_SIZE, False, cell_type='vanilla')
char_rnn_lstm = CharRNN(256, HIDDEN_SIZE, False, cell_type='lstm')
char_rnn_stacked = CharRNN(256, HIDDEN_SIZE, False, stacked=True)
char_rnn_gradclip = CharRNN(256, HIDDEN_SIZE, False)

### Optimiser, Loss, Train, Test Function

In [None]:
# Optimiser and loss used by train_step / test_step
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

# Running metrics: they accumulate over every batch they are called on until
# reset_states() is invoked (done at the start of each epoch).
train_loss, test_loss = (
    tf.keras.metrics.Mean(name=tag) for tag in ('train_loss', 'test_loss'))
train_accuracy, test_accuracy = (
    tf.keras.metrics.SparseCategoricalAccuracy(name=tag)
    for tag in ('train_accuracy', 'test_accuracy'))

In [None]:
# Training function
def train_step(model, x, label, drop_rate):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: the model to train; called as model(x, drop_rate).
        x: batch of inputs.
        label: batch of integer class labels.
        drop_rate: dropout rate forwarded to the model.
    """
    with tf.GradientTape() as tape:
        out = model(x, drop_rate)
        loss = loss_object(label, out)
    # Differentiate and apply the update outside the tape context so the
    # tape does not keep recording those operations.
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    # Use the `label` argument (the original read a global named `labels`)
    train_accuracy(label, out)

# Testing function
def test_step(model, x, label, drop_rate=0):
    """Evaluate one batch and fold the outcome into the test metrics.

    Args:
        model: callable as ``model(x, drop_rate)``.
        x: batch of inputs forwarded to the model.
        label: target labels for the batch.
        drop_rate: dropout rate forwarded to the model (defaults to 0).

    Updates the module-level ``test_loss`` and ``test_accuracy``.
    """
    predictions = model(x, drop_rate)
    test_loss(loss_object(label, predictions))
    test_accuracy(label, predictions)

### Train without dropout

In [None]:
# Train char_rnn_model for no_epochs epochs, keeping per-epoch loss/accuracy on
# both datasets plus the wall-clock time of each training pass.
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds:
        train_step(char_rnn_model, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds:
        test_step(char_rnn_model, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cells
char_rnn_res = dict(train_ec=train_ec,
                    train_acc=train_acc,
                    test_ec=test_ec,
                    test_acc=test_acc,
                    timings=timings)

filename = './results/partb/char_rnn/char_rnn_base'
with open(filename, 'wb') as file_pi:
    pickle.dump(char_rnn_res, file_pi)

In [None]:
# Plot the histories currently held in train_acc/test_acc (left panel) and
# train_ec/test_ec (right panel), then save the figure as a PDF.
_epoch_axis = np.arange(no_epochs)
_panels = [
    (1, test_acc, train_acc, 'Model Accuracy for CharRNN', 'accuracy', 'lower right'),
    (2, test_ec, train_ec, 'Model Loss for CharRNN', 'entropy cost', 'upper right'),
]

pylab.figure(figsize=(12,4))
for _slot, _test_curve, _train_curve, _title, _ylabel, _legend_loc in _panels:
    pylab.subplot(1, 2, _slot)
    pylab.plot(_epoch_axis, _test_curve, label='test')
    pylab.plot(_epoch_axis, _train_curve, label='train')
    pylab.title(_title)
    pylab.xlabel('epochs')
    pylab.ylabel(_ylabel)
    pylab.legend(loc=_legend_loc)

pylab.savefig('./results/partb/char_rnn/char_rnn_base.pdf')

### Train with dropout

In [None]:
# Train char_rnn_dropout for no_epochs epochs, keeping per-epoch loss/accuracy
# on both datasets plus the wall-clock time of each training pass.

# Start from a fresh Adam instance: the module-level optimizer has already been
# used to train another model, so reusing it would carry over its step counter
# and slot state (and newer TF optimizers refuse variables they were not built
# with). train_step looks `optimizer` up by name, so rebinding it is enough.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds:
        train_step(char_rnn_dropout, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    # Evaluation runs with dropout disabled (rate 0)
    for images, labels in test_ds:
        test_step(char_rnn_dropout, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cells
char_rnn_dropout_res = dict(train_ec=train_ec,
                            train_acc=train_acc,
                            test_ec=test_ec,
                            test_acc=test_acc,
                            timings=timings)

filename = './results/partb/char_rnn/char_rnn_dropout'
with open(filename, 'wb') as file_pi:
    pickle.dump(char_rnn_dropout_res, file_pi)

In [None]:
# Plot the histories currently held in train_acc/test_acc (left panel) and
# train_ec/test_ec (right panel), then save the figure as a PDF.
_epoch_axis = np.arange(no_epochs)
_panels = [
    (1, test_acc, train_acc, 'Model Accuracy for CharRNN', 'accuracy', 'lower right'),
    (2, test_ec, train_ec, 'Model Loss for CharRNN', 'entropy cost', 'upper right'),
]

pylab.figure(figsize=(12,4))
for _slot, _test_curve, _train_curve, _title, _ylabel, _legend_loc in _panels:
    pylab.subplot(1, 2, _slot)
    pylab.plot(_epoch_axis, _test_curve, label='test')
    pylab.plot(_epoch_axis, _train_curve, label='train')
    pylab.title(_title)
    pylab.xlabel('epochs')
    pylab.ylabel(_ylabel)
    pylab.legend(loc=_legend_loc)

pylab.savefig('./results/partb/char_rnn/char_rnn_dropout.pdf')

# Question 5

In [None]:
# Load back the baseline result dictionaries of the four models and report
# the mean training time per epoch and the best test accuracy of each.
models = ['char_cnn', 'word_cnn', 'char_rnn', 'word_rnn']
all_res = []

for model in models:
    # The model name appears twice in the path (folder and file prefix), so it
    # is referenced by index; two bare '{}' fields with a single argument
    # raise IndexError.
    filename = './results/partb/{0}/{0}_base'.format(model)
    # NOTE: pickle.load must only be used on trusted files; these are expected
    # to be the ones written by the training cells of this notebook.
    with open(filename, 'rb') as file_pi:
        all_res.append(pickle.load(file_pi))

print('Average Time Per Epoch')
for res, model in zip(all_res, models):
    avg = np.mean(res['timings'])
    print('{}: {}'.format(model, avg))

print('Max Validation Accuracy')
for res, model in zip(all_res, models):
    # Named best_acc so the builtin max() is not shadowed for later cells
    best_acc = np.max(res['test_acc'])
    print('{}: {}'.format(model, best_acc))

In [None]:
# Load back the dropout result dictionaries of the four models and report the
# best test accuracy of each.
models = ['char_cnn', 'word_cnn', 'char_rnn', 'word_rnn']
all_res_dropouts = []

for model in models:
    # The model name appears twice in the path (folder and file prefix), so it
    # is referenced by index; two bare '{}' fields with a single argument
    # raise IndexError.
    filename = './results/partb/{0}/{0}_dropout'.format(model)
    # NOTE: pickle.load must only be used on trusted files; these are expected
    # to be the ones written by the training cells of this notebook.
    with open(filename, 'rb') as file_pi:
        all_res_dropouts.append(pickle.load(file_pi))

print('Max Validation Accuracy (with dropouts)')
for res, model in zip(all_res_dropouts, models):
    # Named best_acc so the builtin max() is not shadowed for later cells
    best_acc = np.max(res['test_acc'])
    print('{}: {}'.format(model, best_acc))

# Question 6

#### Functions

In [None]:
# Choose optimizer and loss function for training.
# These are module-level objects: train_step/test_step look them up by name,
# so rebinding them here affects every training loop that runs afterwards.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Select metrics to measure the loss and the accuracy of the model.
# Each metric accumulates over the batches it is called on; the training loops
# reset them at the start of every epoch, so result() is a per-epoch value.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

# Training function
def train_step(model, x, label, drop_rate):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: callable as ``model(x, drop_rate)`` that exposes
            ``trainable_variables``.
        x: batch of inputs forwarded to the model.
        label: target labels for the batch.
        drop_rate: dropout rate forwarded to the model.

    Uses the module-level ``loss_object``, ``optimizer``, ``train_loss`` and
    ``train_accuracy``.
    """
    # Only the forward pass needs to be recorded; the backward pass and the
    # weight update run after the tape context has been closed.
    with tf.GradientTape() as tape:
        out = model(x, drop_rate)
        loss = loss_object(label, out)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    # Score against this call's own `label` argument rather than a global
    # named `labels` that merely happened to exist in the calling loop.
    train_accuracy(label, out)

# Testing function
def test_step(model, x, label, drop_rate=0):
    """Evaluate one batch and fold the outcome into the test metrics.

    Args:
        model: callable as ``model(x, drop_rate)``.
        x: batch of inputs forwarded to the model.
        label: target labels for the batch.
        drop_rate: dropout rate forwarded to the model (defaults to 0).

    Updates the module-level ``test_loss`` and ``test_accuracy``.
    """
    predictions = model(x, drop_rate)
    test_loss(loss_object(label, predictions))
    test_accuracy(label, predictions)

### CharRNN

#### Train for Vanilla

In [None]:
# Train char_rnn_vanilla for no_epochs epochs, keeping per-epoch loss/accuracy
# on both datasets plus the wall-clock time of each training pass.
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds_c:
        train_step(char_rnn_vanilla, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds_c:
        test_step(char_rnn_vanilla, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cell
char_rnn_vanilla_res = dict(train_ec=train_ec,
                            train_acc=train_acc,
                            test_ec=test_ec,
                            test_acc=test_acc,
                            timings=timings)

filename = './results/partb/char_rnn/char_rnn_vanilla'
with open(filename, 'wb') as file_pi:
    pickle.dump(char_rnn_vanilla_res, file_pi)

#### Train for LSTM

In [None]:
# Train char_rnn_lstm for no_epochs epochs, keeping per-epoch loss/accuracy on
# both datasets plus the wall-clock time of each training pass.

# Start from a fresh Adam instance: the module-level optimizer has already been
# used to train another model, so reusing it would carry over its step counter
# and slot state (and newer TF optimizers refuse variables they were not built
# with). train_step looks `optimizer` up by name, so rebinding it is enough.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds_c:
        train_step(char_rnn_lstm, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds_c:
        test_step(char_rnn_lstm, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cell
char_rnn_lstm_res = dict(train_ec=train_ec,
                         train_acc=train_acc,
                         test_ec=test_ec,
                         test_acc=test_acc,
                         timings=timings)

filename = './results/partb/char_rnn/char_rnn_lstm'
with open(filename, 'wb') as file_pi:
    pickle.dump(char_rnn_lstm_res, file_pi)

#### Train for 2 layers

In [None]:
# Train char_rnn_stacked for no_epochs epochs, keeping per-epoch loss/accuracy
# on both datasets plus the wall-clock time of each training pass.

# Start from a fresh Adam instance: the module-level optimizer has already been
# used to train another model, so reusing it would carry over its step counter
# and slot state (and newer TF optimizers refuse variables they were not built
# with). train_step looks `optimizer` up by name, so rebinding it is enough.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds_c:
        train_step(char_rnn_stacked, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds_c:
        test_step(char_rnn_stacked, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cell
char_rnn_stacked_res = dict(train_ec=train_ec,
                            train_acc=train_acc,
                            test_ec=test_ec,
                            test_acc=test_acc,
                            timings=timings)

filename = './results/partb/char_rnn/char_rnn_stacked'
with open(filename, 'wb') as file_pi:
    pickle.dump(char_rnn_stacked_res, file_pi)

#### Train with gradient clipping

In [None]:
# Choose optimizer and loss function for training.
# Same setup as before except that Adam is created with clipvalue=2 (gradient
# clipping by value).
# NOTE(review): this rebinds the module-level `optimizer`, so any training cell
# executed after this one keeps using the clipped optimizer unless it creates
# its own.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr, clipvalue=2)

# Select metrics to measure the loss and the accuracy of the model.
# Each metric accumulates over the batches it is called on; the training loops
# reset them at the start of every epoch, so result() is a per-epoch value.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

# Training function
def train_step(model, x, label, drop_rate):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: callable as ``model(x, drop_rate)`` that exposes
            ``trainable_variables``.
        x: batch of inputs forwarded to the model.
        label: target labels for the batch.
        drop_rate: dropout rate forwarded to the model.

    Uses the module-level ``loss_object``, ``optimizer``, ``train_loss`` and
    ``train_accuracy``.
    """
    # Only the forward pass needs to be recorded; the backward pass and the
    # weight update run after the tape context has been closed.
    with tf.GradientTape() as tape:
        out = model(x, drop_rate)
        loss = loss_object(label, out)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    # Score against this call's own `label` argument rather than a global
    # named `labels` that merely happened to exist in the calling loop.
    train_accuracy(label, out)

# Testing function
def test_step(model, x, label, drop_rate=0):
    """Evaluate one batch and fold the outcome into the test metrics.

    Args:
        model: callable as ``model(x, drop_rate)``.
        x: batch of inputs forwarded to the model.
        label: target labels for the batch.
        drop_rate: dropout rate forwarded to the model (defaults to 0).

    Updates the module-level ``test_loss`` and ``test_accuracy``.
    """
    predictions = model(x, drop_rate)
    test_loss(loss_object(label, predictions))
    test_accuracy(label, predictions)

In [None]:
# Train char_rnn_gradclip for no_epochs epochs, keeping per-epoch loss/accuracy
# on both datasets plus the wall-clock time of each training pass.
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds_c:
        train_step(char_rnn_gradclip, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds_c:
        test_step(char_rnn_gradclip, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cell
char_rnn_gradclip_res = dict(train_ec=train_ec,
                             train_acc=train_acc,
                             test_ec=test_ec,
                             test_acc=test_acc,
                             timings=timings)

filename = './results/partb/char_rnn/char_rnn_gradclip'
with open(filename, 'wb') as file_pi:
    pickle.dump(char_rnn_gradclip_res, file_pi)

### Load back results

In [None]:
# Load back the result dictionaries of the four CharRNN variants and report
# the best test accuracy of each.
char_rnn_models = ['vanilla', 'lstm', 'stacked', 'gradclip']
all_char_rnn_res = []

for model in char_rnn_models:
    filename = './results/partb/char_rnn/char_rnn_{}'.format(model)
    # Context manager so the file handle is closed right after loading.
    # NOTE: pickle.load must only be used on trusted files; these are the ones
    # written by the CharRNN training cells.
    with open(filename, 'rb') as file_pi:
        all_char_rnn_res.append(pickle.load(file_pi))

print('Max Validation Accuracy for CharRNN Models')
for res, model in zip(all_char_rnn_res, char_rnn_models):
    # Named best_acc so the builtin max() is not shadowed for later cells
    best_acc = np.max(res['test_acc'])
    print('{}: {}'.format(model, best_acc))

## WordRNN

### Train for Vanilla

In [None]:
# Train word_rnn_vanilla for no_epochs epochs, keeping per-epoch loss/accuracy
# on both datasets plus the wall-clock time of each training pass.

# Use a fresh, unclipped Adam instance. The module-level optimizer left over
# from the CharRNN gradient-clipping cell was built with clipvalue=2 and has
# already trained another model, which would silently clip this baseline run.
# train_step looks `optimizer` up by name, so rebinding it is enough.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds:
        train_step(word_rnn_vanilla, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds:
        test_step(word_rnn_vanilla, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cell
word_rnn_vanilla_res = dict(train_ec=train_ec,
                            train_acc=train_acc,
                            test_ec=test_ec,
                            test_acc=test_acc,
                            timings=timings)

filename = './results/partb/word_rnn/word_rnn_vanilla'
with open(filename, 'wb') as file_pi:
    pickle.dump(word_rnn_vanilla_res, file_pi)

### Train for LSTM

In [None]:
# Train word_rnn_lstm for no_epochs epochs, keeping per-epoch loss/accuracy on
# both datasets plus the wall-clock time of each training pass.

# Use a fresh, unclipped Adam instance. The module-level optimizer left over
# from the CharRNN gradient-clipping cell was built with clipvalue=2 and has
# already trained another model, which would silently clip this baseline run.
# train_step looks `optimizer` up by name, so rebinding it is enough.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds:
        train_step(word_rnn_lstm, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds:
        test_step(word_rnn_lstm, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cell
word_rnn_lstm_res = dict(train_ec=train_ec,
                         train_acc=train_acc,
                         test_ec=test_ec,
                         test_acc=test_acc,
                         timings=timings)

filename = './results/partb/word_rnn/word_rnn_lstm'
with open(filename, 'wb') as file_pi:
    pickle.dump(word_rnn_lstm_res, file_pi)

### Train for 2 layers

In [None]:
# Train word_rnn_stacked for no_epochs epochs, keeping per-epoch loss/accuracy
# on both datasets plus the wall-clock time of each training pass.

# Use a fresh, unclipped Adam instance. The module-level optimizer left over
# from the CharRNN gradient-clipping cell was built with clipvalue=2 and has
# already trained another model, which would silently clip this baseline run.
# train_step looks `optimizer` up by name, so rebinding it is enough.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds:
        train_step(word_rnn_stacked, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds:
        test_step(word_rnn_stacked, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cell
word_rnn_stacked_res = dict(train_ec=train_ec,
                            train_acc=train_acc,
                            test_ec=test_ec,
                            test_acc=test_acc,
                            timings=timings)

filename = './results/partb/word_rnn/word_rnn_stacked'
with open(filename, 'wb') as file_pi:
    pickle.dump(word_rnn_stacked_res, file_pi)

### Train for gradient clipping

In [None]:
# Choose optimizer and loss function for training.
# Adam is created with clipvalue=2 (gradient clipping by value) for the
# WordRNN gradient-clipping run below.
# NOTE(review): this rebinds the module-level `optimizer`, so any training cell
# executed after this one keeps using the clipped optimizer unless it creates
# its own.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr, clipvalue=2)

# Select metrics to measure the loss and the accuracy of the model.
# Each metric accumulates over the batches it is called on; the training loops
# reset them at the start of every epoch, so result() is a per-epoch value.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

# Training function
def train_step(model, x, label, drop_rate):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        model: callable as ``model(x, drop_rate)`` that exposes
            ``trainable_variables``.
        x: batch of inputs forwarded to the model.
        label: target labels for the batch.
        drop_rate: dropout rate forwarded to the model.

    Uses the module-level ``loss_object``, ``optimizer``, ``train_loss`` and
    ``train_accuracy``.
    """
    # Only the forward pass needs to be recorded; the backward pass and the
    # weight update run after the tape context has been closed.
    with tf.GradientTape() as tape:
        out = model(x, drop_rate)
        loss = loss_object(label, out)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    # Score against this call's own `label` argument rather than a global
    # named `labels` that merely happened to exist in the calling loop.
    train_accuracy(label, out)

# Testing function
def test_step(model, x, label, drop_rate=0):
    """Evaluate one batch and fold the outcome into the test metrics.

    Args:
        model: callable as ``model(x, drop_rate)``.
        x: batch of inputs forwarded to the model.
        label: target labels for the batch.
        drop_rate: dropout rate forwarded to the model (defaults to 0).

    Updates the module-level ``test_loss`` and ``test_accuracy``.
    """
    predictions = model(x, drop_rate)
    test_loss(loss_object(label, predictions))
    test_accuracy(label, predictions)

In [None]:
# Train word_rnn_gradclip for no_epochs epochs, keeping per-epoch loss/accuracy
# on both datasets plus the wall-clock time of each training pass.
train_ec, train_acc, test_ec, test_acc, timings = [], [], [], [], []
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(no_epochs):
    # Metrics accumulate across calls, so clear them before every epoch
    for _metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        _metric.reset_states()

    # Only the training pass is timed.
    # NOTE(review): check train_step before renaming these loop variables; it
    # has relied on a global called `labels`.
    start_time = time()
    for images, labels in train_ds:
        train_step(word_rnn_gradclip, images, labels, drop_rate=0.5)
    timings.append(time() - start_time)

    for images, labels in test_ds:
        test_step(word_rnn_gradclip, images, labels, drop_rate=0)

    train_ec.append(train_loss.result())
    train_acc.append(train_accuracy.result())
    test_ec.append(test_loss.result())
    test_acc.append(test_accuracy.result())

    print(template.format(epoch + 1,
                          train_ec[-1], train_acc[-1],
                          test_ec[-1], test_acc[-1]))

# Bundle the histories and persist them for the comparison cell
word_rnn_gradclip_res = dict(train_ec=train_ec,
                             train_acc=train_acc,
                             test_ec=test_ec,
                             test_acc=test_acc,
                             timings=timings)

filename = './results/partb/word_rnn/word_rnn_gradclip'
with open(filename, 'wb') as file_pi:
    pickle.dump(word_rnn_gradclip_res, file_pi)

### Load Back Results

In [None]:
# Load back the result dictionaries of the four WordRNN variants and report
# the best test accuracy of each.
word_rnn_models = ['vanilla', 'lstm', 'stacked', 'gradclip']
all_word_rnn_res = []

for model in word_rnn_models:
    filename = './results/partb/word_rnn/word_rnn_{}'.format(model)
    # Context manager so the file handle is closed right after loading.
    # NOTE: pickle.load must only be used on trusted files; these are the ones
    # written by the WordRNN training cells.
    with open(filename, 'rb') as file_pi:
        all_word_rnn_res.append(pickle.load(file_pi))

print('Max Validation Accuracy for WordRNN Models')
for res, model in zip(all_word_rnn_res, word_rnn_models):
    # Named best_acc so the builtin max() is not shadowed for later cells
    best_acc = np.max(res['test_acc'])
    print('{}: {}'.format(model, best_acc))