In [None]:
# Colab-only setup: attach the user's Google Drive to the runtime filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from main import *
from sklearn.model_selection import train_test_split
from time import time
import os

In [None]:
# Corpus file on the mounted Google Drive.
path = "/content/drive/My Drive/encoder_decoder_basic/spa.txt"
# option `None` means "use the whole dataset"
num_examples = None
# NOTE(review): `load_dataset` comes from `main`; it is used below as if it
# returns two 2-D arrays plus one tokenizer-like object per language
# (`.word_index` is read later) -- confirm against main.py.
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(path, num_examples)

# Second axis of each tensor is taken as the sequence length for that side.
max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]

# Creating training and validation sets using a 90-10 split (test_size=0.1).
# The split is seeded so that a kernel restart reproduces the same partition;
# otherwise models restored from the Drive checkpoints could be evaluated on
# pairs they were trained on in an earlier session.
SPLIT_SEED = 42
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
    input_tensor,
    target_tensor,
    test_size=0.1,
    random_state=SPLIT_SEED,
)
print(f"training examples: {len(input_tensor_train)}")
print(f"testing examples: {len(input_tensor_val)}")

training examples: 115275
testing examples: 12809


In [None]:
# Batch geometry and vocabulary sizes used by every model below.
BATCH_SIZE = 64
# Shuffle buffer spans the whole training split.
BUFFER_SIZE = len(input_tensor_train)
# One extra slot on top of the word index -- presumably the padding id 0;
# confirm against the tokenizer built in main.py.
vocab_inp_size = 1 + len(inp_lang.word_index)
vocab_tar_size = 1 + len(targ_lang.word_index)

# Training pipeline: shuffle, then cut into full batches only.
steps_per_epoch = len(input_tensor_train) // BATCH_SIZE
dataset = (
    tf.data.Dataset
    .from_tensor_slices((input_tensor_train, target_tensor_train))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

# Validation pipeline: same batching, original order (no shuffle).
steps_per_epoch_test = len(input_tensor_val) // BATCH_SIZE
test_data = (
    tf.data.Dataset
    .from_tensor_slices((input_tensor_val, target_tensor_val))
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [None]:
# Hyper-parameters common to all four encoder/decoder pairs.
embedding_dim = 256
units = 1024

# Depth of each variant (m1/m3 are single-layer, m2/m4 are two-layer).
n_layers_m1 = 1
n_layers_m2 = 2
n_layers_m3 = 1
n_layers_m4 = 2

# Models are built in the order m1, m2, m3, m4, encoder before decoder.
# `Encoder`, `DecoderNA` and `Decoder` are provided by `main`.

# m1: encoder + DecoderNA
encoder_m1, decoder_m1 = (
    Encoder(vocab_inp_size, embedding_dim, n_layers_m1, units, BATCH_SIZE),
    DecoderNA(vocab_tar_size, embedding_dim, n_layers_m1, units, BATCH_SIZE),
)

# m2: deeper encoder + DecoderNA
encoder_m2, decoder_m2 = (
    Encoder(vocab_inp_size, embedding_dim, n_layers_m2, units, BATCH_SIZE),
    DecoderNA(vocab_tar_size, embedding_dim, n_layers_m2, units, BATCH_SIZE),
)

# m3: encoder + decoder with attention
encoder_m3, decoder_m3 = (
    Encoder(vocab_inp_size, embedding_dim, n_layers_m3, units, BATCH_SIZE),
    Decoder(vocab_tar_size, embedding_dim, n_layers_m3, units, BATCH_SIZE),
)

# m4: multilayer encoder + decoder with attention
encoder_m4, decoder_m4 = (
    Encoder(vocab_inp_size, embedding_dim, n_layers_m4, units, BATCH_SIZE),
    Decoder(vocab_tar_size, embedding_dim, n_layers_m4, units, BATCH_SIZE),
)

In [None]:
# One independent Adam optimizer per model, so optimizer state is never shared.
# Note: the optimizer for model 1 is named plain `optimizer`.
optimizer = tf.keras.optimizers.Adam()
optimizer_m2 = tf.keras.optimizers.Adam()
optimizer_m3 = tf.keras.optimizers.Adam()
optimizer_m4 = tf.keras.optimizers.Adam()

# Per-element loss on raw logits (reduction='none'); any masking or averaging
# is left to the caller.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction='none',
)

# Every model writes under the same Drive folder with its own file prefix.
checkpoint_dir = '/content/drive/My Drive/encoder_decoder_basic/training_checkpoints/'
checkpoint_prefix_m1 = os.path.join(checkpoint_dir, 'ckpt_m1')
checkpoint_prefix_m2 = os.path.join(checkpoint_dir, 'ckpt_m2')
checkpoint_prefix_m3 = os.path.join(checkpoint_dir, 'ckpt_m3')
checkpoint_prefix_m4 = os.path.join(checkpoint_dir, 'ckpt_m4')

# Each checkpoint tracks one (optimizer, encoder, decoder) triple.
checkpoint_m1 = tf.train.Checkpoint(
    optimizer=optimizer,
    encoder=encoder_m1,
    decoder=decoder_m1,
)
checkpoint_m2 = tf.train.Checkpoint(
    optimizer=optimizer_m2,
    encoder=encoder_m2,
    decoder=decoder_m2,
)
checkpoint_m3 = tf.train.Checkpoint(
    optimizer=optimizer_m3,
    encoder=encoder_m3,
    decoder=decoder_m3,
)
checkpoint_m4 = tf.train.Checkpoint(
    optimizer=optimizer_m4,
    encoder=encoder_m4,
    decoder=decoder_m4,
)

In [None]:
# Train model 1 (single-layer Encoder + DecoderNA).
# `train` is imported from `main`; arguments are passed positionally in the
# order that function expects.
EPOCHS = 10
train(
    dataset,
    encoder_m1,
    decoder_m1,
    loss_object,
    optimizer,
    EPOCHS,
    BATCH_SIZE,
    steps_per_epoch,
    targ_lang,
    checkpoint_m1,
    checkpoint_prefix_m1,
)

In [None]:
# Train model 3 (single-layer Encoder + attention Decoder).
# `train` is imported from `main`; arguments are passed positionally in the
# order that function expects.
EPOCHS = 10
train(
    dataset,
    encoder_m3,
    decoder_m3,
    loss_object,
    optimizer_m3,
    EPOCHS,
    BATCH_SIZE,
    steps_per_epoch,
    targ_lang,
    checkpoint_m3,
    checkpoint_prefix_m3,
)

In [None]:
# Train model 2 (two-layer Encoder + DecoderNA).
# `train` is imported from `main`; arguments are passed positionally in the
# order that function expects.
EPOCHS = 10
train(
    dataset,
    encoder_m2,
    decoder_m2,
    loss_object,
    optimizer_m2,
    EPOCHS,
    BATCH_SIZE,
    steps_per_epoch,
    targ_lang,
    checkpoint_m2,
    checkpoint_prefix_m2,
)

In [None]:
# Train model 4 (two-layer Encoder + attention Decoder).
# `train` is imported from `main`; arguments are passed positionally in the
# order that function expects.
EPOCHS = 10
train(
    dataset,
    encoder_m4,
    decoder_m4,
    loss_object,
    optimizer_m4,
    EPOCHS,
    BATCH_SIZE,
    steps_per_epoch,
    targ_lang,
    checkpoint_m4,
    checkpoint_prefix_m4,
)

In [None]:
# Score model 4 on the held-out batches.
# `evaluate` is imported from `main`; the recorded cell output suggests it
# reports an average F1 over the validation data -- confirm in main.py.
evaluate(
    test_data,
    encoder_m4,
    decoder_m4,
    inp_lang,
    targ_lang,
    max_length_inp,
    max_length_targ,
    units,
    n_layers_m4,
    BATCH_SIZE,
    steps_per_epoch_test,
)

Average f1 score over validation dataset : 0.6094714520908817
