# Accessing the project directory on my Google Drive  

In [1]:
import os
from google.colab import drive

# Mount Google Drive under /drive. force_remount=False is passed explicitly,
# i.e. this call does not ask for a forced remount.
drive.mount('/drive', force_remount=False)
# Root folder of the project on the mounted drive (note the trailing slash).
project_dir = "/drive/My Drive/RNNSeq2Seq/"

# Change the working directory to project_dir. Later cells import from
# `scripts.*` and read the relative "tuning_data/" folder, which presumably
# rely on this being the current directory.
os.chdir(project_dir)

Mounted at /drive


# Dependencies

In [2]:
from scripts.model import *
from scripts.dataloader import create_dataloader
from scripts.utils import *
from scripts.pytorch_utils import *
import matplotlib.pyplot as plt

# Helper function to visualize the training log

In [3]:
def plot_log(log):
    """Plot per-epoch loss and accuracy curves for the train and dev sets.

    Args:
        log: mapping with one entry per epoch stored under the key
            ``"Epoch#<n>"`` (n counted from 1). Each entry holds a
            ``"Train"`` and an ``"Eval"`` dict, both providing ``'loss'``
            and ``'abosulate accuracy'``. Any other keys (for instance
            ``"Best eval accu"``) are ignored.

    Returns:
        None. Draws a two-panel figure (loss on top, accuracy below) and
        displays it with ``plt.show()``.

    Raises:
        KeyError: if the epoch keys are not numbered consecutively from 1,
            or an epoch entry lacks one of the expected fields.
    """
    # Count only the epoch entries so that summary keys stored next to them
    # (however many there are) cannot shift the epoch range.
    num_epochs = sum(1 for key in log if str(key).startswith("Epoch#"))
    epoch_nums = list(range(1, num_epochs + 1))

    train_loss, dev_loss = [], []
    train_acc, dev_acc = [], []
    for epoch in epoch_nums:
        record = log[f"Epoch#{epoch}"]
        train, dev = record["Train"], record["Eval"]

        train_loss.append(train['loss'])
        dev_loss.append(dev['loss'])
        # 'abosulate accuracy' (sic) is the spelling of the key in the log,
        # so it has to be used verbatim here.
        train_acc.append(train['abosulate accuracy'])
        dev_acc.append(dev['abosulate accuracy'])

    # Explicit figure/axes interface: two stacked panels sharing the epoch axis.
    fig, (ax_loss, ax_acc) = plt.subplots(2, 1, sharex=True)

    ax_loss.plot(epoch_nums, train_loss, label="train loss")
    ax_loss.plot(epoch_nums, dev_loss, label="dev loss")
    ax_loss.set_ylabel("loss")
    ax_loss.set_title("Training log")
    ax_loss.legend()

    ax_acc.plot(epoch_nums, train_acc, label="train acc")
    ax_acc.plot(epoch_nums, dev_acc, label="dev acc")
    ax_acc.set_xlabel("epoch")
    ax_acc.set_ylabel("absolute accuracy")
    ax_acc.legend()

    fig.tight_layout()
    plt.show()

In [4]:
# Folder holding the tuning data sets. It is a relative path, so it is
# resolved against the current working directory.
folder = "tuning_data/"
experiment_num = 5  # not referenced by any other cell visible in this notebook
shuffle_batch = False

# When shuffle_batch is False, tr_batch_size must equal the number of
# sequences per sequence length (author's constraint; presumably so that each
# unshuffled batch holds sequences of a single length — TODO confirm).
# When shuffle_batch is True, this same tr_batch_size is also used for the dev
# set (and, per the original note, for shuffled test sets); otherwise the dev
# set uses batches of 500.
tr_batch_size = 250 
dev_batch_size = 500 if not shuffle_batch else tr_batch_size

In [7]:
# Settings for training and evaluating; every value below is passed to
# train_and_evaluate in the training cells.

print_freq = 5  # the captured training output reports metrics at epochs 5, 10, 15, ...
acc_threshold = 0.5 # accuracy threshold for saving the best trained models
max_epoch_num = 150 # maximum number of epochs
train_acc_exit=0.92 # train acc exit threshold (eval_acc_exit must also be met)
eval_acc_exit=0.92 # dev acc exit threshold (train_acc_exit must also be met)
teacher_forcing_ratio = 1.0 # probability of using the real symbol from the target sequence during training

# RevStr

In [8]:
# Load the RevStr train/dev splits from the tuning data folder.
# read_data and join come from the star imports of the project's scripts.
train = read_data(join(folder, "RevStr/train.txt"))
dev = read_data(join(folder, "RevStr/dev.txt"))
# File path handed to train_and_evaluate as `saved_model_fp`
# (presumably where the best model is written — confirm in scripts).
saved_model_fp = join(project_dir, "notebooks/Hyperparatemers Tuning/RevStr_lstm_model.pt")

# Batch the two splits; both use the notebook-wide shuffle_batch flag, and the
# dev split uses its own batch size.
train_dl = create_dataloader(train, batch_size=tr_batch_size, shuffle=shuffle_batch)
dev_dl = create_dataloader(dev, batch_size=dev_batch_size, shuffle=shuffle_batch)

## Hyperparameter set 1

In [9]:
# Input and output alphabets are the same. ascii_lowercase is presumably
# string.ascii_lowercase re-exported by one of the star imports.
in_vocab = ascii_lowercase
out_vocab = ascii_lowercase
# "+ 2" presumably reserves ids for two special symbols — TODO confirm
# against the project's scripts.
in_vocab_size = len(in_vocab) + 2
out_vocab_size = len(out_vocab) + 2

# Model hyperparameters for this run. Apart from hidden_size, embd_dim,
# num_layers, rnn_type and learning_rate, later hyperparameter cells do not
# reassign these values and simply reuse them as kernel globals.
hidden_size = 256
embd_dim = 128
num_layers = 1
rnn_type = "LSTM"
dropout_rate = 0.0
bidirectional = True
use_attention = False
# Passed to the Encoder as its last argument; presumably used to combine the
# two directions of the bidirectional RNN — TODO confirm.
reduction_method = torch.sum

learning_rate = 5e-4
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
# The attention module is always built and handed to the decoder; whether it
# is used is governed by the separate use_attention flag (False here).
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
# Apply the project's init_weights function across the model.
model.apply(init_weights)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [10]:
# Train and evaluate the hyperparameter-set-1 model on RevStr. The returned
# value is kept in `log`; plot_log later reads per-epoch entries from such a log.
# All arguments are positional, so their order must match the signature of
# train_and_evaluate in the project's scripts.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 3.156470203399658, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.010700952785555273, 'overlap rate': 0.08123138044029474}
evaluation performance: {'loss': 3.124418765306473, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.013804840645752848, 'overlap rate': 0.09062504991889}

Current epoch: 10, 
training performance: {'loss': 2.5327584981918334, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.019364073278848083, 'overlap rate': 0.22426162362098695}
evaluation performance: {'loss': 2.868162214756012, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.019482497801072897, 'overlap rate': 0.12558982335031033}

Current epoch: 15, 
training performance: {'loss': 2.0444620966911318, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.03903427100740373, 'overlap rate': 0.3582210224121809}
evaluation performance: {'loss': 2.768571138381958, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.037255165

KeyboardInterrupt: ignored

## Hyperparameter set 2


In [12]:
# Hyperparameter set 2: larger hidden state (256 -> 384) and embedding
# (128 -> 256) than set 1; the learning rate stays at 5e-4.
# dropout_rate, bidirectional, reduction_method, use_attention, the vocabulary
# sizes and device are NOT redefined here; they are reused from the set-1 cell,
# which therefore has to be executed first.
hidden_size = 384
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

# A fresh model replaces the previous one under the same global names.
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 5e-4
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the freshly built model.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [13]:
# Train and evaluate the hyperparameter-set-2 model on RevStr; `log` is
# overwritten with the value returned by this run.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 3.0998071312904356, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.015165035880636423, 'overlap rate': 0.09512267261743546}
evaluation performance: {'loss': 3.0808730483055116, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.015795153030194343, 'overlap rate': 0.0990808418020606}

Current epoch: 10, 
training performance: {'loss': 2.4021985054016115, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.025761540955863894, 'overlap rate': 0.25648026503622534}
evaluation performance: {'loss': 2.7952981114387514, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.025953369843773545, 'overlap rate': 0.13833190836012363}

Current epoch: 15, 
training performance: {'loss': 1.7518838316202163, 'abosulate accuracy': 0.00010000000474974513, 'consecutive overlap rate': 0.06396952425129712, 'overlap rate': 0.43575517311692236}
evaluation performance: {'loss': 2.6352903217077257, 'abosulate accuracy': 0.0001000000047497

KeyboardInterrupt: ignored

## Hyperparameter set 3

In [14]:
# Hyperparameter set 3: same architecture values as set 2 (hidden_size 384,
# embd_dim 256); the only change is the learning rate, raised from 5e-4 to 5e-3.
# dropout_rate, bidirectional, reduction_method, use_attention, the vocabulary
# sizes and device are reused from the set-1 cell.
hidden_size = 384
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

# A fresh model replaces the previous one under the same global names.
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 5e-3
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the freshly built model.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
# Train and evaluate the hyperparameter-set-3 model on RevStr; `log` is
# overwritten with the value returned by this run.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.9917359977960587, 'abosulate accuracy': 0.00010000000474974513, 'consecutive overlap rate': 0.09928605342283844, 'overlap rate': 0.36638888344168663}
evaluation performance: {'loss': 2.4620127826929092, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.12910344367846846, 'overlap rate': 0.23979217186570168}

Current epoch: 10, 
training performance: {'loss': 1.1593837656080723, 'abosulate accuracy': 0.011600000644102692, 'consecutive overlap rate': 0.22039294634014367, 'overlap rate': 0.603282880783081}
evaluation performance: {'loss': 2.315541833639145, 'abosulate accuracy': 5.0000002374872565e-05, 'consecutive overlap rate': 0.2194042459130287, 'overlap rate': 0.35284348502755164}

Current epoch: 15, 
training performance: {'loss': 0.8628774482756854, 'abosulate accuracy': 0.04420000233221799, 'consecutive overlap rate': 0.29633779525756837, 'overlap rate': 0.7007954493165016}
evaluation performance: {'loss': 2.32217605710029

In [16]:
# NOTE: this is a second call on the SAME model and optimizer as the previous
# cell (nothing is re-created in between), so it continues training the set-3
# model rather than starting over; the captured output accordingly starts from
# a much lower training loss. `log` is overwritten, so the history of the
# previous call is no longer available through it.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 0.022206133205327206, 'abosulate accuracy': 0.7573000296950341, 'consecutive overlap rate': 0.9142616584897041, 'overlap rate': 0.9946980372071266}
evaluation performance: {'loss': 4.530015012621879, 'abosulate accuracy': 0.05600000281119719, 'consecutive overlap rate': 0.2923725279048085, 'overlap rate': 0.5398232460021972}

Current epoch: 10, 
training performance: {'loss': 0.0344968291639816, 'abosulate accuracy': 0.6525000303983688, 'consecutive overlap rate': 0.8393292024731636, 'overlap rate': 0.9897615984082222}
evaluation performance: {'loss': 4.539888751506806, 'abosulate accuracy': 0.05685000268858857, 'consecutive overlap rate': 0.2953063528984785, 'overlap rate': 0.542120935767889}

Current epoch: 15, 
training performance: {'loss': 0.12655834497418256, 'abosulate accuracy': 0.3808000199380331, 'consecutive overlap rate': 0.6086457669734955, 'overlap rate': 0.9551188156008721}
evaluation performance: {'loss': 4.5151604205369

KeyboardInterrupt: ignored

# RedStr



In [17]:
# Load the RedStr train/dev splits from the tuning data folder.
# read_data and join come from the star imports of the project's scripts.
train = read_data(join(folder, "RedStr/train.txt"))
dev = read_data(join(folder, "RedStr/dev.txt"))
# File path handed to train_and_evaluate as `saved_model_fp`. The file is named
# after the RedStr task (it was previously labelled "RevStr_model.pt", a
# copy-paste slip) so that it cannot be mistaken for a RevStr checkpoint.
saved_model_fp = join(project_dir, "notebooks/Hyperparatemers Tuning/RedStr_lstm_model.pt")

# Batch the two splits; both use the notebook-wide shuffle_batch flag, and the
# dev split uses its own batch size.
train_dl = create_dataloader(train, batch_size=tr_batch_size, shuffle=shuffle_batch)
dev_dl = create_dataloader(dev, batch_size=dev_batch_size, shuffle=shuffle_batch)

## Hyperparameter set 1


In [18]:
# RedStr hyperparameter set 1: the same values as RevStr set 3
# (hidden_size 384, embd_dim 256, learning rate 5e-3).
# dropout_rate, bidirectional, reduction_method, use_attention, the vocabulary
# sizes and device are reused from the first RevStr hyperparameter cell, which
# therefore has to be executed before this one.
hidden_size = 384
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

# A fresh model replaces the RevStr one under the same global names.
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 5e-3
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the freshly built model.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train and evaluate the RedStr model; train_dl and dev_dl now refer to the
# RedStr splits, and `log` is overwritten with the value returned by this run.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.9082737118005753, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.1081216687336564, 'overlap rate': 0.3857444874942303}
evaluation performance: {'loss': 2.4091228723526, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.1430215218104422, 'overlap rate': 0.25363508686423303}

Current epoch: 10, 
training performance: {'loss': 1.1443809315562248, 'abosulate accuracy': 0.008400000364053995, 'consecutive overlap rate': 0.21873919554054738, 'overlap rate': 0.6102190256118775}
evaluation performance: {'loss': 2.231535592675209, 'abosulate accuracy': 0.0009500000218395144, 'consecutive overlap rate': 0.2236722782254219, 'overlap rate': 0.3659326836466789}

Current epoch: 15, 
training performance: {'loss': 0.8297893077135086, 'abosulate accuracy': 0.05440000231610611, 'consecutive overlap rate': 0.3123174518346786, 'overlap rate': 0.7143769800662995}
evaluation performance: {'loss': 2.18544145822525, 'abosulate accuracy': 0.0

In [None]:
# Plot the loss/accuracy curves stored in `log`.
# NOTE(review): if the preceding training cell was interrupted before it
# returned, `log` may still hold an earlier run (or be undefined) — confirm
# which run is being plotted.
plot_log(log)