# Accessing the project directory on my Google Drive  

In [1]:
import os
from google.colab import drive

# Mount Google Drive at /drive (requires the Colab runtime imported above).
drive.mount('/drive', force_remount=False)
# Root of the project on the mounted drive; later cells join paths onto it.
project_dir = "/drive/My Drive/RNNSeq2Seq/"

# change working directory to project_dir so that the `scripts.*` imports and
# the relative "tuning_data/" paths used further down resolve from here
os.chdir(project_dir)

Mounted at /drive


# Dependencies

In [2]:
from scripts.model import *
from scripts.dataloader import create_dataloader
from scripts.utils import *
from scripts.pytorch_utils import *
import matplotlib.pyplot as plt

# Helper function to visualize the training log (loss and accuracy per epoch)

In [3]:
def plot_log(log):
    """Plot per-epoch train/dev loss and absolute accuracy from a training log.

    Parameters
    ----------
    log : dict
        Mapping with one ``"Epoch#<n>"`` entry per recorded epoch. Each entry
        holds a ``"Train"`` and an ``"Eval"`` dict, and both of those provide
        ``'loss'`` and ``'abosulate accuracy'`` (the misspelling is the key
        actually stored in the log, so it is kept verbatim). Any other key,
        e.g. ``"Best eval accu"``, is ignored.

    Returns
    -------
    None
        The figure (loss on top, accuracy below) is rendered via ``plt.show()``.

    Raises
    ------
    KeyError
        If an epoch entry lacks ``"Train"``/``"Eval"`` or one of the metrics.
    """
    prefix = "Epoch#"
    # Take the epoch numbers from the keys themselves instead of deriving them
    # from len(log): this stays correct when the log carries extra summary
    # keys or when the recorded epochs are not exactly 1..N.
    epoch_keys = sorted(
        (int(key[len(prefix):]), key)
        for key in log
        if isinstance(key, str)
        and key.startswith(prefix)
        and key[len(prefix):].isdigit()
    )

    epoch_nums = [num for num, _ in epoch_keys]
    train_loss, dev_loss = [], []
    train_acc, dev_acc = [], []
    for _, key in epoch_keys:
        train = log[key]["Train"]
        dev = log[key]["Eval"]
        train_loss.append(train['loss'])
        train_acc.append(train['abosulate accuracy'])
        dev_loss.append(dev['loss'])
        dev_acc.append(dev['abosulate accuracy'])

    # Explicit figure/axes instead of the pyplot state machine, so the plot
    # never lands on a figure left over from an earlier cell.
    fig, (ax_loss, ax_acc) = plt.subplots(2, 1, sharex=True)

    ax_loss.plot(epoch_nums, train_loss, label="train loss")
    ax_loss.plot(epoch_nums, dev_loss, label="dev loss")
    ax_loss.set_ylabel("loss")
    ax_loss.legend()

    ax_acc.plot(epoch_nums, train_acc, label="train acc")
    ax_acc.plot(epoch_nums, dev_acc, label="dev acc")
    ax_acc.set_xlabel("epoch")
    ax_acc.set_ylabel("absolute accuracy")
    ax_acc.legend()

    fig.tight_layout()
    plt.show()

In [4]:
# Location of the tuning datasets and the id of this experiment.
folder = "tuning_data/"
experiment_num = 6

# Whether the dataloaders shuffle their batches.
shuffle_batch = True

# Training batch size. With shuffle_batch turned off it has to equal the
# number of sequences per sequence length. With shuffling on, the same size
# is reused for the dev set (and, per the original note, for shuffled test
# sets); without shuffling the dev set uses 500 instead.
tr_batch_size = 250
if shuffle_batch:
    dev_batch_size = tr_batch_size
else:
    dev_batch_size = 500

In [5]:
# Settings shared by every training / evaluation run in this notebook.

# Passed to train_and_evaluate as print_freq (presumably: report every N epochs).
print_freq = 5

# Accuracy threshold for saving the best trained models.
acc_threshold = 0.5

# Upper bound on the number of epochs.
max_epoch_num = 150

# Exit thresholds: training stops early only when BOTH the train accuracy
# and the dev accuracy thresholds are met.
train_acc_exit = 0.92
eval_acc_exit = 0.92

# Probability of feeding the real target symbol to the decoder during training.
teacher_forcing_ratio = 1.0

# RevStr

In [28]:
# Load the RevStr train/dev splits; `folder` is relative, so this relies on the
# working directory having been changed to project_dir.
# read_data and join come in through the star imports (definitions not shown here).
train = read_data(join(folder, "RevStr/train.txt"))
dev = read_data(join(folder, "RevStr/dev.txt"))
# Path handed to train_and_evaluate by every RevStr run below.
# NOTE(review): all hyperparameter sets share this one path, so they presumably
# overwrite each other's saved model -- confirm against train_and_evaluate.
saved_model_fp = join(project_dir, "notebooks/Hyperparatemers Tuning/RevStr_lstm_model.pt")

# Dataloaders for both splits; both follow the shuffle_batch setting.
train_dl = create_dataloader(train, batch_size=tr_batch_size, shuffle=shuffle_batch)
dev_dl = create_dataloader(dev, batch_size=dev_batch_size, shuffle=shuffle_batch)

## Hyperparameter set 1

In [8]:
# RevStr set 1: hidden_size=384, learning_rate=5e-3.
# This cell also defines the globals that every later "Hyperparameter set"
# cell reuses without redefining them: the vocab sizes, dropout_rate,
# bidirectional, use_attention, reduction_method and device.

# Input and output share the same alphabet (ascii_lowercase arrives through
# the star imports; presumably string.ascii_lowercase).
in_vocab = ascii_lowercase
out_vocab = ascii_lowercase
# +2 ids on top of the alphabet -- presumably reserved for special symbols;
# TODO confirm against scripts.dataloader.
in_vocab_size = len(in_vocab) + 2
out_vocab_size = len(out_vocab) + 2

hidden_size = 384
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
dropout_rate = 0.0
bidirectional = True
use_attention = False
# Callable passed to the Encoder -- presumably used to merge the two
# directions of the bidirectional RNN; verify in scripts.model.
reduction_method = torch.sum

learning_rate = 5e-3
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
# The Attention module is always constructed and passed to the Decoder, even
# though use_attention is False here.
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
# init_weights is applied through model.apply (presumably to every submodule,
# as with torch.nn.Module.apply).
model.apply(init_weights)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [None]:
# Train/evaluate the set-1 model on the current train_dl/dev_dl using the
# thresholds from the setup cell; the returned value is kept in `log`.
# train_and_evaluate comes from the star imports (definition not shown here).
# If the call is interrupted before it returns, `log` is not (re)assigned.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.9610295355319978, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.000718333351687761, 'overlap rate': 0.42039671167731285}
evaluation performance: {'loss': 3.7706141501665114, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006416666794393678, 'overlap rate': 0.2826591946184635}

Current epoch: 10, 
training performance: {'loss': 1.9244585275650024, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0013233333651442082, 'overlap rate': 0.4365433774888515}
evaluation performance: {'loss': 3.1461852490901947, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0012466666885302402, 'overlap rate': 0.43257921151816847}

Current epoch: 15, 
training performance: {'loss': 1.8691504895687103, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0015633333576261065, 'overlap rate': 0.445408371090889}
evaluation performance: {'loss': 2.0653246700763703, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.

KeyboardInterrupt: ignored

## Hyperparameter set 2

In [None]:
# RevStr set 2: hidden_size=512, learning_rate=5e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set here; they must already exist as
# notebook globals (assigned in the "Hyperparameter set 1" cell).
hidden_size = 512
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 5e-3
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train/evaluate the set-2 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.9111924797296524, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006133333467005286, 'overlap rate': 0.43138837814331055}
evaluation performance: {'loss': 1.9050104081630708, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006635310834099073, 'overlap rate': 0.43052393905818465}

Current epoch: 10, 
training performance: {'loss': 1.8986809074878692, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006633333490754012, 'overlap rate': 0.4322350464761257}
evaluation performance: {'loss': 1.8990997105836869, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006575000181328506, 'overlap rate': 0.4310650438070297}

Current epoch: 15, 
training performance: {'loss': 1.897565519809723, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006866666772111785, 'overlap rate': 0.43212337642908094}
evaluation performance: {'loss': 1.8993864625692367, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 

KeyboardInterrupt: ignored

## Hyperparameter set 3

In [None]:
# RevStr set 3: hidden_size=384, learning_rate=1e-2.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set here; they must already exist as
# notebook globals (assigned in the "Hyperparameter set 1" cell).
hidden_size = 384
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 1e-2
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train/evaluate the set-3 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.9252118825912476, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0009800000203540548, 'overlap rate': 0.4322133757174015}
evaluation performance: {'loss': 1.9584557130932807, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0010593079343379941, 'overlap rate': 0.43358810655772684}

Current epoch: 10, 
training performance: {'loss': 1.8526558339595796, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0032883333798963575, 'overlap rate': 0.4449133761227131}
evaluation performance: {'loss': 1.8744195833802224, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0038691667228704318, 'overlap rate': 0.43538004383444784}

Current epoch: 15, 
training performance: {'loss': 1.7191067188978195, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.014901666902005672, 'overlap rate': 0.4793933808803558}
evaluation performance: {'loss': 1.7767226949334145, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0

## Hyperparameter set 4

In [9]:
# RevStr set 4: hidden_size=512, learning_rate=5e-4.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set here; they must already exist as
# notebook globals (assigned in the "Hyperparameter set 1" cell).
hidden_size = 512
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 5e-4
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
# Train/evaluate the set-4 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.9026744425296784, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006566666819708189, 'overlap rate': 0.43093337342143057}
evaluation performance: {'loss': 1.9013691633939742, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006675000153336441, 'overlap rate': 0.43060754165053367}

Current epoch: 10, 
training performance: {'loss': 1.8968867063522339, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0007166666764533147, 'overlap rate': 0.43261504620313646}
evaluation performance: {'loss': 1.8985399201512336, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006959039688808844, 'overlap rate': 0.43096624240279197}

Current epoch: 15, 
training performance: {'loss': 1.895988357067108, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0007033333458821289, 'overlap rate': 0.4333900444209576}
evaluation performance: {'loss': 1.8980976849794389, 'abosulate accuracy': 0.0, 'consecutive overlap rate':

KeyboardInterrupt: ignored

## Hyperparameter set 5

In [13]:
# RevStr set 5: hidden_size=512, learning_rate=5e-2 (the largest rate tried).
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set here; they must already exist as
# notebook globals (assigned in the "Hyperparameter set 1" cell).
hidden_size = 512
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 5e-2
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [14]:
# Train/evaluate the set-5 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.9961162239313126, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006666666813543998, 'overlap rate': 0.41714170947670937}
evaluation performance: {'loss': 5.211909478902816, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006591666799067753, 'overlap rate': 0.037454170919954774}

Current epoch: 10, 
training performance: {'loss': 2.6407080829143523, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0007266666754730978, 'overlap rate': 0.4171900376677513}
evaluation performance: {'loss': 5.435972255468369, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006458333497903368, 'overlap rate': 0.030111670424230397}

Current epoch: 15, 
training performance: {'loss': 1.9742255955934525, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0007033333509752993, 'overlap rate': 0.42179338037967684}
evaluation performance: {'loss': 4.1781165361404415, 'abosulate accuracy': 0.0, 'consecutive overlap rate'

KeyboardInterrupt: ignored

## Hyperparameter set 6

In [15]:
# RevStr set 6: hidden_size=256 (the smallest size tried), learning_rate=5e-4.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set here; they must already exist as
# notebook globals (assigned in the "Hyperparameter set 1" cell).
hidden_size = 256
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 5e-4
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [16]:
# Train/evaluate the set-6 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.9123721271753311, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006683333485852927, 'overlap rate': 0.4307867087423801}
evaluation performance: {'loss': 1.9386005595326423, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0007141666823372361, 'overlap rate': 0.4299425456672907}

Current epoch: 10, 
training performance: {'loss': 1.8980722695589065, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006785028366721235, 'overlap rate': 0.431949283182621}
evaluation performance: {'loss': 1.8983167365193367, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006958333482543821, 'overlap rate': 0.4309733796864748}

Current epoch: 15, 
training performance: {'loss': 1.8961813658475877, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0008066666865488515, 'overlap rate': 0.43309338092803956}
evaluation performance: {'loss': 1.8975609421730042, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.

KeyboardInterrupt: ignored

## Hyperparameter set 7

In [23]:
# RevStr set 7: hidden_size=384, learning_rate=1e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set here; they must already exist as
# notebook globals (assigned in the "Hyperparameter set 1" cell).
hidden_size = 384
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 1e-3
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [24]:
# Train/evaluate the set-7 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.908746635913849, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006683333464025054, 'overlap rate': 0.4306583806872368}
evaluation performance: {'loss': 1.8993145197629928, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006491666790680028, 'overlap rate': 0.43054004311561583}

Current epoch: 10, 
training performance: {'loss': 1.8965239077806473, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006900000131281559, 'overlap rate': 0.43254504203796384}
evaluation performance: {'loss': 1.8978860899806023, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006600000142498175, 'overlap rate': 0.4311258740723133}

Current epoch: 15, 
training performance: {'loss': 1.8960694134235383, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006316666782367975, 'overlap rate': 0.43304004669189455}
evaluation performance: {'loss': 1.8986768454313279, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 

KeyboardInterrupt: ignored

## Hyperparameter set 8

In [29]:
# RevStr set 8: hidden_size=512, learning_rate=1e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set here; they must already exist as
# notebook globals (assigned in the "Hyperparameter set 1" cell).
hidden_size = 512
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 1e-3
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [30]:
# Train/evaluate the set-8 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.96162751019001, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006416666787117719, 'overlap rate': 0.4197617068886757}
evaluation performance: {'loss': 3.280040580034256, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006808333448134363, 'overlap rate': 0.22388252150267363}

Current epoch: 10, 
training performance: {'loss': 1.941453191637993, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006050000127288512, 'overlap rate': 0.4278133764863014}
evaluation performance: {'loss': 2.541892331838608, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006650000155786984, 'overlap rate': 0.4284667085856199}

Current epoch: 15, 
training performance: {'loss': 1.8987116992473603, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0007016666779236403, 'overlap rate': 0.43186170905828475}
evaluation performance: {'loss': 1.9000818610191346, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.000

KeyboardInterrupt: ignored

# RedStr



In [25]:
# Load the RedStr train/dev splits; `folder` is relative, so this relies on the
# working directory having been changed to project_dir.
train = read_data(join(folder, "RedStr/train.txt"))
dev = read_data(join(folder, "RedStr/dev.txt"))
# Path handed to train_and_evaluate by the RedStr runs. The file is named
# after the RedStr task so that it cannot be mistaken for, or collide with,
# a model saved for the RevStr task.
saved_model_fp = join(project_dir, "notebooks/Hyperparatemers Tuning/RedStr_model.pt")

# Dataloaders for both splits; both follow the shuffle_batch setting.
train_dl = create_dataloader(train, batch_size=tr_batch_size, shuffle=shuffle_batch)
dev_dl = create_dataloader(dev, batch_size=dev_batch_size, shuffle=shuffle_batch)

## Hyperparameter set 1


In [18]:
# RedStr set 1: hidden_size=384, learning_rate=5e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set anywhere in the RedStr section; they
# must already exist as notebook globals from the RevStr
# "Hyperparameter set 1" cell.
hidden_size = 384
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 5e-3
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [19]:
# Train/evaluate the RedStr set-1 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.2221718728542328, 'abosulate accuracy': 0.0019000000902451575, 'consecutive overlap rate': 0.0665450006723404, 'overlap rate': 0.6225267291069031}
evaluation performance: {'loss': 1.4914841458201409, 'abosulate accuracy': 0.003200000140350312, 'consecutive overlap rate': 0.07476583467796445, 'overlap rate': 0.5522575534880161}

Current epoch: 10, 
training performance: {'loss': 0.6765504240989685, 'abosulate accuracy': 0.0831000043079257, 'consecutive overlap rate': 0.2025883361697197, 'overlap rate': 0.7745067432522774}
evaluation performance: {'loss': 1.268258836865425, 'abosulate accuracy': 0.07190000372938812, 'consecutive overlap rate': 0.18455083575099707, 'overlap rate': 0.6531759008765221}

Current epoch: 15, 
training performance: {'loss': 0.44640626609325407, 'abosulate accuracy': 0.19590000957250595, 'consecutive overlap rate': 0.3198866702616215, 'overlap rate': 0.8430600747466087}
evaluation performance: {'loss': 1.210819

KeyboardInterrupt: ignored

## Hyperparameter set 2

In [20]:
# RedStr set 2: hidden_size=384, learning_rate=1e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set anywhere in the RedStr section; they
# must already exist as notebook globals from the RevStr
# "Hyperparameter set 1" cell.
hidden_size = 384
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 1e-3
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [21]:
# Train/evaluate the RedStr set-2 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.608892983198166, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.007940000121016056, 'overlap rate': 0.512643388658762}
evaluation performance: {'loss': 1.6680292353034019, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.008733333431882784, 'overlap rate': 0.48456421308219433}

Current epoch: 10, 
training performance: {'loss': 1.176738628745079, 'abosulate accuracy': 0.001600000075995922, 'consecutive overlap rate': 0.03110333359800279, 'overlap rate': 0.6327284008264542}
evaluation performance: {'loss': 1.6235986098647117, 'abosulate accuracy': 0.0029000001202803107, 'consecutive overlap rate': 0.03385250018909573, 'overlap rate': 0.5169825538992882}

Current epoch: 15, 
training performance: {'loss': 0.9039192378520966, 'abosulate accuracy': 0.031100001465529204, 'consecutive overlap rate': 0.0814633342437446, 'overlap rate': 0.7080567449331283}
evaluation performance: {'loss': 1.607679156959057, 'abosulate accura

## Hyperparameter set 3

In [26]:
# RedStr set 3: hidden_size=512, learning_rate=1e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, reduction_method,
# use_attention and device are not set anywhere in the RedStr section; they
# must already exist as notebook globals from the RevStr
# "Hyperparameter set 1" cell.
hidden_size = 512
embd_dim = 256
num_layers = 1
rnn_type = "LSTM"
# Fresh encoder/attention/decoder objects, so this run does not continue from
# the weights of an earlier set.
encoder = Encoder(in_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type, 
                  dropout_rate, bidirectional, 
                  reduction_method)
attention = Attention(hidden_size)
decoder = Decoder(out_vocab_size, hidden_size, 
                  embd_dim, num_layers, rnn_type,
                  attention, use_attention, 
                  dropout_rate)

model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

learning_rate = 1e-3
criterion = nn.CrossEntropyLoss()
# New optimizer bound to the parameters of the model created above.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [27]:
# Train/evaluate the RedStr set-3 model; the returned value replaces `log`.
# If the call is interrupted before it returns, `log` keeps its previous value.
log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                         saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                         train_acc_exit, eval_acc_exit, teacher_forcing_ratio)

Current epoch: 5, 
training performance: {'loss': 1.632438600063324, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.006960000086110085, 'overlap rate': 0.5064483851194381}
evaluation performance: {'loss': 1.6582319915294648, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.007836666773073375, 'overlap rate': 0.49147755093872547}

Current epoch: 10, 
training performance: {'loss': 1.1512112230062486, 'abosulate accuracy': 0.002300000109244138, 'consecutive overlap rate': 0.0329650002066046, 'overlap rate': 0.6424467265605927}
evaluation performance: {'loss': 1.5906814634799957, 'abosulate accuracy': 0.00520000018295832, 'consecutive overlap rate': 0.03942750052083284, 'overlap rate': 0.5298267185688019}

Current epoch: 15, 
training performance: {'loss': 0.8675558537244796, 'abosulate accuracy': 0.04420000184327364, 'consecutive overlap rate': 0.10043000150471926, 'overlap rate': 0.7210617393255234}
evaluation performance: {'loss': 1.5656942322850227, 'abosulate accura