# Accessing the project directory on my Google Drive  

In [1]:
import os
from google.colab import drive

# Project folder on the mounted Google Drive.
project_dir = "/drive/My Drive/RNNSeq2Seq/"

# Mount Drive at /drive without forcing a remount.
drive.mount('/drive', force_remount=False)

# Run the rest of the notebook from inside the project folder so that
# relative paths resolve against it.
os.chdir(project_dir)

Mounted at /drive


# Dependencies

In [2]:
from scripts.model import *
from scripts.dataloader import create_dataloader
from scripts.utils import *
from scripts.pytorch_utils import *
import matplotlib.pyplot as plt

# Helper function to visualize the training log

In [3]:
def plot_log(log):
    """Plot per-epoch loss and accuracy curves from a training log.

    Args:
        log: Mapping with one entry per epoch, keyed "Epoch#<n>". Each entry
            holds a "Train" and an "Eval" dict, and each of those provides
            'loss' and 'abosulate accuracy' (the key spelling used by the
            log). Any other key, such as "Best eval accu", is ignored.

    Returns:
        None. Draws a two-panel figure (loss on top, accuracy below) and
        shows it.
    """
    prefix = "Epoch#"

    # Take the epochs from the keys themselves instead of assuming that every
    # key except "Best eval accu" is an epoch entry; sorting keeps the curves
    # in epoch order regardless of insertion order.
    epoch_entries = sorted(
        (int(key[len(prefix):]), key)
        for key in log
        if isinstance(key, str)
        and key.startswith(prefix)
        and key[len(prefix):].isdecimal()
    )

    epoch_nums, train_loss, dev_loss = [], [], []
    train_acc, dev_acc = [], []

    for epoch, key in epoch_entries:
        train = log[key]["Train"]
        dev = log[key]["Eval"]

        epoch_nums.append(epoch)
        train_loss.append(train['loss'])
        train_acc.append(train['abosulate accuracy'])
        dev_loss.append(dev['loss'])
        dev_acc.append(dev['abosulate accuracy'])

    # Explicit figure/axes interface: both panels share the epoch axis.
    fig, (ax_loss, ax_acc) = plt.subplots(2, 1, sharex=True)

    ax_loss.plot(epoch_nums, train_loss, label="train loss")
    ax_loss.plot(epoch_nums, dev_loss, label="dev loss")
    ax_loss.set_ylabel("loss")
    ax_loss.legend()

    ax_acc.plot(epoch_nums, train_acc, label="train acc")
    ax_acc.plot(epoch_nums, dev_acc, label="dev acc")
    ax_acc.set_xlabel("epoch")
    ax_acc.set_ylabel("absolute accuracy")
    ax_acc.legend()

    fig.tight_layout()
    plt.show()

In [4]:
# Data location and batching options for this tuning notebook.
folder = "tuning_data/"
experiment_num = 5
shuffle_batch = False

# When shuffle_batch is False, tr_batch_size must equal the number of
# sequences per sequence length. tr_batch_size is also used for the dev set
# (when shuffle_batch is True) and for shuffled test sets.
tr_batch_size = 250
dev_batch_size = tr_batch_size if shuffle_batch else 500

In [5]:
# Settings for training and evaluating.

print_freq = 5
acc_threshold = 0.5          # for saving the best trained models
max_epoch_num = 100          # maximum number of epochs

# Exit thresholds: both the train and the dev threshold must be met.
train_acc_exit = 0.92        # train accuracy exit threshold
eval_acc_exit = 0.92         # dev accuracy exit threshold

# Probability of using the real symbol from the target sequence for training.
teacher_forcing_ratio = 1.0

# RevStr

In [6]:
# Read the RevStr train/dev splits from the tuning data folder.
train_path = join(folder, "RevStr/train.txt")
dev_path = join(folder, "RevStr/dev.txt")
train = read_data(train_path)
dev = read_data(dev_path)

# Path passed to train_and_evaluate as saved_model_fp
# (presumably where the best RevStr model is written — confirm in scripts/).
saved_model_fp = join(project_dir, "notebooks/Hyperparatemers Tuning/RevStr_model.pt")

# Wrap both splits in dataloaders using the batch settings configured earlier.
train_dl = create_dataloader(
    train, batch_size=tr_batch_size, shuffle=shuffle_batch
)
dev_dl = create_dataloader(
    dev, batch_size=dev_batch_size, shuffle=shuffle_batch
)

## Hyperparameter set 1

In [7]:
# Vocabulary: lowercase ASCII letters for both input and output. The "+ 2"
# reserves two extra ids beyond the letters (presumably special symbols —
# confirm against scripts/).
in_vocab = out_vocab = ascii_lowercase
in_vocab_size, out_vocab_size = len(in_vocab) + 2, len(out_vocab) + 2

# Architecture settings. Later hyperparameter cells only override some of
# these and rely on the rest staying as defined here.
hidden_size, embd_dim = 256, 128
num_layers, rnn_type = 1, "GRU"
dropout_rate = 0.0
bidirectional, use_attention = True, False
reduction_method = torch.sum

# Optimisation settings and target device (GPU when available).
learning_rate = 5e-4
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Assemble the seq2seq model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# Loss and an Adam optimizer bound to this model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)



In [None]:
# Run training/evaluation with the settings configured above and keep the
# returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 3.0443244218826293, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.012160637124907225, 'overlap rate': 0.10220477618277073}
evaluation performance: {'loss': 3.0234518229961393, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.012361195916309953, 'overlap rate': 0.11029099337756634}

Current epoch: 10, 
training performance: {'loss': 2.6249762952327726, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.019255843642167748, 'overlap rate': 0.19049107581377028}
evaluation performance: {'loss': 2.8376217663288115, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.019514523260295392, 'overlap rate': 0.12670457027852536}

Current epoch: 15, 
training performance: {'loss': 2.1843094736337663, 'abosulate accuracy': 0.00010000000474974513, 'consecutive overlap rate': 0.033675825339742005, 'overlap rate': 0.3005087848752737}
evaluation performance: {'loss': 2.807639145851135, 'abosulate accuracy': 0.0, 'consecutive 

## Hyperparameter set 2

In [None]:
# Values varied in this run: hidden 256 / embedding 128 / learning rate 1e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 256, 128
num_layers, rnn_type = 1, "GRU"
learning_rate = 1e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 2.9126568377017974, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.013520028500352055, 'overlap rate': 0.13122933227568864}
evaluation performance: {'loss': 2.925635778903961, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.013257825968321413, 'overlap rate': 0.13175008147954942}

Current epoch: 10, 
training performance: {'loss': 2.2911397010087966, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.03354146257042885, 'overlap rate': 0.26688613295555114}
evaluation performance: {'loss': 2.741452944278717, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.037794252927415076, 'overlap rate': 0.14590164162218572}

Current epoch: 15, 
training performance: {'loss': 1.8169244319200515, 'abosulate accuracy': 0.00010000000474974513, 'consecutive overlap rate': 0.06740375375375152, 'overlap rate': 0.39445974081754687}
evaluation performance: {'loss': 2.6597967445850372, 'abosulate accuracy': 0.0, 'consecutive ov

## Hyperparameter set 3

In [None]:
# Values varied in this run: hidden 256 / embedding 128 / learning rate 5e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 256, 128
num_layers, rnn_type = 1, "GRU"
learning_rate = 5e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 2.3049215406179426, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.055567068560048935, 'overlap rate': 0.2940044317394495}
evaluation performance: {'loss': 2.742922383546829, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.06016915771178901, 'overlap rate': 0.1654029691591859}

Current epoch: 10, 
training performance: {'loss': 1.7169772773981093, 'abosulate accuracy': 0.0007000000216066838, 'consecutive overlap rate': 0.11322457320056856, 'overlap rate': 0.43740072324872015}
evaluation performance: {'loss': 2.5697828978300095, 'abosulate accuracy': 5.0000002374872565e-05, 'consecutive overlap rate': 0.10658800131641329, 'overlap rate': 0.22457422968000174}

Current epoch: 15, 
training performance: {'loss': 1.5313187643885613, 'abosulate accuracy': 0.001700000069104135, 'consecutive overlap rate': 0.1360089026391506, 'overlap rate': 0.4895807929337025}
evaluation performance: {'loss': 2.4759729355573654, 'abosulate a

## Hyperparameter set 4

In [None]:
# Values varied in this run: hidden 384 / embedding 256 / learning rate 5e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 384, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 5e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.972114196419716, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.08048825673758983, 'overlap rate': 0.3730102121829987}
evaluation performance: {'loss': 2.652190828323364, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.09564851424656809, 'overlap rate': 0.20456149708479643}

Current epoch: 10, 
training performance: {'loss': 1.6274954825639725, 'abosulate accuracy': 0.001000000024214387, 'consecutive overlap rate': 0.12758418852463366, 'overlap rate': 0.46655468419194224}
evaluation performance: {'loss': 2.522130677103996, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.1203615847043693, 'overlap rate': 0.2365854186937213}

Current epoch: 15, 
training performance: {'loss': 1.3494064196944238, 'abosulate accuracy': 0.003100000147242099, 'consecutive overlap rate': 0.1586532368324697, 'overlap rate': 0.5406599327921867}
evaluation performance: {'loss': 2.5724943190813065, 'abosulate accuracy': 0.000100000004

In [None]:
# Second call with the same `model` and `optimizer` objects: no setup cell
# sits between this call and the previous one, so training presumably resumes
# from the state the previous call left behind. The returned log replaces the
# previous `log`.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 0.600164377875626, 'abosulate accuracy': 0.1344000066164881, 'consecutive overlap rate': 0.3374956889078021, 'overlap rate': 0.7827242478728295}
evaluation performance: {'loss': 4.2603305101394655, 'abosulate accuracy': 0.0016000000934582204, 'consecutive overlap rate': 0.1403004890307784, 'overlap rate': 0.2887460645288229}

Current epoch: 10, 
training performance: {'loss': 0.6046456100419164, 'abosulate accuracy': 0.11350000406382606, 'consecutive overlap rate': 0.32521769013255836, 'overlap rate': 0.7790008008480072}
evaluation performance: {'loss': 3.601568478345871, 'abosulate accuracy': 0.003750000207219273, 'consecutive overlap rate': 0.15429147761315107, 'overlap rate': 0.3070066154003143}

Current epoch: 15, 
training performance: {'loss': 0.5936929602175951, 'abosulate accuracy': 0.13340000626631082, 'consecutive overlap rate': 0.32219572067260743, 'overlap rate': 0.7835650026798249}
evaluation performance: {'loss': 3.6822851

KeyboardInterrupt: ignored

## Hyperparameter set 5

In [None]:
# Values varied in this run: hidden 384 / embedding 256 / learning rate 1e-2.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 384, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 1e-2

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.8402043491601945, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.10010498519986868, 'overlap rate': 0.4095432639122009}
evaluation performance: {'loss': 2.4810071676969527, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.11030336348339916, 'overlap rate': 0.22402743604034187}

Current epoch: 10, 
training performance: {'loss': 1.588365724682808, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.1271604798734188, 'overlap rate': 0.47237740606069567}
evaluation performance: {'loss': 2.424419692158699, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.13743416303768755, 'overlap rate': 0.25420168340206145}

Current epoch: 15, 
training performance: {'loss': 1.4675403550267219, 'abosulate accuracy': 0.001500000082887709, 'consecutive overlap rate': 0.14072328666225076, 'overlap rate': 0.5036272786557674}
evaluation performance: {'loss': 2.528076535463333, 'abosulate accuracy': 5.0000002374872565e-05, 'cons

KeyboardInterrupt: ignored

## Hyperparameter set 6

In [None]:
# Values varied in this run: hidden 384 / embedding 384 / learning rate 5e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 384, 384
num_layers, rnn_type = 1, "GRU"
learning_rate = 5e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.8595738738775254, 'abosulate accuracy': 0.00020000000949949026, 'consecutive overlap rate': 0.10082206623628735, 'overlap rate': 0.40751611813902855}
evaluation performance: {'loss': 2.5551883071660995, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.11129235690459609, 'overlap rate': 0.22447042129933834}

Current epoch: 10, 
training performance: {'loss': 1.5662807151675224, 'abosulate accuracy': 0.000900000031106174, 'consecutive overlap rate': 0.14324401542544365, 'overlap rate': 0.4862057529389858}
evaluation performance: {'loss': 2.467013505101204, 'abosulate accuracy': 5.0000002374872565e-05, 'consecutive overlap rate': 0.13630389450117947, 'overlap rate': 0.2593775406479836}

Current epoch: 15, 
training performance: {'loss': 1.3419639438390731, 'abosulate accuracy': 0.005400000128429383, 'consecutive overlap rate': 0.17027265802025796, 'overlap rate': 0.5461578264832496}
evaluation performance: {'loss': 2.590437036752

## Hyperparameter set 7

In [None]:
# Values varied in this run: hidden 384 / embedding 384 / learning rate 1e-2.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 384, 384
num_layers, rnn_type = 1, "GRU"
learning_rate = 1e-2

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.8045309484004974, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.11230954974889755, 'overlap rate': 0.41673189848661424}
evaluation performance: {'loss': 2.4128523617982864, 'abosulate accuracy': 5.0000002374872565e-05, 'consecutive overlap rate': 0.12609830694273114, 'overlap rate': 0.23878691755235196}

Current epoch: 10, 
training performance: {'loss': 1.5889118611812592, 'abosulate accuracy': 0.0006000000401400029, 'consecutive overlap rate': 0.14056064914911987, 'overlap rate': 0.47642791643738747}
evaluation performance: {'loss': 2.487309941649437, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.12945145247504114, 'overlap rate': 0.24128684550523757}

Current epoch: 15, 
training performance: {'loss': 1.4109566316008568, 'abosulate accuracy': 0.004100000066682696, 'consecutive overlap rate': 0.16713412841781974, 'overlap rate': 0.52812769562006}
evaluation performance: {'loss': 2.5080868005752563, 'abosulate a

## Hyperparameter set 8

In [8]:
# Values varied in this run: hidden 512 / embedding 256 / learning rate 5e-3.
# NOTE(review): the "Hyperparameter set 11" cell uses exactly these values.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 512, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 5e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 3.031817948818207, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.013853928644675762, 'overlap rate': 0.1044269297271967}
evaluation performance: {'loss': 3.027434378862381, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.012419226102065296, 'overlap rate': 0.10961329620331525}

Current epoch: 10, 
training performance: {'loss': 2.760879617929459, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.024181897612288596, 'overlap rate': 0.1601911824196577}
evaluation performance: {'loss': 2.86627779006958, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.02455161060206592, 'overlap rate': 0.12727683894336222}

Current epoch: 15, 
training performance: {'loss': 2.4360922157764433, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.04747556140646338, 'overlap rate': 0.23918884359300135}
evaluation performance: {'loss': 2.7842347502708433, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.046626533

KeyboardInterrupt: ignored

## Hyperparameter set 9

In [10]:
# Values varied in this run: hidden 512 / embedding 256 / learning rate 1e-2.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 512, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 1e-2

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.9140735745429993, 'abosulate accuracy': 0.00020000000949949026, 'consecutive overlap rate': 0.1017451991327107, 'overlap rate': 0.3891383849084377}
evaluation performance: {'loss': 2.4883206516504286, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.1291859376244247, 'overlap rate': 0.22950587831437588}

Current epoch: 10, 
training performance: {'loss': 1.5662868678569795, 'abosulate accuracy': 0.0011000000289641322, 'consecutive overlap rate': 0.1439401475712657, 'overlap rate': 0.48096401020884516}
evaluation performance: {'loss': 2.537019672989845, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.13414483750239015, 'overlap rate': 0.2312528882175684}

Current epoch: 15, 
training performance: {'loss': 1.4711683824658395, 'abosulate accuracy': 0.005400000349618494, 'consecutive overlap rate': 0.1627841386012733, 'overlap rate': 0.512588656693697}
evaluation performance: {'loss': 2.5733671456575395, 'abosulate accura

## Hyperparameter set 10

In [14]:
# Values varied in this run: hidden 512 / embedding 256 / learning rate 2e-2.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 512, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 2e-2

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 2.5112193554639815, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0989478942938149, 'overlap rate': 0.24090943075716495}
evaluation performance: {'loss': 2.7056227743625643, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.10253448057919741, 'overlap rate': 0.1825688049197197}

Current epoch: 10, 
training performance: {'loss': 2.2029256343841555, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.1457588043063879, 'overlap rate': 0.3280888643115759}
evaluation performance: {'loss': 2.5515179514884947, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.13601780589669943, 'overlap rate': 0.21533020883798598}

Current epoch: 15, 
training performance: {'loss': 2.0885771036148073, 'abosulate accuracy': 0.00010000000474974513, 'consecutive overlap rate': 0.16542359199374915, 'overlap rate': 0.3591531738638878}
evaluation performance: {'loss': 2.506507897377014, 'abosulate accuracy': 0.0, 'consecutive overlap r

KeyboardInterrupt: ignored

## Hyperparameter set 11

In [16]:
# Values varied in this run: hidden 512 / embedding 256 / learning rate 5e-3.
# NOTE(review): the "Hyperparameter set 8" cell uses exactly these values.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 512, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 5e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [17]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.8457231163978576, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.10146978795528412, 'overlap rate': 0.40889989510178565}
evaluation performance: {'loss': 2.578743091225624, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.11836120039224625, 'overlap rate': 0.22655879948288202}

Current epoch: 10, 
training performance: {'loss': 1.4831101804971696, 'abosulate accuracy': 0.0021000001463107763, 'consecutive overlap rate': 0.15494112409651278, 'overlap rate': 0.5050593242049217}
evaluation performance: {'loss': 2.5599245518445968, 'abosulate accuracy': 5.0000002374872565e-05, 'consecutive overlap rate': 0.13186672385782003, 'overlap rate': 0.24406138882040979}

Current epoch: 15, 
training performance: {'loss': 1.3521871998906136, 'abosulate accuracy': 0.0058000003336928785, 'consecutive overlap rate': 0.17285830015316606, 'overlap rate': 0.5427667975425721}
evaluation performance: {'loss': 2.4915643692016602, 'abosulate

## Hyperparameter set 12

In [18]:
# Values varied in this run: hidden 512 / embedding 256 / learning rate 1e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell, which must have been run first.
hidden_size, embd_dim = 512, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 1e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [19]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 2.815667188167572, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.014692367706447839, 'overlap rate': 0.14519557543098927}
evaluation performance: {'loss': 2.8910782516002653, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.017423712951131164, 'overlap rate': 0.12412845976650715}

Current epoch: 10, 
training performance: {'loss': 1.8164962440729142, 'abosulate accuracy': 0.00010000000474974513, 'consecutive overlap rate': 0.07036153501830995, 'overlap rate': 0.40072223618626596}
evaluation performance: {'loss': 2.649894317984581, 'abosulate accuracy': 0.00025000000605359676, 'consecutive overlap rate': 0.06981752207502723, 'overlap rate': 0.19566846471279858}

Current epoch: 15, 
training performance: {'loss': 1.4221404552459718, 'abosulate accuracy': 0.0018000000971369445, 'consecutive overlap rate': 0.12350750556215644, 'overlap rate': 0.5111628875136376}
evaluation performance: {'loss': 2.5960435688495638, 'abosul

# RedStr



In [20]:
# Read the RedStr train/dev splits from the tuning data folder.
train = read_data(join(folder, "RedStr/train.txt"))
dev = read_data(join(folder, "RedStr/dev.txt"))

# Give the RedStr runs their own model file. This cell previously reused
# "RevStr_model.pt", so a saved RedStr model would have overwritten the
# RevStr one.
saved_model_fp = join(project_dir, "notebooks/Hyperparatemers Tuning/RedStr_model.pt")

# Replace the dataloaders so that the training cells below use the RedStr
# data, with the batch settings configured at the top of the notebook.
train_dl = create_dataloader(train, batch_size=tr_batch_size, shuffle=shuffle_batch)
dev_dl = create_dataloader(dev, batch_size=dev_batch_size, shuffle=shuffle_batch)

## Hyperparameter set 1


In [None]:
# Values varied in this run: hidden 384 / embedding 256 / learning rate 5e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell of the RevStr section, which must have been
# run first.
hidden_size, embd_dim = 384, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 5e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.8088250249624251, 'abosulate accuracy': 0.00020000000949949026, 'consecutive overlap rate': 0.10523941479623318, 'overlap rate': 0.42361762672662734}
evaluation performance: {'loss': 2.7309108078479767, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.09256237791851163, 'overlap rate': 0.20031944029033183}

Current epoch: 10, 
training performance: {'loss': 1.5963514566421508, 'abosulate accuracy': 0.0005000000237487257, 'consecutive overlap rate': 0.12453538281843066, 'overlap rate': 0.47496216744184494}
evaluation performance: {'loss': 2.8886157751083372, 'abosulate accuracy': 5.0000002374872565e-05, 'consecutive overlap rate': 0.10180661920458078, 'overlap rate': 0.20439665410667657}

Current epoch: 15, 
training performance: {'loss': 1.4912467643618583, 'abosulate accuracy': 0.0019000000553205608, 'consecutive overlap rate': 0.14019142482429742, 'overlap rate': 0.5049820877611637}
evaluation performance: {'loss': 2.5869862

## Hyperparameter set 2

In [None]:
# Values varied in this run: hidden 384 / embedding 384 / learning rate 5e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell of the RevStr section, which must have been
# run first.
hidden_size, embd_dim = 384, 384
num_layers, rnn_type = 1, "GRU"
learning_rate = 5e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.8285910844802857, 'abosulate accuracy': 0.00020000000949949026, 'consecutive overlap rate': 0.10580138759687543, 'overlap rate': 0.42035203948616984}
evaluation performance: {'loss': 2.579826498031616, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.09997188746929168, 'overlap rate': 0.21555438078939915}

Current epoch: 10, 
training performance: {'loss': 1.5857378512620925, 'abosulate accuracy': 0.0005000000121071935, 'consecutive overlap rate': 0.13992802910506724, 'overlap rate': 0.48121438845992087}
evaluation performance: {'loss': 2.4583598762750625, 'abosulate accuracy': 0.00010000000474974513, 'consecutive overlap rate': 0.14057109765708448, 'overlap rate': 0.25959455482661725}

Current epoch: 15, 
training performance: {'loss': 1.3963282480835915, 'abosulate accuracy': 0.005300000251736492, 'consecutive overlap rate': 0.16188398888334632, 'overlap rate': 0.5322240747511386}
evaluation performance: {'loss': 2.441483524

## Hyperparameter set 3

In [21]:
# Values varied in this run: hidden 512 / embedding 256 / learning rate 5e-3.
# in_vocab_size, out_vocab_size, dropout_rate, bidirectional, use_attention,
# reduction_method and device are not set in this cell; they come from the
# "Hyperparameter set 1" cell of the RevStr section, which must have been
# run first.
hidden_size, embd_dim = 512, 256
num_layers, rnn_type = 1, "GRU"
learning_rate = 5e-3

# Build a fresh model from its parts.
encoder = Encoder(
    in_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    dropout_rate, bidirectional, reduction_method,
)
attention = Attention(hidden_size)
decoder = Decoder(
    out_vocab_size, hidden_size, embd_dim, num_layers, rnn_type,
    attention, use_attention, dropout_rate,
)
model = Seq2Seq(encoder, decoder, device).to(device)
model.apply(init_weights)

# New loss and an Adam optimizer bound to the new model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run training/evaluation for the model built in the previous cell and keep
# the returned log.
log = train_and_evaluate(
    model, train_dl, dev_dl, criterion, optimizer,
    saved_model_fp, acc_threshold, print_freq, max_epoch_num,
    train_acc_exit, eval_acc_exit, teacher_forcing_ratio,
)

Current epoch: 5, 
training performance: {'loss': 1.9285337150096893, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.08858793620020151, 'overlap rate': 0.38156607672572135}
evaluation performance: {'loss': 2.588759559392929, 'abosulate accuracy': 5.0000002374872565e-05, 'consecutive overlap rate': 0.10302754547446966, 'overlap rate': 0.21394946258515118}

Current epoch: 10, 
training performance: {'loss': 1.5709426715970038, 'abosulate accuracy': 0.0011000000638887286, 'consecutive overlap rate': 0.13491971464827657, 'overlap rate': 0.4782140552997589}
evaluation performance: {'loss': 2.53131950199604, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.1337932207621634, 'overlap rate': 0.23817432597279548}

Current epoch: 15, 
training performance: {'loss': 1.3123142421245575, 'abosulate accuracy': 0.005500000156462193, 'consecutive overlap rate': 0.17487190561369062, 'overlap rate': 0.5518921405076981}
evaluation performance: {'loss': 2.620278787612915, 'abosulate accu