# Accessing the project directory on my Google Drive  

In [1]:
import os
from google.colab import drive

# Root of the project inside the mounted Google Drive.
project_dir = "/drive/My Drive/RNNSeq2Seq/"

# Attach Google Drive at /drive; force_remount=False keeps an existing mount
# instead of mounting again.
drive.mount('/drive', force_remount=False)

# Make the project root the working directory so that the relative paths used
# in later cells (e.g. "data/", the experiment log folders) resolve against it.
# Presumably this is also what makes the `scripts` package importable -- confirm.
os.chdir(project_dir)

Mounted at /drive


# Dependencies

In [2]:
from scripts.model import *
from scripts.dataloader import create_dataloader
from scripts.utils import *
from scripts.pytorch_utils import *

# Initialize model

In [3]:
# ------------------------------ Vocabulary ------------------------------
# Source and target sides use the same alphabet: the lowercase ASCII letters.
in_vocab = ascii_lowercase
out_vocab = ascii_lowercase
# Two extra ids on top of the letters -- presumably special tokens
# (TODO confirm which ones in scripts.dataloader / scripts.model).
in_vocab_size = 2 + len(in_vocab)
out_vocab_size = 2 + len(out_vocab)

# ------------------------------ Architecture ------------------------------
rnn_type = "LSTM"
num_layers = 1
embd_dim = 256
hidden_size = 384
bidirectional = False
use_attention = False
dropout_rate = 0.0
# Handed to the Encoder constructor; what exactly gets reduced is defined in
# scripts.model (presumably the combination of encoder states -- confirm).
reduction_method = torch.sum

# ------------------------- Optimisation / hardware -------------------------
learning_rate = 5e-4
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Training, testing, and logging 

In [4]:
# Dataset root, relative to the working directory.
prefix = "data/"
# Experiment index; passed to create_log_folders and shown in the progress messages.
experiment_num = 8
# Whether the train/dev loaders used during training shuffle their batches.
shuffle_batch = True

# Training batch size. It is reused for the dev loader (when shuffling) and for
# the shuffled test loaders. If shuffle_batch is False it must equal the number
# of sequences per sequence length.
tr_batch_size = 250

# Dev batch size: follows the training batch size when shuffling, otherwise 500
# (presumably the number of dev sequences per length -- confirm against the data).
if shuffle_batch:
    dev_batch_size = tr_batch_size
else:
    dev_batch_size = 500

# Number of sequences per length; used as batch size for the non-shuffled test sets.
te_batch_size = 1000

# Set up the log folder tree for this experiment / RNN type and keep the
# returned paths, named after how the later cells use them:
(log_folder,  # experiment root; hyperparameters.json is written here
 tr_log,      # training logs and per-sequence-length train/dev performance
 te1_log,     # results on test set 1
 te2_log,     # results on test set 2
 mo_log) = create_log_folders(experiment_num, rnn_type)  # mo_log: saved model files

./Experiments_Logs/Experiment#8/LSTM created!
./Experiments_Logs/Experiment#8/LSTM/Training_Logs created!
./Experiments_Logs/Experiment#8/LSTM/Testing_Logs created!
./Experiments_Logs/Experiment#8/LSTM/Testing_Logs/Test_Logs created!
./Experiments_Logs/Experiment#8/LSTM/Testing_Logs/Test2_Logs created!
./Experiments_Logs/Experiment#8/LSTM/Models_Logs created!


In [5]:
# Settings for training and evaluating

# Passed to train_and_evaluate as its print frequency (the captured log below
# reports every 5th epoch).
print_freq = 5

# Accuracy threshold used when saving the best trained models.
acc_threshold = 0.0

# Maximum number of training epochs.
max_epoch_num = 200

# Exit thresholds on train and dev accuracy; both have to be met together.
train_acc_exit = 0.9999
eval_acc_exit = 0.995

# Probability of feeding the real target symbol to the decoder during training.
teacher_forcing_ratio = 1.0

# Driver loop of the experiment. For every language ("RevStr", "RedStr") and
# dataset variant ("1", "2", "3") it builds a fresh model, trains it, evaluates
# it on the train/dev/test/test2 files found in join(prefix, lang, n), and saves
# the results as JSON files in the log folders.
#
# NOTE: every name assigned here is a notebook-level global. After the loop,
# `model`, `criterion` and `optimizer` still refer to the objects of the last
# (lang, n) combination; the hyperparameter-logging cell reads them.
# NOTE(review): no random seed is set in the visible cells, so weight
# initialisation and batch shuffling presumably differ between runs -- confirm
# whether the helper modules seed anything.
print(f"Start experiment#{experiment_num} with {rnn_type} seq2seq...")

for lang in ["RevStr", "RedStr"]:
    
    for n in ["1", "2", "3"]:
        print(f"\n\n{'=' * 20} training on {lang}_{n} starts {'=' * 20}\n")
        # Folder holding train.txt, dev.txt, test.txt and test2.txt for this run.
        folder = join(prefix, lang, n)
        
        # =========================== Initialize model ===========================
        # A new encoder/decoder pair is created for every (lang, n) so that no
        # weights carry over from the previous run.
        encoder = Encoder(in_vocab_size, hidden_size, 
                          embd_dim, num_layers, rnn_type, 
                          dropout_rate, bidirectional, 
                          reduction_method)
        # The Attention object is always constructed; `use_attention` is handed
        # to the Decoder alongside it (presumably deciding whether it is used).
        attention = Attention(hidden_size)
        decoder = Decoder(out_vocab_size, hidden_size, 
                          embd_dim, num_layers, rnn_type,
                          attention, use_attention, 
                          dropout_rate)

        model = Seq2Seq(encoder, decoder, device).to(device)
        # Run the project's init_weights on the model (presumably on every
        # submodule via nn.Module.apply -- assumes Seq2Seq is an nn.Module).
        model.apply(init_weights)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # =============================== Training ===============================
        train = read_data(join(folder, "train.txt"))
        dev = read_data(join(folder, "dev.txt"))
        # File path handed to train_and_evaluate for saving the model.
        saved_model_fp = join(mo_log, f"{lang}_{n}.pt")
        
        train_dl = create_dataloader(train, batch_size=tr_batch_size, shuffle=shuffle_batch)
        dev_dl = create_dataloader(dev, batch_size=dev_batch_size, shuffle=shuffle_batch)
        
        log = train_and_evaluate(model, train_dl, dev_dl, criterion, optimizer, 
                                 saved_model_fp, acc_threshold, print_freq, max_epoch_num, 
                                 train_acc_exit, eval_acc_exit, teacher_forcing_ratio)
        save_dict_as_json(log, join(tr_log, f"{lang}_{n}.json"))
        # NOTE(review): all evaluations below use the in-memory `model` as left by
        # train_and_evaluate; whether that equals the checkpoint written to
        # saved_model_fp depends on train_and_evaluate -- confirm.

        # If training used shuffled batches, rebuild the loaders without
        # shuffling for the per-sequence-length reports below.
        # 250 / 500 are presumably the number of train / dev sequences per
        # sequence length -- confirm against the data files.
        if shuffle_batch:
            train_dl = create_dataloader(train, batch_size=250, shuffle=False)
            dev_dl = create_dataloader(dev, batch_size=500, shuffle=False)

        # With the 4th argument set to True, evaluate returns a pair; the second
        # item is what gets saved as the per-sequence-length performance.
        _, train_perfms = evaluate(model, train_dl, criterion, True)
        save_dict_as_json(train_perfms, join(tr_log, f"{lang}_{n}_train_performance_per_seq_length.json"))
        _, dev_perfms = evaluate(model, dev_dl, criterion, True)
        save_dict_as_json(dev_perfms, join(tr_log, f"{lang}_{n}_dev_performance_per_seq_length.json"))
        
        
        # =============================== Testing ===============================
        
        # ++++++++++++++++++++++ Test set 1 (in distribution) ++++++++++++++++++++++
        test = read_data(join(folder, "test.txt"))
        # Aggregate performance over shuffled batches of the training batch size.
        test_dl_shuffled = create_dataloader(test, batch_size=tr_batch_size, shuffle=True)
        test_aggr_perf_shuffled = evaluate(model, test_dl_shuffled, criterion)
        print("\ntest's aggregate performace with shuffled batches:", test_aggr_perf_shuffled)
        save_dict_as_json(test_aggr_perf_shuffled, join(te1_log, f"{lang}_{n}_shuffled.json"))
        
        # Non-shuffled batches of te_batch_size sequences; only the second
        # return value (test_perfms) is written to disk, the aggregate is printed.
        test_dl = create_dataloader(test, batch_size=te_batch_size, shuffle=False)
        test_aggr, test_perfms = evaluate(model, test_dl, criterion, True)
        print("\ntest's aggregate performace with non-shuffled batches:", test_aggr)
        save_dict_as_json(test_perfms, join(te1_log, f"{lang}_{n}_non_shuffled.json"))

        # ++++++++++++++++++++ Test set 2 (out of distribution) ++++++++++++++++++++
        test2 = read_data(join(folder, "test2.txt"))
        test2_dl_shuffled = create_dataloader(test2, batch_size=tr_batch_size, shuffle=True)
        test2_aggr_perf_shuffled = evaluate(model, test2_dl_shuffled, criterion)
        print("\ntest2's aggregate performace with shuffled batches:", test2_aggr_perf_shuffled)
        save_dict_as_json(test2_aggr_perf_shuffled, join(te2_log, f"{lang}_{n}_shuffled.json"))
        
        # This is ONLY SPECIFIC to this experimental setup, where the number of
        # length-1 and length-2 sequences is smaller than te_batch_size.
        # The first 26 items and the next 676 (indices 26..701) are split off;
        # presumably these are all 26 length-1 and 26**2 length-2 strings over
        # the 26-letter vocabulary -- confirm against test2.txt.
        seq_len1, seq_len2, seq_len3plus = test2[:26], test2[26:702], test2[702:]
        # No `shuffle` argument here: create_dataloader's default applies.
        seq_len1_dl = create_dataloader(seq_len1, batch_size=te_batch_size)
        seq_len1_aggr_perf = evaluate(model, seq_len1_dl, criterion)
        
        seq_len2_dl = create_dataloader(seq_len2, batch_size=te_batch_size)
        seq_len2_aggr_perf = evaluate(model, seq_len2_dl, criterion)
        
        # Remaining sequences (length 3 and up) are evaluated in non-shuffled
        # batches of te_batch_size.
        test2_dl = create_dataloader(seq_len3plus, batch_size=te_batch_size, shuffle=False)
        test_aggr2, test_perfms2 = evaluate(model, test2_dl, criterion, True)
        print("\ntest2's aggregate performace with non-shuffled batches:", test_aggr2)
    
        # append the testing performance of len-1 and len-2 to test_perfms2
        test_perfms2.update({"Len-1": seq_len1_aggr_perf, "Len-2": seq_len2_aggr_perf})    
        save_dict_as_json(test_perfms2, join(te2_log, f"{lang}_{n}_non_shuffled.json"))
        
        # =============================== End of loop ===============================
        print(f"\n{'=' * 20} testing on {lang}_{n} finishes {'=' * 20}")
    
print(f"\n\n...experiment#{experiment_num} with {rnn_type} seq2seq ends.")

Start experiment#8 with LSTM seq2seq...



Current epoch: 5, 
training performance: {'loss': 1.9627960681915284, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006316666811471805, 'overlap rate': 0.41966171115636824}
evaluation performance: {'loss': 3.363228937983513, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006558333472639788, 'overlap rate': 0.2613158605992794}

Current epoch: 10, 
training performance: {'loss': 1.939992517232895, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006550000151037238, 'overlap rate': 0.42743504568934443}
evaluation performance: {'loss': 2.5463053941726685, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0006550000132847344, 'overlap rate': 0.4280808798968792}

Current epoch: 15, 
training performance: {'loss': 1.93025763630867, 'abosulate accuracy': 0.0, 'consecutive overlap rate': 0.0007033333444269374, 'overlap rate': 0.42938670963048936}
evaluation performance: {'loss': 2.401409959793091, 'abosulate accur

# Log Hyperparameters

In [6]:
# Snapshot of the configuration used for this experiment, saved as JSON in the
# experiment's log folder.
# `criterion`, `optimizer` and `model` are the objects left over from the last
# iteration of the training loop.
# Key spellings (e.g. "exist", "accu") are kept exactly as they are so that the
# written JSON keeps the same keys as earlier experiment logs.
hyperparameters = {
    # --- model ---
    "RNN type": rnn_type,
    "in vocab": in_vocab,
    "out vocab": out_vocab,
    "embedding size": embd_dim,
    "hidden size": hidden_size,
    "num of layers": num_layers,
    "dropout rate": dropout_rate,
    "bidirectional": bidirectional,
    "use_attention": use_attention,
    # --- optimisation / environment ---
    "loss function": str(criterion),
    "optimizer": str(optimizer),
    "learning rate": learning_rate,
    "device type": device.type,
    "torch version": torch.__version__,
    "reduction method": reduction_method.__name__,
    # --- training schedule ---
    "maximum number of epoches": max_epoch_num,
    "teacher forcing ratio": teacher_forcing_ratio,
    "dev acc threshold for saving model": acc_threshold,
    "train acc exist threshold": train_acc_exit,
    "dev accu exist threshold": eval_acc_exit,
    # --- size ---
    "num of trainable parameters": count_parameters(model),
}

save_dict_as_json(hyperparameters, join(log_folder, "hyperparameters.json"))

Experiments_Logs/Experiment#8/LSTM/hyperparameters.json saved!


In [None]:
# Final cell: release the Colab runtime once all cells above have finished, so
# the session does not keep its machine allocated while idle.
# NOTE(review): anything held only in kernel memory or on the runtime's local
# disk is presumably lost after this call; the logs above are written under the
# Drive-backed working directory -- confirm they are flushed before relying on it.
from google.colab import runtime
runtime.unassign()