## Accessing the project directory on my Google Drive

In [1]:
import os
from google.colab import drive

# Mount Google Drive at /drive (without forcing a remount) so the project
# folder stored on Drive is reachable from this runtime.
drive.mount('/drive', force_remount=False)

Mounted at /drive


In [2]:
# Project root on the mounted Drive (the Drive is mounted at /drive).
project_dir = "/drive/My Drive/RNN_seq2seq"

# Change the working directory to project_dir so the relative paths used later
# ("data/...", "experiments/...") resolve against the project folder.
# NOTE(review): the local `scripts.*` imports presumably rely on this too -- confirm.
os.chdir(project_dir)

## Dependencies

In [3]:
import random
import pandas as pd 
from string import ascii_lowercase
from IPython.display import clear_output

from os import makedirs
from os.path import join

import torch.nn as nn
import torch.optim as optim

from scripts.utils import read_datasets, save_dict_as_json
from scripts.dataloader import get_text_encoder_decoder, customize_dataloader_func
from scripts.visualization import plot_training_log, plot_performances_per_seq_len
from scripts.pytorch_utils import get_model, count_parameters, train_and_evaluate, evaluate, get_results

## Log the timestamp and set random seeds

In [4]:
from datetime import datetime
import random 
import numpy as np
import torch

# Record when this session started so saved outputs can be matched to a run.
now = datetime.now()
print("Time stamp:", now.strftime("%Y-%m-%d %H:%M:%S"))

# Seed every RNG used by the notebook (Python, NumPy, PyTorch) with one value
# so that a fresh "Restart & Run All" is reproducible.
seed = 3266473
random.seed(seed)
np.random.seed(seed)
# torch.manual_seed returns a Generator; the trailing semicolon keeps its repr
# out of the cell output.
torch.manual_seed(seed);

Time stamp: 2023-01-23 01:33:04


<torch._C.Generator at 0x7f074b3d4510>

## Data loading and processing

In [5]:
# Load the four dataset splits for this task; the path is relative to the
# working directory set above.
# NOTE(review): `gen` is presumably a generalization set -- confirm.
train, dev, test, gen = read_datasets("data/input_spec_red")

In [6]:
# Size (len) of each split, in the order train, dev, test, gen.
tuple(len(split) for split in (train, dev, test, gen))

(30000, 30000, 150000, 300000)

In [7]:
# Input and output sides use the same symbol inventory: a-z plus "@".
in_vocab = ascii_lowercase + "@"
out_vocab = ascii_lowercase + "@"

# One (encoder, decoder) pair per vocabulary, built input first, then output.
(in_seq_encoder, in_seq_decoder), (out_seq_encoder, out_seq_decoder) = [
    get_text_encoder_decoder(vocab) for vocab in (in_vocab, out_vocab)
]

In [8]:
# Callable that turns a dataset split into a loader using the encoders above.
# The defaults set here can be overridden per call (batch_size is, below).
dataloader_func = customize_dataloader_func(
    in_seq_encoder,
    out_seq_encoder,
    padding_idx=1,
    batch_size=1000,
    shuffle=False,
)

# Train/dev use the default batch size; the larger test/gen splits are
# batched in chunks of 5000.
train_dl, dev_dl = dataloader_func(train), dataloader_func(dev)
test_dl, gen_dl = [dataloader_func(split, batch_size=5000) for split in (test, gen)]

## Experiments

In [None]:
task_name = "input_spec_red"

# Tables accumulated over the whole grid and persisted as CSV:
#   metadata     -- one row per (RNN type, attention) configuration
#   main_results -- one row per (run, dataset) pair
metadata = []
main_results = []
task_folder = join("experiments", task_name)
metadata_fp = join(task_folder, "metadata.csv")
main_results_fp = join(task_folder, "main_results.csv")
meta_col = ["RNN", "Attention", "lr", "Hidden size", "Embedding size", "Param #", "Max Epoch #"]
main_res_col = ["Run #", "RNN", "Attention", "Dataset", "Loss", 
                "Full Sequence Accuracy", "First N-symbol Accuracy", "Overlap Rate"]

# Hyperparameters shared by every configuration. They do not depend on the
# loop variables, so they are set once instead of on every iteration.
embd_dim = 128
hidden_size = 512
learning_rate = 5e-4
weight_decay = 1e-5
max_epoch_num = 500

# Make sure the CSV destination exists before the first checkpoint is written.
makedirs(task_folder, exist_ok=True)

# Grid: 3 RNN types x {with, without} attention x 3 runs per configuration.
for rnn_type in ["SRNN", "GRU", "LSTM"]:
    for use_attn in [True, False]:
        for run_num in range(1, 4):

            print(f"RNN type: {rnn_type}; use attention: {use_attn}; run num: {run_num}\n")

            ModelConfig = {"rnn_type": rnn_type,
                           "embd_dim": embd_dim,
                           "hidden_size": hidden_size,
                           "device": "cuda",
                           "num_layers": 1,
                           "dropout_rate": 0.0,
                           "use_attention": use_attn,
                           "bidirectional": False,
                           "in_vocab_size": len(in_vocab) + 2, # for the two boundary symbols 
                           "out_vocab_size": len(out_vocab) + 2, # for the two boundary symbols 
                           "reduction_method": "sum"}

            # A fresh model, loss and optimizer for every run.
            model = get_model(ModelConfig)

            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

            # Per-run output folder: experiments/<task>/<rnn>/<attn|attn-less>/<run>.
            attn = "attn" if use_attn else "attn-less"
            folder = join(task_folder, rnn_type, attn, str(run_num))
            makedirs(folder, exist_ok=True)

            saved_model_fp = join(folder, "model.pt")
            # NOTE(review): the meaning of acc_threshold / train_exit_acc /
            # eval_exit_acc is defined in scripts.pytorch_utils -- confirm there.
            log = train_and_evaluate(model, train_dl, dev_dl, 
                                     criterion, optimizer, 
                                     saved_model_fp=saved_model_fp, 
                                     acc_threshold=-0.1, 
                                     print_eval_freq=10, 
                                     max_epoch_num=max_epoch_num, 
                                     train_exit_acc=0.9999, 
                                     eval_exit_acc=0.995, 
                                     teacher_forcing_ratio=1.0)

            # Persist the training log, the configuration and the training plot
            # next to the saved model.
            save_dict_as_json(log, join(folder, "training_log.json"))
            save_dict_as_json(ModelConfig, join(folder, "ModelConfig.json"))
            plot_training_log(log, show_plot=False, saved_plot_fp=join(folder, "training_plot.png"))

            # Train/dev rows come from the training log; test/gen rows come from
            # evaluating the model on the held-out loaders.
            train_res, dev_res = get_results(log, train_log=True)

            test_aggr = evaluate(model, test_dl, criterion)
            gen_aggr = evaluate(model, gen_dl, criterion)

            test_res = get_results(test_aggr, train_log=False)
            gen_res = get_results(gen_aggr, train_log=False)

            for res, ds in zip([train_res, dev_res, test_res, gen_res], ["Train", "Dev", "Test", "Gen"]):
                main_results.append([run_num, rnn_type, use_attn, ds] + res)

            # Checkpoint the results after every run so an interrupted or
            # disconnected session keeps everything collected so far. The file
            # is rewritten in full each time, so the final content is the same
            # as a single write at the end.
            pd.DataFrame(main_results, columns=main_res_col).to_csv(main_results_fp, index=False)

            clear_output(wait=True)

        # One metadata row per configuration; the parameter count is read from
        # the last model built for it.
        param_num = count_parameters(model)
        metadata.append([rnn_type, use_attn, learning_rate, hidden_size, embd_dim, param_num, max_epoch_num])
        pd.DataFrame(metadata, columns=meta_col).to_csv(metadata_fp, index=False)

RNN type: LSTM; use attention: False; run num: 3

The model has 3,177,245 trainable parameters
Current epoch: 10, 
training performance: {'loss': 3.39867688814799, 'full sequence accuracy': 0.0, 'first n-symbol accuracy': 0.0032501594385448192, 'overlap rate': 0.08379831779973834}
evaluation performance: {'loss': 3.4051248391469318, 'full sequence accuracy': 0.0, 'first n-symbol accuracy': 0.003038822903747133, 'overlap rate': 0.08282252346961376}

Current epoch: 20, 
training performance: {'loss': 3.2332709630330405, 'full sequence accuracy': 0.0, 'first n-symbol accuracy': 0.031506862564235565, 'overlap rate': 0.15299601543038824}
evaluation performance: {'loss': 3.2522762934366862, 'full sequence accuracy': 0.0, 'first n-symbol accuracy': 0.02962420612890535, 'overlap rate': 0.15018907571790316}

Current epoch: 30, 
training performance: {'loss': 3.4123722553253173, 'full sequence accuracy': 0.0007666666666666667, 'first n-symbol accuracy': 0.12625420759421419, 'overlap rate': 0.288

## Automatically disconnect and delete the runtime 

In [None]:
# Release the Colab runtime once all cells above have finished, so the session
# is disconnected and deleted without manual action.
from google.colab import runtime
runtime.unassign()