## Accessing the project directory on my Google Drive

In [None]:
import os
from google.colab import drive

# Mount Google Drive into the Colab runtime at the "/drive" mount point.
drive.mount('/drive', force_remount=False)

Mounted at /drive


In [None]:
# Project root on the mounted drive ("/drive" is the mount point used above).
project_dir = "/drive/My Drive/RNN_seq2seq"

# Change the working directory to project_dir so that the relative
# "experiments/..." paths used in later cells resolve inside the project.
os.chdir(project_dir)

In [None]:
from os.path import join


# Folder (relative to the working directory) that receives the aggregated
# result CSVs written further below; it is created if it does not exist yet.
results_folder = "experiments/RESULTS_FollowUp"
os.makedirs(results_folder, exist_ok=True)

# Disabled: a "plots" sub-folder of results_folder is currently not created.
# plots_folder = join(results_folder, "plots")
# os.makedirs(plots_folder, exist_ok=True)

# Statistics

## Get all and best main results across all tasks

In [None]:
import pandas as pd

In [None]:
def get_main_res_df(task_name):
    """Load the main results table of one task.

    Args:
        task_name: Name of the task folder located under ``experiments/``
            (resolved relative to the current working directory).

    Returns:
        The contents of ``experiments/<task_name>/main_results.csv`` as a
        pandas DataFrame.
    """
    csv_path = "experiments/{}/main_results.csv".format(task_name)
    return pd.read_csv(csv_path)


def _extract(df, conds):    
    cond = []
    for k, v in conds.items():
        cond.append(df[k] == v)

    cond = list(map(all, zip(*cond)))
    out = df[cond]
    return out

In [None]:
# Accumulators: the full per-task result tables, and the rows belonging to
# the best run of every (task, RNN type) combination.
all_res, best_res = [], []

# The best run is picked by a weighted sum of the "Gen" and "Test" values of
# the metric named by cond_metric_name.
gen_weight, test_weight = 0.6, 0.4
cond_metric_name = "Full Sequence Accuracy"

# Header of the best-results table. Every best row is the task name followed
# by the raw CSV row, so this assumes main_results.csv holds exactly the
# remaining columns in this order -- TODO confirm against the CSV files.
res_col = [
    "Task Name", "Run #", "RNN", "Attention",
    "Dataset", "Loss", "Full Sequence Accuracy",
    "First N-symbol Accuracy", "Overlap Rate",
]

task_names = ["total_red_attn_learn_efficiency", "total_red_sample_complexity"]
# Original author's note: in my initial implementation,
# I did not do per-sequence-length evaluation, hence RE_EVAL
#
# The first task is filtered on Attention == True, the second on
# Attention == False.
for task_name, attn in zip(task_names, [True, False]):
    task_res_df = get_main_res_df(task_name)

    for rnn_type in ("SRNN", "GRU", "LSTM"):
        model_filter = {"RNN": rnn_type, "Attention": attn}
        model_rows = _extract(task_res_df, model_filter)

        # Run 1 is the fallback when no run scores above 0; the strict ">"
        # below means the earliest run wins ties.
        top_run, top_score = 1, 0
        for run_id in range(1, 4):
            run_rows = _extract(model_rows, {"Run #": run_id})
            # .item() requires exactly one matching row per dataset split.
            metric_by_split = {
                split: run_rows.loc[run_rows.Dataset == split, cond_metric_name].item()
                for split in ("Test", "Gen")
            }
            score = (metric_by_split["Gen"] * gen_weight
                     + metric_by_split["Test"] * test_weight)

            if score > top_score:
                top_run, top_score = run_id, score

        # Collect every row (all dataset splits) of the winning run.
        model_filter["Run #"] = top_run
        winner_rows = _extract(task_res_df, model_filter)
        for row in winner_rows.to_numpy().tolist():
            best_res.append([task_name] + row)

    # Tag the full table with its task only after the best rows were taken
    # from it, so those rows do not already contain the task name.
    task_res_df.insert(0, "Task Name", task_name)
    all_res.append(task_res_df)

all_res_df = pd.concat(all_res)
all_res_df.to_csv(join(results_folder, "all_main_results_all_tasks.csv"), index=False)

best_res_df = pd.DataFrame(best_res, columns=res_col)
best_res_df.to_csv(join(results_folder, "best_main_results_all_tasks.csv"), index=False)