## Accessing the project directory on my Google Drive

In [1]:
import os
from google.colab import drive

# Mount Google Drive at /drive so files stored there are reachable as ordinary
# filesystem paths (Colab-only: relies on the google.colab `drive` helper).
drive.mount('/drive', force_remount=False)

Mounted at /drive


In [2]:
# Project root inside the Drive mounted at "/drive".
project_dir = "/drive/My Drive/RNN_seq2seq"

# Change the working directory to project_dir so that relative paths
# (e.g. "experiments/...") resolve against the project root.
os.chdir(project_dir)

# Statistics

## Get all and best main results across all tasks

In [3]:
import pandas as pd

In [4]:
def get_main_res_df(folder_name):
    """Load the summarized main-results table for one results folder.

    Args:
        folder_name: Name of the sub-folder of
            ``experiments/RESULTS_CAMERA_READY`` that holds
            ``summarized_main_results_all_tasks.csv``.

    Returns:
        pandas.DataFrame with the CSV contents. No index column is
        requested, so pandas assigns a fresh default integer index.

    Note:
        The path is relative and is therefore resolved against the
        current working directory.
    """
    return pd.read_csv(
        f"experiments/RESULTS_CAMERA_READY/{folder_name}/summarized_main_results_all_tasks.csv"
    )


def _extract(df, conds):
    cond = []
    for k, v in conds.items():
        cond.append(df[k] == v)

    cond = list(map(all, zip(*cond)))
    out = df[cond]
    return out

In [5]:
# Load the summarized results stored under the "main" results folder.
folder_name = "main"
df = get_main_res_df(folder_name)
df  # bare last expression: rendered as this cell's output

Unnamed: 0,Task Name,RNN,Attention,Dataset,Metric,Max,Min,Mean,Std
0,identity,SRNN,True,Train,Loss,0.004613,0.003796,0.004123,0.000433
1,identity,SRNN,True,Train,Full Sequence Accuracy,1.000000,0.999900,0.999967,0.000058
2,identity,SRNN,True,Train,First N-symbol Accuracy,1.000000,0.999975,0.999992,0.000014
3,identity,SRNN,True,Train,Overlap Rate,1.000000,0.999988,0.999996,0.000007
4,identity,SRNN,True,Dev,Loss,0.008488,0.005006,0.006216,0.001970
...,...,...,...,...,...,...,...,...,...
379,quadratic_copying,LSTM,False,Test,Overlap Rate,0.762351,0.697804,0.737460,0.034715
380,quadratic_copying,LSTM,False,Gen,Loss,9.799124,8.598423,9.368459,0.668431
381,quadratic_copying,LSTM,False,Gen,Full Sequence Accuracy,0.002110,0.000070,0.001330,0.001101
382,quadratic_copying,LSTM,False,Gen,First N-symbol Accuracy,0.109765,0.098585,0.104208,0.005590


In [6]:
# Which summary statistic (column) and which metric (row filter) to compare.
measure = "Std"
metric = "Full Sequence Accuracy"

# One Series of `measure` values per dataset split, all filtered the same way.
train, test, gen = (
    _extract(df, {"Dataset": split, "Metric": metric})[measure]
    for split in ("Train", "Test", "Gen")
)

In [7]:
# Kendall rank correlation of the Train values with the Test and Gen values.
# Series.corr pairs values by index label, and the three selections carry
# different row labels, so the index is dropped to pair them by position.
tuple(
    train.reset_index(drop=True).corr(other.reset_index(drop=True), method="kendall")
    for other in (test, gen)
)

(0.7893469648853851, -0.35275027615200993)

In [8]:
# Spearman rank correlation of the Train values with the Test and Gen values.
# Series.corr pairs values by index label, and the three selections carry
# different row labels, so the index is dropped to pair them by position.
tuple(
    train.reset_index(drop=True).corr(other.reset_index(drop=True), method="spearman")
    for other in (test, gen)
)

(0.8901182489216504, -0.5168693036591364)