In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.append("..")
import utils.utils as utils
import utils.postprocessing_utils as postpro_utils

In [2]:
# Notebook configuration.
# `results_path` is the directory prefix used for every Excel file this
# notebook reads or writes (it is concatenated directly with file names,
# so the trailing slash matters).
results_path = "../results/"
# `task` is substituted into the Excel file names
# (e.g. "results_{task}.xlsx", "baselines_{task}.xlsx").
task = "sentiment"

In [3]:
# Build the input workbook paths for the configured task.
results_file = "{}results_{}.xlsx".format(results_path, task)
baselines_file = "{}baselines_{}.xlsx".format(results_path, task)

# sheet_name=None makes read_excel load every sheet and return a dict
# mapping sheet name -> DataFrame.
results = pd.read_excel(results_file, sheet_name=None)
baselines = pd.read_excel(baselines_file, sheet_name=None)

In [5]:
# Post-process every results sheet and write three stacked tables per sheet
# into "results_{task}_postprocessed.xlsx":
#   1. the cleaned per-language results plus group and baseline columns,
#   2. the means aggregated by test-language group,
#   3. the means of (2) aggregated again by train-language group.
with pd.ExcelWriter(results_path + "results_{}_postprocessed.xlsx".format(task)) as writer:
    for sheet_name, df in results.items():
        # Add empty column for missing training languages
        df = postpro_utils.fill_missing_columns(df)
        # Reorder columns so that they match the order of testing languages
        df = postpro_utils.reorder_columns(df)
        # Add language groups
        df = utils.add_lang_groups(df, "Group")
        # Add baseline
        # NOTE(review): this assignment aligns on the row index, so it assumes
        # the baselines sheet shares the results sheet's index/row order -- confirm.
        df["Baseline"] = baselines[sheet_name]["Baseline"]

        # Change language column name
        # Raw string: "\T" is not a valid escape sequence, so the non-raw
        # literal triggers a DeprecationWarning/SyntaxWarning on modern Python.
        # The runtime value (with a literal backslash) is unchanged.
        output1 = df.rename(columns={utils.find_lang_column(df): r"Test\Train"})
        output1.to_excel(writer, index=False, sheet_name=sheet_name)

        # Mean of train languages by test language group
        df_by_test_group = postpro_utils.mean_exclude_by_group(df).set_index("Group")

        # Prepend an all-NaN column labelled NaN before writing.
        # NOTE(review): presumably a spacer so the columns line up with the
        # first table -- confirm.
        output2 = df_by_test_group.copy()
        output2.insert(loc=0, column=np.nan, value=[np.nan]*df_by_test_group.shape[0])
        # Second table starts a few rows below the end of the first one.
        output2.to_excel(writer, sheet_name=sheet_name, startrow=df.shape[0] + 5)

        # Mean of previous means by train language group
        df_by_both_group = df_by_test_group.drop("Baseline", axis=1)
        # Transpose so that each row is a training language, with its name
        # in the "Train_langs" column.
        df_by_both_group = df_by_both_group.transpose().reset_index().rename(columns={"index": "Train_langs"})
        df_by_both_group = utils.add_lang_groups(df_by_both_group, "Train Group")
        # numeric_only=True keeps the pandas 1.x default behaviour explicit:
        # on pandas >= 2.0 the default is False and any remaining string column
        # (e.g. "Train_langs") would make mean() raise a TypeError.
        df_by_both_group = df_by_both_group.groupby(["Train Group"]).mean(numeric_only=True)
        # Fix the group order, then transpose back so rows are test groups
        # and columns are train groups.
        df_by_both_group = df_by_both_group.reindex(["Fusional", "Isolating", "Agglutinative", "Introflexive"]).transpose()

        output3 = df_by_both_group.rename_axis(r"Test\Train")
        # Third table starts a few rows below the end of the second one.
        output3.to_excel(writer, sheet_name=sheet_name, startrow=df.shape[0] + df_by_test_group.shape[0] + 10)