In [1]:
import pandas as pd
import numpy as np
import xlsxwriter

import sys
sys.path.append("..")
import utils.utils as utils
import utils.postprocessing_utils as postpro_utils

In [2]:
# Name of the task being post-processed; it is substituted into the input and
# output workbook file names used by the later cells.
task = "sentiment"
# Relative path of the directory the result workbooks are read from and the
# post-processed workbook is written to.
results_path = "../results/"

In [3]:
# sheet_name=None makes read_excel load every sheet, so both variables are
# mappings of sheet name -> DataFrame (the export cell iterates results.items()
# and looks up baselines[sheet_name]).
results = pd.read_excel(results_path + "results_{}.xlsx".format(task), sheet_name=None)
baselines = pd.read_excel(results_path + "baselines_{}.xlsx".format(task), sheet_name=None)

In [4]:
def make_format(workbook, cell_value, coln, row_max=None):
    """Build the cell format used when writing one cell of a results table.

    Parameters
    ----------
    workbook : object
        Anything exposing ``add_format(dict)``; in this notebook an
        ``xlsxwriter.Workbook``.
    cell_value : object
        The value that is about to be written to the cell.
    coln : int
        Zero-based column position. Columns 0 and 1 are left-aligned, all
        other columns are right-aligned.
    row_max : object, optional
        The value to highlight in the current row (bold + underline). ``None``
        (the default, used for header cells) disables the highlighting.

    Returns
    -------
    object
        Whatever ``workbook.add_format`` returns for the assembled properties.
    """
    group_colors = {
        "Fusional": "#95c78f",
        "Isolating": "#f79d97",
        "Agglutinative": "#abaff5",
        "Introflexive": "#fffecc"
    }
    grey = "#d1d1d1"

    # Defaults describe a plain cell. The 0 is handed to xlsxwriter unchanged as
    # "bg_color" (presumably meaning "no fill" -- confirm against xlsxwriter).
    bold = False
    underline = False
    color = 0
    border = 0

    # Alignment
    align = "left" if coln < 2 else "right"

    if isinstance(cell_value, str) and cell_value != "-":
        # Text label (header, language or group name): boxed, bold, coloured.
        bold = True
        border = 1
        if cell_value in group_colors:
            color = group_colors[cell_value]
        else:
            # The language -> group mapping is only needed for labels that are
            # not group names themselves, so it is no longer rebuilt for every
            # numeric cell.
            lang_to_group = utils.make_lang_group_dict()
            if cell_value in lang_to_group:
                color = group_colors[lang_to_group[cell_value]]
            else:
                color = grey
    elif row_max is not None and cell_value == row_max:
        # Highlight the row maximum. The explicit None check keeps cells whose
        # value is None (e.g. the header of the unnamed spacer column) from
        # matching the default row_max=None and being styled as a maximum.
        underline = True
        bold = True

    return workbook.add_format({"bold": bold, "underline": underline, "align": align,
                                "num_format": "0.000", "bg_color": color, "border": border})

In [5]:
def row_maxs(table):
    """Locate, for every row, the column position of the best training-language score.

    Only the columns returned by ``postpro_utils.find_training_langs(table)``
    are considered. Each row of that selection is cast to float and the label
    of its largest entry is translated into a position within *all* columns of
    ``table``.

    Returns an array with one integer column position per row of ``table``.
    """
    all_columns = table.columns.tolist()
    training_scores = table.loc[:, postpro_utils.find_training_langs(table)]

    def position_of_max(row):
        # idxmax yields a column label; map it back to its position in the full table.
        best_label = row.astype(float).idxmax()
        return all_columns.index(best_label)

    return training_scores.apply(position_of_max, axis=1).values

In [6]:
def write_to_sheet(table, worksheet, start, header=True):
    """Write ``table`` cell by cell to ``worksheet`` and return the worksheet.

    Parameters
    ----------
    table : pandas.DataFrame
        Table to write. ``row_maxs(table)`` decides which value per row is
        passed to ``make_format`` as the row maximum.
    worksheet : object
        Target exposing ``write(row, col, value, cell_format)``.
    start : int
        Row used for the header. Data rows always begin at ``start + 1``, even
        when ``header`` is False (the header row is then simply left untouched).
    header : bool, default True
        Whether to write the column names into row ``start``.

    Notes
    -----
    Formats are created on the notebook-level global ``workbook``; it has to be
    defined before this function is called.
    """
    max_locs = row_maxs(table)
    n_rows, n_cols = table.shape
    # Materialise the values once: DataFrame.values builds a fresh array on each
    # access for mixed-dtype frames, which used to happen for every single cell.
    values = table.values

    # Column names
    if header:
        for coln, column_name in enumerate(table.columns):
            worksheet.write(start, coln, column_name,
                            make_format(workbook, cell_value=column_name, coln=coln))

    # Values
    for i in range(n_rows):
        rown = start + 1 + i
        # The row maximum is constant across the row, so look it up once.
        row_max = table.iloc[i, max_locs[i]]
        for coln in range(n_cols):
            cell_value = values[i, coln]
            worksheet.write(rown, coln, cell_value,
                            make_format(workbook, cell_value=cell_value, coln=coln, row_max=row_max))

    return worksheet

In [7]:
def calc_mean_over_others(table):
    """Summarise every column of ``table`` into a single-row DataFrame.

    Per column the summary value is:

    * ``"Mean over others"`` for the first column (the row labels);
    * ``"-"`` if every entry of the column is ``"-"``;
    * for a column consisting only of floats, the mean of the entries whose
      row label differs from the column name, i.e. the row that shares its
      label with the column is left out;
    * ``None`` for anything else (text columns, mixed columns, columns of None).

    Returns a DataFrame with one row and the same columns as ``table``.
    """
    # Raw string: identical text to the previous literal (backslash included),
    # but without the invalid "\T" escape sequence that newer Python versions warn about.
    label_column = r"Test\Train"

    def summarise(column):
        if table.columns.get_loc(column.name) == 0:
            return "Mean over others"
        if (column == "-").all():
            return "-"
        if column.apply(lambda value: isinstance(value, float)).all():
            others = column[table[label_column] != column.name]
            return others.mean()
        # Neither all-missing nor purely numeric: leave the summary cell empty.
        return None

    return table.apply(summarise).to_frame().T

In [8]:
space = 6  # row offset used to space out the tables stacked on each sheet

# The context manager closes the workbook when the block is left, including when
# a sheet raises part-way through, so no unclosed workbook lingers in the kernel.
# `workbook` has to remain a notebook-level name: write_to_sheet() reads it as a global.
with xlsxwriter.Workbook(results_path + "results_{}_postprocessed.xlsx".format(task)) as workbook:
    for sheet_name, df in results.items():
        worksheet = workbook.add_worksheet(sheet_name)

        df = utils.order_table(df)
        # Add empty column for missing training languages
        df = postpro_utils.fill_missing_columns(df)
        # Reorder columns so that they match the order of testing languages
        df = postpro_utils.reorder_columns(df)
        # Add language groups
        df = utils.add_lang_groups(df, "Group")
        # Add baseline
        # NOTE(review): column assignment aligns on the index -- this presumably relies on
        # df and the baseline sheet sharing row labels after the reordering above; confirm.
        df["Baseline"] = baselines[sheet_name]["Baseline"]

        # Change language column name (raw string keeps the literal backslash
        # without the invalid "\T" escape sequence)
        output1 = df.rename(columns={utils.find_lang_column(df): r"Test\Train"})
        output1 = output1.fillna("-")

        # Write to sheet
        # NOTE(review): the two column ranges below overlap on column 1 -- confirm which
        # width is intended for the second column.
        worksheet.set_column(0, 1, 16) # Column width
        worksheet.set_column(1, output1.shape[1], 12)
        worksheet = write_to_sheet(output1, worksheet, start=0)

        # Mean of train languages by test language group
        df_by_test_group = postpro_utils.mean_exclude_by_group(df).set_index("Group")

        output2 = df_by_test_group.copy()
        output2 = output2.fillna("-").rename_axis(r"Test\Train").reset_index()
        # Unnamed, empty spacer column in position 1
        output2.insert(loc=1, column=None, value=[None]*output2.shape[0])

        # Write to sheet
        worksheet = write_to_sheet(output2, worksheet, start=df.shape[0] + space)

        # Mean of previous means by train language group
        df_by_both_group = df_by_test_group.drop("Baseline", axis=1)
        df_by_both_group = df_by_both_group.transpose().reset_index().rename(columns={"index": "Train_langs"})
        df_by_both_group = utils.add_lang_groups(df_by_both_group, "Train Group")
        df_by_both_group = df_by_both_group.groupby(["Train Group"]).mean()
        df_by_both_group = df_by_both_group.reindex(["Fusional", "Isolating", "Agglutinative", "Introflexive"]).transpose()

        output3 = df_by_both_group.rename_axis(r"Test\Train")
        output3 = output3.reset_index()
        # Unnamed, empty spacer column in position 1
        output3.insert(loc=1, column=None, value=[None]*output3.shape[0])

        # Write to sheet
        worksheet = write_to_sheet(output3, worksheet, start=df.shape[0] + df_by_test_group.shape[0] + space * 2)

        # Mean over others for every column.
        # `start` accumulates so that each summary row lands two rows below the last
        # data row of its table (write_to_sheet writes data at start + 1).
        start = 1 - space
        for table in (output1, output2, output3):
            start += table.shape[0] + space
            worksheet = write_to_sheet(calc_mean_over_others(table), worksheet, start=start, header=False)