A notebook that formats evaluation results as a LaTeX table and prints the result.
The results are grouped by result type (e.g., median, max, min, mean, ensemble).

In [None]:
import os
import pandas as pd
from mtqe.utils.paths import EVAL_DIR
from mtqe.utils.tables import create_latex_table

In [None]:
# Configuration: which data split to evaluate and which metric column(s) to show in the table.
DATA_SPLIT = "test"
# Alternative setting for a two-metric table:
# VALUES = ["precision", "recall"]
VALUES = ["MCC"]

In [None]:
# Names of all entries found directly inside the evaluation directory
folders = os.listdir(EVAL_DIR)

In [None]:
# One independent accumulator list per result type; each collects the dataframes read from disk.
(
    li_max_results,
    li_min_results,
    li_med_results,
    li_mean_results,
    li_ensemble_results,
) = ([] for _ in range(5))

In [None]:
# Collect results for all experiment groups.
# Each recognised file-name suffix is paired with the list that accumulates its dataframes.
# The order matters: the first matching suffix wins, exactly like an if/elif chain.
result_lists_by_suffix = (
    ('ensemble_results.csv', li_ensemble_results),
    ('max_results.csv', li_max_results),
    ('min_results.csv', li_min_results),
    ('median_results.csv', li_med_results),
    ('mean_results.csv', li_mean_results),
)
for folder in folders:
    path = os.path.join(EVAL_DIR, folder)
    if not os.path.isdir(path):
        continue
    for file in os.listdir(path):
        for suffix, li_results in result_lists_by_suffix:
            if file.endswith(suffix):
                # Only parse files that are actually result files, so that unrelated entries
                # in the folder (other file types, nested folders) are skipped rather than
                # being passed to pd.read_csv.
                li_results.append(pd.read_csv(os.path.join(path, file)))
                break

# Combine the per-folder dataframes into one dataframe per result type.
# NOTE: pd.concat raises a ValueError if a list is empty, i.e. no file of that type was found.
df_ensemble = pd.concat(li_ensemble_results)
df_max = pd.concat(li_max_results)
df_min = pd.concat(li_min_results)
df_med = pd.concat(li_med_results)
df_mean = pd.concat(li_mean_results)

In [None]:
def update_exp_group_names(row):
    """Strip a trailing en-ja marker from the row's experiment group name.

    If ``row['exp_group']`` ends with ``'enja'``, its last five characters are removed
    (the four-letter marker plus the single character preceding it). The row is
    modified in place and also returned; other rows are returned unchanged.
    """
    group_name = row['exp_group']
    if group_name[-4:] != 'enja':
        return row
    row['exp_group'] = group_name[:-5]
    return row

In [None]:
# Move en-ja results to same group as other language pairs, if they are separate.
# Every dataframe is processed row by row and its name rebound to the updated result.
df_max, df_min, df_med, df_mean, df_ensemble = (
    df_results.apply(update_exp_group_names, axis=1)
    for df_results in (df_max, df_min, df_med, df_mean, df_ensemble)
)

In [None]:
# Filter based on threshold strategy and data split - can create more dataframes here, as required
def _filter_results(df_results, threshold_strategy):
    """Return the rows of df_results with the given threshold strategy in the DATA_SPLIT split."""
    has_strategy = df_results['threshold_strategy'] == threshold_strategy
    in_split = df_results['split'] == DATA_SPLIT
    return df_results[has_strategy & in_split]


df_max_default = _filter_results(df_max, 'default')
df_min_default = _filter_results(df_min, 'default')
df_med_default = _filter_results(df_med, 'default')
df_mean_default = _filter_results(df_mean, 'default')
df_ensemble_best = _filter_results(df_ensemble, 'best')

In [None]:
# Pivot the results for the metrics (values) to be shown in the table:
# one row per experiment group, one column per metric / language pair.
# The index name is cleared afterwards.
df_max_default = pd.pivot_table(
    df_max_default, index='exp_group', columns='language_pair', values=VALUES
).rename_axis(None, axis=0)

In [None]:
# Pivot the min results (rows: experiment groups, columns: metric / language pair)
# and clear the index name.
df_min_default = pd.pivot_table(
    df_min_default, index='exp_group', columns='language_pair', values=VALUES
).rename_axis(None, axis=0)

In [None]:
# Pivot the median results (rows: experiment groups, columns: metric / language pair)
# and clear the index name.
df_med_default = pd.pivot_table(
    df_med_default, index='exp_group', columns='language_pair', values=VALUES
).rename_axis(None, axis=0)

In [None]:
# Pivot the mean results (rows: experiment groups, columns: metric / language pair)
# and clear the index name.
df_mean_default = pd.pivot_table(
    df_mean_default, index='exp_group', columns='language_pair', values=VALUES
).rename_axis(None, axis=0)

In [None]:
# Pivot the ensemble results (rows: experiment groups, columns: metric / language pair)
# and clear the index name.
df_ensemble_best = pd.pivot_table(
    df_ensemble_best, index='exp_group', columns='language_pair', values=VALUES
).rename_axis(None, axis=0)

In [None]:
# Sanity check: show the (rows, columns) shape of the pivoted median results
df_med_default.shape

In [None]:
def create_table(df_data):
    """Build a LaTeX table from one pivoted results dataframe and print it.

    The table has one row per entry of ``df_data.index`` and one column group per
    language pair. Cells are read from ``df_data`` by column position, so the layout
    depends on the number of metrics in the module-level ``VALUES``:

    * one metric: columns 0-3 are used in order;
    * two metrics (assumed to be precision & recall): columns ``j`` and ``j + 4`` are
      interleaved so both metrics of a language pair sit side by side.

    NOTE(review): the hard-coded headers presume the columns are ordered
    en-cs, en-de, en-ja, en-zh within each metric - confirm against the pivot output.

    Args:
        df_data: Pivoted results dataframe (index: experiment groups).

    Raises:
        ValueError: If ``VALUES`` does not contain exactly one or two metrics.
    """
    n_values = len(VALUES)
    if n_values == 1:
        col_names = ['experiment_group', 'en-cs', 'en-de', 'en-ja', 'en-zh']
        col_positions = [0, 1, 2, 3]
    elif n_values == 2:  # Assume it's precision & recall
        col_names = ['experiment_group', 'en-cs', '', 'en-de', '', 'en-ja', '', 'en-zh', '']
        col_positions = [0, 4, 1, 5, 2, 6, 3, 7]
    else:
        # Previously this case fell through and failed on an unbound local variable.
        raise ValueError(f'create_table supports 1 or 2 metrics in VALUES, got {n_values}')

    # Map each experiment group to its row of cell values, in table column order.
    di_data = {
        exp_group: [df_data.iloc[i, j] for j in col_positions]
        for i, exp_group in enumerate(df_data.index)
    }
    li_data = create_latex_table(col_names, di_data)
    print(li_data)

In [None]:
# Print the LaTeX table for the median results with the default threshold strategy
create_table(df_med_default)

In [None]:
def create_table_two_dfs(df_data_1, df_data_2):
    """Build a LaTeX table showing two pivoted results dataframes side by side and print it.

    For each of the first four columns, the value from ``df_data_1`` is followed by the
    value from ``df_data_2``. Rows are labelled with ``df_data_1.index`` and the two
    dataframes are paired by row *position*, not by index label, so ``df_data_2`` must
    list the same experiment groups in the same order for the table to be meaningful.

    NOTE(review): the hard-coded headers presume the columns are ordered
    en-cs, en-de, en-ja, en-zh - confirm against the pivot output.

    Args:
        df_data_1: Pivoted results dataframe shown first in each column pair.
        df_data_2: Pivoted results dataframe shown second in each column pair.

    Raises:
        ValueError: If the module-level ``VALUES`` does not contain exactly one metric.
    """
    if len(VALUES) != 1:
        # Previously this case fell through and failed on an unbound local variable.
        raise ValueError(f'create_table_two_dfs supports exactly 1 metric in VALUES, got {len(VALUES)}')

    col_names = ['experiment_group', 'en-cs','en-cs', 'en-de', 'en-de', 'en-ja', 'en-ja', 'en-zh', 'en-zh']
    # Map each experiment group to its row: the two dataframes' values alternate per column.
    di_data = {
        exp_group: [df.iloc[i, j] for j in range(4) for df in (df_data_1, df_data_2)]
        for i, exp_group in enumerate(df_data_1.index)
    }
    li_data = create_latex_table(col_names, di_data)
    print(li_data)

In [None]:
# Print the LaTeX table pairing the min and max results (default threshold strategy)
create_table_two_dfs(df_min_default, df_max_default)