In [60]:
import os
import pandas as pd
from mtqe.utils.paths import EVAL_DIR
from mtqe.utils.tables import create_latex_table

In [2]:
folders = os.listdir(EVAL_DIR)

In [4]:
li_max_results = []
li_ensemble_results = []

In [21]:
for folder in folders:
    files = os.listdir(os.path.join(EVAL_DIR, folder))
    for file in files:
        if file.endswith('ensemble_results.csv'):
            df = pd.read_csv(os.path.join(EVAL_DIR, folder, file))
            li_ensemble_results.append(df)
        elif file.endswith('max_results.csv'):
            df = pd.read_csv(os.path.join(EVAL_DIR, folder, file))
            li_max_results.append(df)

df_ensemble = pd.concat(li_ensemble_results)
df_max = pd.concat(li_max_results)

In [33]:
df_max.head()

Unnamed: 0.1,Unnamed: 0,threshold,MCC,precision,recall,f1,accuracy,threshold_strategy,language_pair,split,seed,model_type,exp_group
0,1,0.63,0.493971,0.55625,0.589404,0.572347,0.867,best,en-cs,dev,2710,supervised,train_monolingual_auth_data
1,6,0.5,0.478523,0.63125,0.515306,0.567416,0.846,default,en-cs,dev,89,supervised,train_monolingual_auth_data
2,5,0.1,0.395604,0.81875,0.345646,0.486085,0.723,extreme,en-cs,dev,928,supervised,train_monolingual_auth_data
3,19,0.6,0.469422,0.582011,0.561224,0.571429,0.835,best,en-cs,test,928,supervised,train_monolingual_auth_data
4,24,0.5,0.471864,0.597884,0.553922,0.575064,0.833,default,en-cs,test,42,supervised,train_monolingual_auth_data


In [44]:
def update_exp_group_names(row):
    if row['exp_group'][-4:] == 'enja':
        row['exp_group'] = row['exp_group'][:-5]
    return row

In [50]:
df_max = df_max.apply(update_exp_group_names, axis=1)
df_ensemble = df_ensemble.apply(update_exp_group_names, axis=1)

In [68]:
df_max_best = df_max[(df_max['threshold_strategy']=='best') & (df_max['split'] == 'test')]
df_ensemble_best = df_ensemble[(df_ensemble['threshold_strategy']=='best') & (df_ensemble['split'] == 'test')]

In [70]:
df_max_best

language_pair,en-cs,en-de,en-ja,en-zh
second_step_base_auth_data,0.501923,0.470854,0.278238,0.319684
second_step_base_demetr_auth_data,0.487002,0.524198,0.270935,0.25078
second_step_base_demetr_data,0.507898,0.487507,0.278238,0.266324
second_step_base_wmt22_data,,0.477558,,
second_step_base_wmt22_small_data,,0.47932,,
train_monolingual_auth_data,0.469422,0.53196,0.180096,0.325518
train_multilingual_auth_data_all,0.494843,0.531326,0.27911,0.303355
train_multilingual_auth_data_single,0.496524,0.518174,0.279338,0.319684
train_multilingual_auth_demetr_data_single,0.423552,0.519128,0.22659,0.261436
train_multilingual_auth_wmt22_data_single,,0.4962,,


In [69]:
df_max_best = pd.pivot_table(df_max_best, index='exp_group', columns='language_pair', values='MCC')
df_max_best = df_max_best.rename_axis(None, axis=0)

In [71]:
df_ensemble_best = pd.pivot_table(df_ensemble_best, index='exp_group', columns='language_pair', values='MCC')
df_ensemble_best = df_ensemble_best.rename_axis(None, axis=0)

In [72]:
df_ensemble_best

language_pair,en-cs,en-de,en-ja,en-zh
second_step_base_auth_data,0.48879,0.451037,0.278365,0.307412
second_step_base_demetr_auth_data,0.484948,0.514232,0.140752,0.243221
second_step_base_demetr_data,0.490843,0.487507,0.265552,0.251211
second_step_base_wmt22_data,,0.488046,,
second_step_base_wmt22_small_data,,0.473411,,
train_monolingual_auth_data,0.465131,0.485272,0.149869,0.262482
train_multilingual_auth_data_all,0.471864,0.527017,0.271818,0.282471
train_multilingual_auth_data_single,0.475846,0.51488,0.204108,0.300273
train_multilingual_auth_demetr_data_single,0.409911,0.502479,0.189899,0.250003
train_multilingual_auth_wmt22_data_single,,0.471371,,


In [87]:
di_max_best = {df_max_best.index[i]: [df_max_best.iloc[i,0], df_max_best.iloc[i,1], df_max_best.iloc[i,2], df_max_best.iloc[i,3]] for i in range(len(df_max_best))}
li_max_best = create_latex_table(df_max_best.columns.names, di_max_best)

In [90]:
di_ensemble_best = {df_ensemble_best.index[i]: [df_ensemble_best.iloc[i,0], df_ensemble_best.iloc[i,1], df_ensemble_best.iloc[i,2], df_ensemble_best.iloc[i,3]] for i in range(len(df_ensemble_best))}
li_ensemble_best = create_latex_table(['experiment group', 'en-cs', 'en-de', 'en-ja', 'en-zh'], di_ensemble_best)

In [92]:
print('\n'.join(li_ensemble_best))

\begin{table}
\centering
\begin{tabular}{c|ccccc}
 & EXPERIMENT GROUP & EN-CS & EN-DE & EN-JA & EN-ZH\\
\hline
second_step_base_auth_data & 0.489 & 0.451 & 0.278 & 0.307 \\
second_step_base_demetr_auth_data & 0.485 & 0.514 & 0.141 & 0.243 \\
second_step_base_demetr_data & 0.491 & 0.488 & 0.266 & 0.251 \\
second_step_base_wmt22_data & nan & 0.488 & nan & nan \\
second_step_base_wmt22_small_data & nan & 0.473 & nan & nan \\
train_monolingual_auth_data & 0.465 & 0.485 & 0.150 & 0.262 \\
train_multilingual_auth_data_all & 0.472 & 0.527 & 0.272 & 0.282 \\
train_multilingual_auth_data_single & 0.476 & 0.515 & 0.204 & 0.300 \\
train_multilingual_auth_demetr_data_single & 0.410 & 0.502 & 0.190 & 0.250 \\
train_multilingual_auth_wmt22_data_single & nan & 0.471 & nan & nan \\
\hline
\end{tabular}
\end{table}
