# Imports

In [None]:
# standard library imports
# /

# related third party imports
import structlog

# local application/library specific imports
from tools.configurator import (
    get_configs_out,
    get_config_ids,
)
from tools.analyzer import (
    print_table_from_dict,
    print_df_from_dict,
    get_results_dict,
    merge_all_results,
    create_config_id_print,
)


# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)

In [None]:
##### INPUTS #####
# Name of the experiment to analyse.
# Alternative experiment names: "experiment_auto_20250502-091157", "experiment_20250414-094538"
EXP_NAME = "experiment_20250514-100140"
# Metric keys handed to the reporting helpers as `exclude_metrics`.
EXCLUDE_METRICS = [
    "val_acc_true_student",
    "val_acc_true_pred",
    "val_f1_true_student",
    "val_f1_true_pred",
]
# Forwarded to the reporting helpers as `legend_exact`.
LEGEND_EXACT = True

In [None]:
# Maps each metric key to the human-readable label used in legends/tables.
# Passed to the reporting helpers as `metric2legend` (see `legend_kwargs`).
# Labels read "<split> <metric> <source> -> <reference>".
METRIC2LEGEND_DICT = {
    # accuracy
    "val_acc_student_pred": "val acc LLM -> student",
    "val_acc_true_student": "val acc student -> true",
    "val_acc_true_pred": "val acc LLM -> true",
    # proportion of invalid outputs
    "val_prop_invalid": "val proportion invalid",
    # F1
    "val_f1_student_pred": "val f1 LLM -> student",
    "val_f1_true_student": "val f1 student -> true",
    "val_f1_true_pred": "val f1 LLM -> true",
}

In [None]:
# Fetch the configs of the selected experiment and the id of each config.
configs = get_configs_out(EXP_NAME)
config_ids = get_config_ids(configs)
# Pair ids with configs by position (id i belongs to config i).
config_dict = dict(zip(config_ids, configs))

# Printable label for every config id.
CONFIG2LEGEND_DICT = {cid: create_config_id_print(cid) for cid in config_ids}
# Legend options shared by the reporting calls; unpacked there with `**`.
legend_kwargs = {
    "config2legend": CONFIG2LEGEND_DICT,
    "legend_exact": LEGEND_EXACT,
    "metric2legend": METRIC2LEGEND_DICT,
}

In [None]:
# Merge the results of all configs of this experiment.
# NOTE(review): `run_id_dict` is not read again in the visible cells; the call
# is presumably needed for its side effect of producing the merged results
# that are loaded afterwards -- confirm against tools.analyzer.merge_all_results.
run_id_dict = merge_all_results(EXP_NAME, config_ids)

# Val/Test set performance

In [None]:
# Collect the evaluation results for every config of the experiment.
# NOTE(review): run_id=None presumably selects the merged results rather than
# a single run -- confirm against tools.analyzer.get_results_dict.
results_dict = get_results_dict(exp_name=EXP_NAME, config_ids=config_ids, run_id=None)

# NOTE: uncomment the call below to print a paper-like table instead
# print_table_from_dict(
#     eval_dict=results_dict,
#     exp_name=EXP_NAME,
#     exclude_metrics=EXCLUDE_METRICS,
#     decimals=3,
#     **legend_kwargs,
# )

In [None]:
# NOTE: print the results as a dataframe and keep the returned object
df = print_df_from_dict(
    eval_dict=results_dict,
    exp_name=EXP_NAME,
    exclude_metrics=EXCLUDE_METRICS,
    **legend_kwargs,
    # Optional export; uncommenting it also requires `import os`, which is not
    # imported in the visible cells.
    # save=True,
    # save_kwargs={"fname": os.path.join("output", EXP_NAME, "results.csv")},
)

# Show only the "mean" entries of column level 1; drop_level=False keeps that
# level in the resulting column index.
df.xs("mean", axis=1, level=1, drop_level=False)