In [22]:
import json
import numpy as np
import os

def print_tex_result(results_file, primary_metrics, percent_metrics):
    """Load one metrics JSON file and extract each task's primary metric.

    Despite the name, nothing is printed here; the returned mapping is what
    the caller formats into a LaTeX table row.

    Args:
        results_file: Path to a JSON file whose top level is indexed by task
            name, each entry being a mapping of metric name to value.
        primary_metrics: Mapping of task name -> name of the metric to report
            for that task.
        percent_metrics: Collection of metric names whose values are
            multiplied by 100 before being returned.

    Returns:
        A dict with one entry per task in ``primary_metrics`` (same iteration
        order). The value is the (possibly scaled) metric, or ``np.nan`` when
        the task or metric is absent or the stored value is not a number
        (for example a JSON ``null``).

    Raises:
        OSError: If ``results_file`` cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; being explicit avoids locale-dependent decoding.
    with open(results_file, "r", encoding="utf-8") as f:
        results = json.load(f)

    task_metrics = {}
    for task, metric_name in primary_metrics.items():
        task_results = results[task] if task in results else None
        # A task entry that is not a mapping cannot hold named metrics.
        if isinstance(task_results, dict):
            value = task_results.get(metric_name)
        else:
            value = None

        if not isinstance(value, (int, float)):
            # Missing or non-numeric (e.g. null): NaN keeps the row formattable
            # and lets nan-aware aggregation skip it.
            task_metrics[task] = np.nan
        elif metric_name in percent_metrics:
            task_metrics[task] = value * 100
        else:
            task_metrics[task] = value
    return task_metrics


In [27]:
# --- Configuration: which run to summarise and how ---
results_dir = "evaluations_new_metric/none-1B/20"
is_global_model = False

# Metric reported for each task; insertion order doubles as the column order
# of the emitted LaTeX rows.
primary_metrics = dict(
    coreference="accuracy",
    entailment="accuracy",
    linguistic_acceptability="accuracy",
    paraphrase="f1_score",
    question_classification="accuracy",
    structure_to_text="rougeL",
    text_formatting="rougeL",
    word_disambiguation="f1_score",
)
# Metrics whose stored values are scaled by 100 before display.
percent_metrics = {"accuracy", "f1_score", "rougeL"}
latex_order = list(primary_metrics)

# --- Report: one row per client plus an average, or a single global row ---
if not is_global_model:
    all_rows = []

    for i in range(8):
        results_path = os.path.join(results_dir, f"client_{i}_output_metrics.json")
        task_metrics = print_tex_result(results_path, primary_metrics, percent_metrics)

        # Keep the numeric row for averaging and print its formatted form.
        row_values = [task_metrics[task] for task in latex_order]
        all_rows.append(row_values)
        row_str = " & ".join(f"{value:.2f}" for value in row_values)
        print(f"& {i} & {row_str} \\\\")

    # Column-wise mean over clients; NaN entries are ignored by nanmean.
    all_array = np.array(all_rows)
    avg_values = np.nanmean(all_array, axis=0)
    avg_row_str = " & ".join(f"{v:.2f}" for v in avg_values)
    print(f"& avg & {avg_row_str} \\\\")

else:
    results_path = os.path.join(results_dir, "global_output_metrics.json")
    metrics = print_tex_result(results_path, primary_metrics, percent_metrics)
    latex_row = " & ".join(f"{metrics[task]:.2f}" for task in latex_order) + " \\\\"
    print(latex_row)

& 0 & 55.00 & 34.50 & 64.50 & 57.26 & 28.00 & 28.88 & 67.39 & 50.43 \\
& 1 & 54.50 & 38.50 & 64.50 & 69.96 & 30.00 & 29.96 & 73.50 & 54.39 \\
& 2 & 50.50 & 34.50 & 63.00 & 74.66 & 30.00 & 29.85 & 68.12 & 62.37 \\
& 3 & 54.50 & 34.50 & 64.50 & 22.50 & 35.00 & 29.43 & 71.07 & 64.08 \\
& 4 & 54.00 & 34.50 & 64.50 & 60.68 & 39.00 & 29.34 & 69.72 & 60.93 \\
& 5 & 51.00 & 28.00 & 43.50 & 20.28 & 13.00 & 33.54 & 73.22 & 23.85 \\
& 6 & 44.00 & 23.50 & 39.00 & 17.51 & 14.50 & 33.80 & 90.02 & 21.30 \\
& 7 & 53.00 & 34.50 & 64.50 & 67.47 & 30.50 & 30.28 & 72.53 & 61.36 \\
& avg & 52.06 & 32.81 & 58.50 & 48.79 & 27.50 & 30.63 & 73.20 & 49.84 \\
