In [1]:
import pandas as pd
from src.metrics import brier_score

In [2]:
# Relative directories used by this notebook. `tables_path` is the prefix for
# the LaTeX table written further down; `graphics_path` is presumably the
# counterpart for exported figures (not used in the cells shown here).
graphics_path = "../images/pdf/"
tables_path = "../tables/"

# Identifiers of all models, one per line.
list_of_all_models = [
    "bert-base-cased",
    "bert-large-cased",
    "gemma-2b",
    "gemma-7b",
    "gpt2",
    "gpt2-medium",
    "gpt2-large",
    "gpt2-xl",
    "opt-125m",
    "opt-350m",
    "opt-1.3b",
    "opt-6.7b",
    "roberta-base",
    "roberta-large",
    "xlm-roberta-base",
    "xlm-roberta-large",
]
# Load the instance-level results. The file is read as JSON Lines
# (`lines=True`): one JSON record per line, each becoming one DataFrame row.
instance_lvl_results = pd.read_json(
    "../../scores/BEAR/instance_level_results.json", lines=True, orient="records"
)

## Table: Accuracy and Brier Scores for CLMs and MLMs on most similar domain

In [3]:
# Each entry is (first model, second model, domain): both models of a pair are
# scored on the instances belonging to that domain.
# NOTE(review): going by the section title, each pair is presumably one CLM
# and one MLM — confirm.
pairs_with_domain = [
    ("opt-125m", "roberta-base", "Arts"),
    ("gpt2-medium", "bert-base-cased", "Economic"),
    ("gpt2-large", "roberta-large", "Biographical"),
    ("opt-350m", "bert-large-cased", "Movies"),
    ("gpt2", "xlm-roberta-large", "Arts"),
]

# Names of confidence-estimator columns. Not referenced in this cell; kept
# because other cells may rely on it.
estimators = [
    "margin_conf",
    "average_conf_voting_2",
    "average_conf_min_conf",
    "consistency_conf_voting_2",
    "consistency_conf_min_conf",
]

# `explode` turns every element of the list-like "domains" column into its own
# row, so an instance tagged with several domains can be matched on each of them.
scores_exploded = instance_lvl_results.explode("domains")

# One record per (model, domain); assembled into a DataFrame in a single step.
results = []
for m1, m2, domain in pairs_with_domain:
    # Restrict to the instances of the current domain once per pair.
    domain_scores = scores_exploded.loc[scores_exploded["domains"] == domain]

    for model in (m1, m2):
        model_scores = domain_scores.loc[domain_scores["model"] == model]

        accuracy = model_scores["bear_score"].mean()
        # Each Brier score pairs a correctness column with a confidence column.
        bs_margin = brier_score(
            model_scores["correctly_predicted"],
            model_scores["margin_conf"],
        )
        bs_average = brier_score(
            model_scores["min_conf_correct"],
            model_scores["average_conf_min_conf"],
        )

        results.append(
            dict(
                model=model,
                domain=domain,
                accuracy=accuracy,
                bs_margin=round(bs_margin, 3),
                bs_average=round(bs_average, 3),
            )
        )

brier_scores_domain = pd.DataFrame(results)

In [4]:
# Write the per-domain accuracy / Brier-score table as a LaTeX tabular.
#
# `escape=False` is needed so that the \texttt{...} wrapper produced by the
# "model" formatter is emitted verbatim. It also switches off escaping of the
# column headers, and "bs_margin" / "bs_average" contain underscores, which are
# invalid in LaTeX text mode. The headers are therefore passed explicitly with
# their underscores escaped; the cell values are unaffected.
(
    brier_scores_domain
    .to_latex(
        tables_path + "brier_scores_per_domain.txt",
        float_format="%.3f",
        # A list of strings is used as aliases for the column names.
        header=[
            str(column).replace("_", r"\_")
            for column in brier_scores_domain.columns
        ],
        formatters={
            # Typeset model identifiers in a monospaced font.
            "model": lambda x: r"\texttt{" + str(x) + "}"
        },
        escape=False,
        index=False
    )
)