In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import precision_recall_fscore_support

In [None]:
# Load the decisions table from CSV. The cells below read its "dataset",
# "task_scope", "experiment_run", "benchmark" and "decision" columns.
all_decisions_df = pd.read_csv("results/all_decisions_df.csv")

In [None]:
# Score every (dataset, task_scope, experiment_run) combination found in the
# decisions table and collect one record per combination.
scores = []
for dataset in all_decisions_df["dataset"].unique():
    dataset_rows = all_decisions_df[all_decisions_df["dataset"] == dataset]
    for task_scope in dataset_rows["task_scope"].unique():
        scope_rows = dataset_rows[dataset_rows["task_scope"] == task_scope]
        for experiment_run in scope_rows["experiment_run"].unique():
            # The "jaro" scope is always scored on all of its rows; every other
            # scope is scored only on the rows of the current run.
            run_rows = (
                scope_rows
                if task_scope == "jaro"
                else scope_rows[scope_rows["experiment_run"] == experiment_run]
            )
            # "benchmark" is the ground truth; a prediction counts as positive
            # when the decision is exactly "yes".
            precision, recall, f1, _ = precision_recall_fscore_support(
                run_rows["benchmark"],
                run_rows["decision"] == "yes",
                average="binary",
                pos_label=True,
                zero_division=0.0,
            )
            record = {
                "dataset": dataset,
                "task_scope": task_scope,
                "experiment_run": experiment_run,
                "precision": precision,
                "recall": recall,
                "f1-score": f1,
            }
            scores.append(record)

# One row per scored combination.
scores_df = pd.DataFrame(scores)

In [None]:
def style_larger_jaro(s, props=''):
    """Return per-cell CSS marking entries that exceed the "jaro" entry.

    Parameters
    ----------
    s : pandas.Series
        Values labelled by name; must contain a "jaro" label.
    props : str
        CSS string to use for entries strictly greater than ``s["jaro"]``.

    Returns
    -------
    numpy.ndarray
        ``props`` where the entry is greater than the "jaro" entry,
        the empty string everywhere else.
    """
    baseline = s["jaro"]
    beats_baseline = s.gt(baseline)
    return np.where(beats_baseline, props, '')


def highlight_max(s, props=''):
    """Return per-cell CSS marking the maximum (NaN-ignoring) entries.

    Parameters
    ----------
    s : pandas.Series or pandas.DataFrame
        Values to inspect.
    props : str
        CSS string to use for every entry equal to the maximum.

    Returns
    -------
    numpy.ndarray
        ``props`` where the entry equals the NaN-ignoring maximum (ties are all
        marked), the empty string everywhere else. If ``s`` is empty or holds
        only NaN, nothing is marked.
    """
    present = s.notna().to_numpy()
    if not present.any():
        # np.nanmax warns on all-NaN input and raises on empty input; there is
        # no maximum to highlight in either case, so return all-empty styles.
        return np.where(present, props, '')
    return np.where(s == np.nanmax(s.values), props, '')


# Median F1-score for every (task_scope, dataset) pair, as a long table.
median_f1_long = (
    scores_df[["task_scope", "dataset", "f1-score"]]
    .groupby(["task_scope", "dataset"])
    .median()
    .reset_index()
)

# Reshape to one row per dataset and one column per task scope, "jaro" first.
scope_columns = ["jaro", "1-to-1", "1-to-n", "n-to-1", "n-to-n"]
median_f1_scores = median_f1_long.pivot(
    index="dataset",
    columns="task_scope",
    values="f1-score",
)[scope_columns]

# Row-wise styling: underline entries above the "jaro" column, bold the row maximum.
(
    median_f1_scores.style
    .apply(style_larger_jaro, axis="columns", props="text-decoration:underline")
    .apply(highlight_max, axis="columns", props="font-weight: bold")
)

In [None]:
import plotly.graph_objects as go

# Heatmap of the median F1-scores: cell colour encodes the difference to the
# first ("jaro") column, cell text shows the absolute score.
ordered_scores = median_f1_scores.sort_index(ascending=False)
score_matrix = ordered_scores.values

# Subtract each row's first entry from the whole row; keep a list of row arrays.
values = list(score_matrix - score_matrix[:, [0]])

heatmap = go.Heatmap(
    x=["baseline", "1-to-1", "1-to-N", "N-to-1", "N-to-M"],
    y=ordered_scores.index,
    z=values,
    text=score_matrix,
    texttemplate="%{text:.3f}",
    textfont=dict(size=16),
    colorscale="PRGn",
    # Differences of two scores in [0, 1] lie in [-1, 1].
    zmin=-1.0,
    zmax=1.0,
    showscale=False,
)
fig = go.Figure(
    data=heatmap,
    layout={
        "title": "Median F1-scores compared to baseline (green: better, purple: worse)",
        "height": 600,
        "width": 1000,
    },
)
fig.show()