In [None]:
import pandas as pd

In [None]:
# Load the per-decision records that the cells below aggregate.
all_decisions_df = pd.read_csv(
    filepath_or_buffer="results/all_decisions_df.csv",
)

In [None]:
# Count the non-null "benchmark" entries per
# (dataset, task_scope, experiment_run, decision), leaving out every row
# whose task_scope is 'jaro'.
decision_counts_long = (
    all_decisions_df.query("task_scope != 'jaro'")
    .groupby(["dataset", "task_scope", "experiment_run", "decision"])["benchmark"]
    .count()
    .reset_index()
)

# Reshape to one row per (dataset, task_scope, experiment_run) with one
# ("benchmark", <decision>) column per decision label. Combinations that never
# occurred come out of the pivot as NaN and are turned into a count of 0.
decision_counts = pd.pivot(
    decision_counts_long,
    index=["dataset", "task_scope", "experiment_run"],
    columns="decision",
    values=["benchmark"],
).fillna(0)

# pivot only creates columns for decision labels that actually appear in the
# data. If one of the three labels used below is entirely absent, the
# MultiIndex `.loc` selection would fail (recent pandas versions raise KeyError
# for missing labels in a list selector), so add any missing label as an
# all-zero count column first.
for decision_label in ("no", "unknown", "yes"):
    if ("benchmark", decision_label) not in decision_counts.columns:
        decision_counts[("benchmark", decision_label)] = 0.0

decision_counts = decision_counts.reset_index()

# Decisiveness = (no + yes) / (no + unknown + yes), computed per row.
decision_counts["decisiveness"] = (
    decision_counts.loc[:, (slice(None), ["no", "yes"])].sum(axis="columns") /
    decision_counts.loc[:, (slice(None), ["no", "unknown", "yes"])].sum(axis="columns")
)

In [None]:
# Median decisiveness for every (dataset, task_scope) pair, in long format.
median_decisiveness_long = (
    decision_counts
    .groupby(["dataset", "task_scope"])["decisiveness"]
    .median()
    .reset_index()
)

# Wide layout: one row per dataset, one column per task scope.
median_decisiveness_wide = pd.pivot(
    median_decisiveness_long,
    index="dataset",
    columns="task_scope",
    values="decisiveness",
)

# Keep exactly these four scopes, in this fixed column order.
task_scope_order = ["1-to-1", "1-to-n", "n-to-1", "n-to-n"]
decisiveness_table = median_decisiveness_wide[task_scope_order]
decisiveness_table

In [None]:
import plotly.graph_objects as go

# Sort the datasets in descending index order once and reuse the result for
# the y-axis labels, the cell colours and the cell annotations.
table_desc = decisiveness_table.sort_index(ascending=False)
cell_values = table_desc.values

# Heatmap trace: one cell per (dataset, task scope); each cell is annotated
# with its value formatted to three decimals.
decisiveness_heatmap = go.Heatmap(
    x=["1-to-1", "1-to-N", "N-to-1", "N-to-M"],
    y=table_desc.index,
    z=cell_values,
    text=cell_values,
    texttemplate="%{text:.3f}",
    textfont=dict(size=16),
    colorscale="PrGn",
    zmin=-1.0,
    zmax=1.0,
    showscale=False,
)

figure_layout = {
    "title": "Median fraction of non-unknown decisions per task scope",
    "height": 600,
    "width": 1000,
}

fig = go.Figure(data=decisiveness_heatmap, layout=figure_layout)
fig.show()