In [None]:
import pandas as pd
from sklearn.metrics import recall_score

In [None]:
# Table of decisions aggregated by the cells below. The columns they read are
# "dataset", "task_scope", "experiment_run", "decision" and "benchmark".
DECISIONS_CSV_PATH = "results/all_decisions_df.csv"
all_decisions_df = pd.read_csv(DECISIONS_CSV_PATH)

In [None]:
def _summarise_decisions(dataset, task_scope, experiment_run, decisions_df):
    """Build one verification-effort record from a slice of the decisions table.

    Args:
        dataset: Value of the "dataset" column the slice belongs to.
        task_scope: Value of the "task_scope" column the slice belongs to.
        experiment_run: Run label stored in the record.
        decisions_df: Rows to summarise; must have "decision" and "benchmark".

    Returns:
        dict with the three labels plus
        "count"  - number of rows whose decision is "yes", and
        "recall" - recall of those "yes" decisions against "benchmark"
                   (positive label ``True``; 0.0 instead of a warning when
                   recall is undefined).
    """
    # NOTE(review): "benchmark" is presumably a boolean ground-truth column,
    # since pos_label=True is used - confirm against the CSV.
    said_yes = decisions_df["decision"] == "yes"
    return {
        "dataset": dataset,
        "task_scope": task_scope,
        "experiment_run": experiment_run,
        "count": int(said_yes.sum()),
        "recall": recall_score(
            decisions_df["benchmark"],
            said_yes,
            average="binary",
            pos_label=True,
            zero_division=0.0,
        ),
    }


verification_effort = []
for dataset in all_decisions_df["dataset"].unique():
    _dataset_df = all_decisions_df.query("dataset == @dataset")
    for task_scope in _dataset_df["task_scope"].unique():
        _task_scope_df = _dataset_df.query("task_scope == @task_scope")
        if task_scope == "jaro":
            # The "jaro" scope is summarised over all of its rows and filed
            # under run 0. Emit that record exactly once: looping over its
            # experiment_run values would only append identical duplicates.
            verification_effort.append(
                _summarise_decisions(dataset, task_scope, 0, _task_scope_df)
            )
            continue
        for experiment_run in _task_scope_df["experiment_run"].unique():
            _experiment_df = _task_scope_df.query("experiment_run == @experiment_run")
            verification_effort.append(
                _summarise_decisions(dataset, task_scope, experiment_run, _experiment_df)
            )

# One row per (dataset, task_scope, experiment_run).
verification_effort_df = pd.DataFrame(verification_effort)

In [None]:
# Typical effort per (dataset, task_scope): the median "count" over its runs.
# interpolation="lower" makes the result an actually observed count. A plain
# median() averages the two middle values when the number of runs is even,
# which can produce a count no run has, so the merge below would find no
# matching recall and leave NaN. For an odd number of runs (or equal middle
# values) this is identical to the median.
_median_count_df = (
    verification_effort_df
    .groupby(by=["dataset", "task_scope"])["count"]
    .quantile(0.5, interpolation="lower")
    .reset_index()
)

# Median recall among the runs that share the same count.
_recall_by_count_df = (
    verification_effort_df
    .groupby(["dataset", "task_scope", "count"])["recall"]
    .median()
    .reset_index()
)

# Attach to each median count the recall observed at that count.
effort_and_recall_df = _median_count_df.merge(
    _recall_by_count_df,
    on=["dataset", "task_scope", "count"],
    how="left",
)

# Wide layout: one row per dataset, columns keyed by (metric, task_scope).
median_efforts = pd.pivot(
    effort_and_recall_df,
    index="dataset",
    columns="task_scope",
    values=["count", "recall"],
)

In [None]:
import itertools

# Left-to-right order of the task scopes in the final table.
scope_ord = ["jaro", "1-to-1", "1-to-n", "n-to-1", "n-to-n"]

# Show counts as whole numbers.
_int_counts = median_efforts["count"].astype(int)
median_efforts["count"] = _int_counts

# Turn the (metric, scope) column keys into (scope, metric) and lay the
# columns out scope by scope, each as a count / recall pair.
_column_order = [
    (scope, metric)
    for scope in scope_ord
    for metric in ("count", "recall")
]
median_efforts = median_efforts.swaplevel(0, 1, axis=1)[_column_order].copy()

In [None]:
import numpy as np

def highlight_max_recall(s, props=''):
    """Return per-cell CSS that marks the highest recall value(s) in one row.

    Args:
        s: Series for a single table row whose index has "count" / "recall"
            as its innermost level.
        props: CSS string applied to the winning cell(s).

    Returns:
        Array with one entry per element of ``s``: ``props`` for every
        "recall" cell equal to the row's largest non-NaN recall (ties are all
        marked), ``''`` everywhere else. "count" cells are never marked, even
        when their number equals the best recall. A row with no non-NaN recall
        gets no highlighting.
    """
    # Find the recall cells from the labels instead of assuming a fixed number
    # of alternating count / recall columns.
    is_recall = np.asarray(s.index.get_level_values(-1) == "recall")
    cell_values = s.to_numpy()
    recall_values = cell_values[is_recall].astype(float)

    highlight = np.zeros(len(s), dtype=bool)
    if recall_values.size and not np.isnan(recall_values).all():
        highlight = is_recall & (cell_values == np.nanmax(recall_values))
    return np.where(highlight, props, '')

# Render the table, running highlight_max_recall on each row so the best
# recall per row is shown in bold.
median_efforts.style.apply(highlight_max_recall, axis=1, props="font-weight: bold")

In [None]:
import itertools

import plotly.graph_objects as go


def repeat_each(iterable, n):
    """Return an iterator that yields every element of ``iterable`` ``n`` times in a row.

    Example: ``list(repeat_each("ab", 2))`` gives ``['a', 'a', 'b', 'b']``.
    """
    # Pair each element with n, turn each pair into a run of n copies, then
    # flatten the runs into a single stream.
    runs = map(itertools.repeat, iterable, itertools.repeat(n))
    return itertools.chain.from_iterable(runs)

# Rows in descending index order; the same frame feeds the y axis, the colour
# values and the cell labels.
_sorted_efforts = median_efforts.sort_index(ascending=False)
_cell_rows = _sorted_efforts.values

# Columns alternate count / recall, with counts at the even positions. Counts
# get z = 0 so that only the recall cells vary in colour; every cell still
# shows its own number as text (counts as integers, recall with 3 decimals).
values = [
    [cell if position % 2 else 0 for position, cell in enumerate(cells)]
    for cells in _cell_rows
]
texts = [
    [
        f"{cell:.3f}" if position % 2 else f"{int(cell):d}"
        for position, cell in enumerate(cells)
    ]
    for cells in _cell_rows
]

# Two-level x axis: display name of the task scope on top, metric below.
_scope_labels = ["baseline", "1-to-1", "1-to-N", "N-to-1", "N-to-M"]
_heatmap = go.Heatmap(
    x=[list(repeat_each(_scope_labels, 2)), ["count", "recall"] * 5],
    y=_sorted_efforts.index,
    z=values,
    text=texts,
    texttemplate="%{text}",
    textfont={"size": 16},
    colorscale="BuGn",
    showscale=False,
)
fig = go.Figure(
    data=_heatmap,
    layout={
        "title": "Counts of matches reported per task scope and their recall",
        "height": 600,
        "width": 1000,
    },
)
fig.show()