In [1]:
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Fill colours for the bars. The chart's colour scale pairs these, in order,
# with the first four entries of its domain.
PALETTE = [
    "#bdb8ad",
    "#ece7e0",
    "#c6d4e1",
    "#44749d",
]

In [3]:
# Locate the benchmark result files: one JSON-lines file for every
# (function suffix, column-arity suffix) combination.
result_paths = [
    f"../benchmark/results/plot{func}_{sub}.json"
    for func in ["", "_correlation", "_missing"]
    for sub in ["x", "x_y"]
]

# Read each file, then expand the list stored in "times" so that every
# entry gets its own row (with a fresh 0..n-1 index).
dfs = [
    pd.read_json(path, lines=True).explode("times").reset_index(drop=True)
    for path in result_paths
]

In [4]:
# Benchmark name -> label used as the facet title in the chart.
func_map = dict(
    PlotX="plot(df, col₁)",
    PlotXY="plot(df, col₁, col₂)",
    PlotCorrelationX="plot_correlation(df, col₁)",
    PlotCorrelationXY="plot_correlation(df, col₁, col₂)",
    PlotMissingX="plot_missing(df, col₁)",
    PlotMissingXY="plot_missing(df, col₁, col₂)",
)

# Benchmark name -> "0" for the single-column variants (names ending in "X")
# and "1" for the two-column variants (names ending in "XY").
plot_col_map = {
    name: "1" if name.endswith("XY") else "0"
    for name in func_map
}

# Time limits evaluated by df_transform_cum; the chart labels this axis in seconds.
threshs = [0.5, 1, 2, 5]
def df_transform_cum(df):
    """Summarise how many timings fall strictly below each limit in ``threshs``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a "times" column and a "name" column, and have a row
        labelled ``0``; the benchmark name is read from that row.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``threshs`` with columns "threshs", "pct"
        (share of all rows, in percent), "cnt" (number of rows), "func"
        (label from ``func_map``) and "column" (code from ``plot_col_map``).
    """
    times = df["times"]
    total = len(df)

    # Count once per limit; the percentage is derived from that same count.
    cnt = [(times < limit).sum() for limit in threshs]
    pct = [hits / total * 100 for hits in cnt]

    bench_name = df.loc[0, "name"]
    res_df = pd.DataFrame({"threshs": threshs, "pct": pct, "cnt": cnt})
    res_df["func"] = func_map[bench_name]
    res_df["column"] = plot_col_map[bench_name]
    return res_df

def df_transform_non_cum(df):
    """Summarise how many timings fall inside each consecutive interval.

    The intervals are the consecutive pairs of ``ends``:
    [0, 1), [1, 5), [5, 10), [10, 30) and [30, 60).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a "times" column and a "name" column, and have a row
        labelled ``0``; the benchmark name is read from that row.

    Returns
    -------
    pandas.DataFrame
        One row per interval with columns "threshs" (the interval's upper
        bound), "pct" (share of all rows, in percent), "cnt" (number of
        rows), "func" (label from ``func_map``) and "column" (code from
        ``plot_col_map``).
    """
    ends = [0, 1, 5, 10, 30, 60]
    times = df["times"]
    total = len(df)

    # Half-open bins [lo, hi): a value sitting exactly on an interior edge is
    # counted in one bin only (a both-ends-inclusive test would count it in
    # two adjacent bins), and the strict upper bound matches the "<" used by
    # the cumulative transform.
    cnt = [((times >= lo) & (times < hi)).sum() for lo, hi in zip(ends, ends[1:])]
    pct = [hits / total * 100 for hits in cnt]

    # Label each row by its interval's upper bound so that all three columns
    # have one entry per interval; the module-level `threshs` list has a
    # different length and cannot be used here.
    res_df = pd.DataFrame({"threshs": ends[1:], "pct": pct, "cnt": cnt})

    bench_name = df.loc[0, "name"]
    res_df["func"] = func_map[bench_name]
    res_df["column"] = plot_col_map[bench_name]
    return res_df

In [5]:
# Replace every benchmark frame with its cumulative summary.
# NOTE: `dfs` is overwritten here, and the summaries no longer carry the
# "times"/"name" columns the transform reads, so re-running this cell
# requires re-running the loading cell first.
# Per-interval alternative:
# dfs = [df_transform_non_cum(df) for df in dfs]
dfs = list(map(df_transform_cum, dfs))

# Stack all summaries into one long frame and round percentages for display.
df = pd.concat(dfs)
df["pct"] = df["pct"].round(2)

In [6]:
# x-position of the percentage label: the bar's own value, but never below 18.
# The chart right-aligns the label at this position, so the floor presumably
# keeps the text readable on very short bars.
df["textpos"] = df["pct"].apply(lambda pct: max(pct, 18))

# Label colour: white on the threshold-5 bar, black on every other bar.
df["textcolor"] = df["threshs"].apply(
    lambda thresh: "white" if thresh == 5 else "black"
)

In [7]:
# A single colour scale is used by both layers: the threshold values map to
# PALETTE (bar fill) and the literal "white"/"black" entries map to
# themselves (label text).
scale = alt.Scale(
    domain=[0.5, 1.0, 2.0, 5, "white", "black"],
    range=[*PALETTE, "white", "black"],
)
base = alt.Chart(df)

# Layer 1: one horizontal bar per threshold, its length being the percentage.
bars = base.mark_bar().encode(
    y=alt.Y("threshs:O", sort=threshs, title="Time Constraint (sec)"),
    x=alt.X("pct:Q", title="Percent of tasks completed"),
    color=alt.Color(
        "threshs:N",
        sort=threshs,
        title="Intervals",
        legend=None,
        scale=scale,
    ),
    # Disabled encoding, kept for reference:
    #     column=alt.Column('column:N', title="Completed Tasks by Function"),
    tooltip=[
        alt.Tooltip("pct:Q", title="percent"),
        alt.Tooltip("cnt:Q", title="count"),
        alt.Tooltip("threshs:O", title="interval"),
    ],
)

# Layer 2: the percentage written as text, right-aligned at `textpos`.
labels = (
    base.mark_text(align="right", dy=1, dx=-5, fontSize=16)
    .encode(
        y=alt.Y("threshs:O", sort=threshs, title=""),
        x=alt.X("textpos:Q"),
        text=alt.Text("label:N"),
        color=alt.Color(
            "textcolor:N",
            scale=scale,
            legend=None,
        ),
    )
    .transform_calculate(label='datum.pct + " %"')
)

# Overlay the layers, then draw one panel per function label, two per row.
(bars + labels).facet(
    alt.Facet("func:N", title="", header=alt.Header(labelFontSize=18)),
    columns=2,
).configure_axis(
    labelFontSize=16,
    titleFontSize=12,
)