In [1]:
import polars as pl
from plotly import express as px

In [2]:
# Load the raw audit runs and average every remaining numeric column over the
# rows that share the same configuration (the group_by keys below).
audit = (
    pl.read_ndjson("../generated/dev5_ACSEmployment_binarized.jsonl")
    .group_by(
        "dataset",
        "model_name",
        "model_params",
        "strategy",
        "strategy_params",
        "audit_budget",
        "detection_tpr",
        "detection_tnr",
    )
    .mean()
    # Highest detection TPR first; ties are ordered by ascending TNR.
    .sort("detection_tpr", "detection_tnr", descending=[True, False])
    .with_columns(
        # Row-wise JSON text of the parameter structs (used as hover text in
        # the scatter plots). Encoding each struct directly keeps the string
        # attached to its own row, instead of serialising the whole frame to
        # NDJSON and re-splitting it on newlines.
        strategy_json=pl.col("strategy_params").struct.json_encode(),
        model_json=pl.col("model_params").struct.json_encode(),
    )
)

In [3]:
# Display the aggregated frame; polars renders a truncated preview (first and
# last rows) rather than the full table.
audit

dataset,model_name,model_params,strategy,strategy_params,audit_budget,detection_tpr,detection_tnr,entropy,utility_user,utility_audit,demographic_parity_user,demographic_parity_audit,strategy_json,model_json
str,str,struct[1],str,struct[2],i64,f64,f64,f64,f64,f64,f64,f64,str,str
"""ACSEmployment""","""unconstrained""",{null},"""ROC_mitigation""","{null,0.366667}",1000,1.0,0.95,2.3527e9,0.823293,0.8244,0.055893,0.070724,"""{""epsilon"":null,""theta"":0.3666…","""{""epsilon"":null}"""
"""ACSEmployment""","""exponentiated_gradient""",{0.056},"""honest""","{null,null}",1000,1.0,0.95,2.3527e9,0.823593,0.8262,0.058005,0.072043,"""{""epsilon"":null,""theta"":null}""","""{""epsilon"":0.05600000000000001…"
"""ACSEmployment""","""exponentiated_gradient""",{0.012},"""honest""","{null,null}",1000,1.0,0.95,2.3527e9,0.823063,0.825,0.018154,0.033753,"""{""epsilon"":null,""theta"":null}""","""{""epsilon"":0.012}"""
"""ACSEmployment""","""exponentiated_gradient""",{0.1},"""honest""","{null,null}",1000,1.0,0.95,2.3527e9,0.823822,0.825,0.054049,0.071043,"""{""epsilon"":null,""theta"":null}""","""{""epsilon"":0.1}"""
"""ACSEmployment""","""exponentiated_gradient""",{0.078},"""honest""","{null,null}",1000,1.0,0.95,2.3527e9,0.823681,0.824,0.055743,0.069187,"""{""epsilon"":null,""theta"":null}""","""{""epsilon"":0.07800000000000001…"
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""ACSEmployment""","""unconstrained""",{null},"""ROC_mitigation""","{null,0.566667}",1000,0.5,1.0,2.3527e9,0.822181,0.8202,0.010708,0.012156,"""{""epsilon"":null,""theta"":0.5666…","""{""epsilon"":null}"""
"""ACSEmployment""","""unconstrained""",{null},"""ROC_mitigation""","{null,0.433333}",1000,0.5,1.0,2.3527e9,0.823787,0.8238,0.056853,0.068481,"""{""epsilon"":null,""theta"":0.4333…","""{""epsilon"":null}"""
"""ACSEmployment""","""unconstrained""",{null},"""randomized_response""","{6.0,null}",1000,0.5,1.0,2.3527e9,0.823346,0.8242,0.055549,0.070455,"""{""epsilon"":6.0,""theta"":null}""","""{""epsilon"":null}"""
"""ACSEmployment""","""unconstrained""",{null},"""ROC_mitigation""","{null,0.333333}",1000,0.5,1.0,2.3527e9,0.823663,0.823,0.054364,0.06518,"""{""epsilon"":null,""theta"":0.3333…","""{""epsilon"":null}"""


In [19]:
# Utility vs. demographic parity (user side) for honest platforms only,
# restricted to rows where detection_tpr and detection_tnr both equal 1.0.
fig = px.scatter(
    audit.filter(
        pl.col("detection_tnr") == 1.0,
        pl.col("detection_tpr") == 1.0,
        pl.col("strategy") == "honest",
    ),
    x="demographic_parity_user",
    y="utility_user",
    color="model_name",
    # Order the legend / colour assignment by the column that is actually
    # mapped to colour. The previous key ("strategy") is not used by any
    # dimension of this figure, so it had no effect.
    category_orders={"model_name": audit["model_name"].unique().sort().to_list()},
    height=600,
    width=800,
    title="Honest platform",
)
fig.show()

In [71]:
# Scatter of user utility against audit-side demographic parity for the rows
# with detection_tnr == 1.0 and detection_tpr == 0.5. Colour encodes the
# strategy; the marker symbol separates "honest" from everything else.
fig = px.scatter(
    audit.filter(
        (pl.col("detection_tnr") == 1.0) & (pl.col("detection_tpr") == 0.5)
    ).with_columns(
        # Keep the strategy value for honest rows, label all others "sneaky".
        pl.when(pl.col("strategy") == "honest")
        .then("strategy")
        .otherwise(pl.lit("sneaky"))
        .alias("is_honest"),
        # Constant column so that every marker is drawn at the same size.
        pl.lit(10).alias("size"),
    ),
    # Axes
    x="demographic_parity_audit",
    y="utility_user",
    # Marker encodings
    color="strategy",
    symbol="is_honest",
    size="size",
    size_max=10,
    category_orders={"strategy": audit["strategy"].unique().sort().to_list()},
    # Extra fields shown in the hover tooltip
    hover_data=["strategy_json", "model_json", "model_name"],
    # Layout
    title="Sneaky platform, detection tpr=50%",
    width=800,
    height=600,
)
fig.show()

In [64]:
# Overview of every configuration: user utility against audit-side demographic
# parity, one facet per (detection_tpr, detection_tnr) pair. Colour encodes
# the strategy; the marker symbol separates "honest" from everything else.
fig = px.scatter(
    audit.with_columns(
        # Keep the strategy value for honest rows, label all others "sneaky".
        is_honest=pl.when(pl.col("strategy") == "honest")
        .then("strategy")
        .otherwise(pl.lit("sneaky")),
        # Constant column so that every marker is drawn at the same size.
        size=10,
    ),
    x="demographic_parity_audit",
    y="utility_user",
    color="strategy",
    symbol="is_honest",
    size="size",
    size_max=10,
    facet_row="detection_tpr",
    facet_col="detection_tnr",
    hover_data=["strategy_json", "model_json", "model_name"],
    # Same fixed strategy ordering as the single-panel strategy plot, so each
    # strategy keeps the same colour and legend position across figures.
    category_orders={"strategy": audit["strategy"].unique().sort().to_list()},
    height=1_000,
    width=1_300,
    title="All strategies, faceted by detection TPR (rows) and TNR (columns)",
)

# Optional zoom on the dense region; uncomment to apply.
# fig.update_xaxes(range=[0, 0.1])
# fig.update_yaxes(range=[0.81, 0.83])
fig.show()