In [59]:
from math import sqrt
import polars as pl
from plotly import express as px

In [60]:
# Load the audit records and average every remaining column over the rows that
# share the same grouping key (dataset, model, strategy, budget, detection rates).
# Alternative input file: "../generated/dev5_ACSEmployment_binarized.jsonl"
audit = (
    pl.read_ndjson("../generated/dev5_ACSEmployment_binarized_linear.jsonl")
    .group_by(
        [
            "dataset",
            "model_name",
            "model_params",
            "strategy",
            "strategy_params",
            "audit_budget",
            "detection_tpr",
            "detection_tnr",
        ]
    )
    .mean()
    # JSON-encode the two struct key columns into string columns; the plotting
    # cells pass these to `hover_data`.
    .with_columns(
        pl.col("strategy_params").struct.json_encode().alias("strategy_json"),
        pl.col("model_params").struct.json_encode().alias("model_json"),
    )
    # Highest detection_tpr first; ties broken by ascending detection_tnr.
    .sort(["detection_tpr", "detection_tnr"], descending=[True, False])
)

In [61]:
# Scatter of user-side demographic parity against user-side utility for the
# "honest" strategy only, restricted to rows where detection_tpr and
# detection_tnr are both 1.0. One colour per model.
fig = px.scatter(
    audit.filter(
        pl.col("detection_tnr") == 1.0,
        pl.col("detection_tpr") == 1.0,
        pl.col("strategy") == "honest",
    ),
    x="demographic_parity_user",
    y="utility_user",
    color="model_name",
    # Pin the order of the column that actually drives the colours. Ordering
    # "strategy" has no effect here because that column is not mapped to any
    # visual property, and without a pinned order the colour given to each
    # model depends on the row order of `audit`, which the group_by/sort that
    # builds it does not fix.
    category_orders={"model_name": audit["model_name"].unique().sort().to_list()},
    height=600,
    width=800,
    title="Honest platform",
)
fig.show()

In [62]:
# Scatter of audit-side demographic parity against user-side utility for the
# rows with detection_tnr == 1.0 and detection_tpr == 0.5, coloured by strategy.
fig = px.scatter(
    audit.filter(
        (pl.col("detection_tnr") == 1.0) & (pl.col("detection_tpr") == 0.5)
    ).with_columns(
        # Marker symbol: keep the strategy value when it is "honest", label
        # every other strategy "sneaky". The column reference is spelled out
        # because a bare string passed to `then` is read as a column name,
        # not as a literal.
        pl.when(pl.col("strategy") == "honest")
        .then(pl.col("strategy"))
        .otherwise(pl.lit("sneaky"))
        .alias("is_honest"),
        # Constant column so every marker is drawn at the same size.
        pl.lit(10).alias("size"),
    ),
    title="Sneaky platform, detection tpr=50%",
    x="demographic_parity_audit",
    y="utility_user",
    color="strategy",
    symbol="is_honest",
    size="size",
    size_max=10,
    hover_data=["strategy_json", "model_json", "model_name"],
    # Fix the legend/colour order of the strategies (sorted alphabetically).
    category_orders={"strategy": audit["strategy"].unique().sort().to_list()},
    width=800,
    height=600,
)
fig.show()

In [63]:
# Grid of scatters over every (detection_tpr, detection_tnr) pair found in
# `audit`: one facet row per detection_tpr value, one facet column per
# detection_tnr value. Each panel plots audit-side demographic parity against
# user-side utility, coloured by strategy.
fig = px.scatter(
    audit.with_columns(
        # Marker symbol: "honest" for the honest strategy, "sneaky" otherwise.
        # A bare string passed to `then` is read as a column name, so the
        # column reference is written explicitly.
        is_honest=pl.when(pl.col("strategy") == "honest")
        .then(pl.col("strategy"))
        .otherwise(pl.lit("sneaky")),
        # Constant column so every marker is drawn at the same size.
        size=10,
    ),
    x="demographic_parity_audit",
    y="utility_user",
    color="strategy",
    symbol="is_honest",
    size="size",
    size_max=10,
    facet_row="detection_tpr",
    facet_col="detection_tnr",
    hover_data=["strategy_json", "model_json", "model_name"],
    # Pin the strategy order so colours and legend entries do not depend on the
    # row order of `audit`, in line with the other scatter plots of this
    # notebook.
    category_orders={"strategy": audit["strategy"].unique().sort().to_list()},
    height=1_000,
    width=1_300,
    title=(
        "Audited demographic parity vs. user utility, "
        "by detection TPR (rows) and TNR (columns)"
    ),
)

# fig.update_xaxes(range=[0, 0.1])
# fig.update_yaxes(range=[0.81, 0.83])
fig.show()

# What is the best lying strategy?

In [64]:
# Load the manipulation records, derive a few helper columns, then compute the
# mean and the standard deviation of every non-key column per configuration.
manipulation = (
    pl.read_ndjson("../generated/manipulation5_ACSEmployment_binarized_skrub.jsonl")
    .with_columns(
        # Smallest entry of the `conditional_accuracy_user` list of each row.
        pl.col("conditional_accuracy_user")
        .list.min()
        .alias("min_conditional_accuracy"),
        # Strategy name concatenated with its JSON-encoded parameters.
        pl.concat_str(
            [pl.col("strategy"), pl.col("strategy_params").struct.json_encode()]
        ).alias("strategy_instance"),
        # JSON-encoded parameters alone; used as a grouping key below.
        pl.col("strategy_params").struct.json_encode().alias("strategy_json"),
    )
    .group_by(
        [
            "dataset",
            "model_name",
            # "model_params",
            "strategy",
            "strategy_params",
            "strategy_json",
            "audit_budget",
            "detection_tpr",
            "detection_tnr",
        ]
    )
    # Means keep the original column names; standard deviations get "_std".
    .agg(
        pl.col("*").mean(),
        pl.col("*").std().name.suffix("_std"),
    )
    # Highest detection_tpr first; ties broken by ascending detection_tnr.
    .sort(["detection_tpr", "detection_tnr"], descending=[True, False])
)

In [65]:
# Display the aggregated frame: per-configuration means plus the "_std" columns.
manipulation

dataset,model_name,strategy,strategy_params,strategy_json,audit_budget,detection_tpr,detection_tnr,model_params,entropy,utility_user,utility_audit,conditional_accuracy_user,demographic_parity_user,demographic_parity_audit,min_conditional_accuracy,strategy_instance,model_params_std,entropy_std,utility_user_std,utility_audit_std,conditional_accuracy_user_std,demographic_parity_user_std,demographic_parity_audit_std,min_conditional_accuracy_std,strategy_instance_std
str,str,str,struct[2],str,i64,f64,f64,struct[1],f64,f64,f64,list[f64],f64,f64,f64,str,struct[1],f64,f64,f64,list[f64],f64,f64,f64,str
"""ACSEmployment""","""unconstrained""","""model_swap""","{null,null}","""{""epsilon"":null,""theta"":null}""",1000,1.0,1.0,,2.3527e9,0.851482,0.8545,,0.023558,0.027746,0.812805,,,8.5239e8,0.00124,0.011806,,0.004163,0.014977,0.015728,
"""ACSEmployment""","""unconstrained""","""randomized_response""","{9.0,null}","""{""epsilon"":9.0,""theta"":null}""",1000,1.0,1.0,,2.3527e9,0.851984,0.8553,,0.0239,0.040479,0.811789,,,8.5239e8,0.001123,0.009866,,0.005535,0.025826,0.01485,
"""ACSEmployment""","""unconstrained""","""ROC_mitigation""","{null,0.3}","""{""epsilon"":null,""theta"":0.3}""",1000,1.0,1.0,,2.3527e9,0.852282,0.8552,,0.024969,0.043064,0.813008,,,8.5239e8,0.001359,0.010612,,0.003237,0.020636,0.014308,
"""ACSEmployment""","""unconstrained""","""ROC_mitigation""","{null,0.533333}","""{""epsilon"":null,""theta"":0.5333…",1000,1.0,1.0,,2.3527e9,0.851656,0.8543,,0.005146,0.029324,0.812398,,,8.5239e8,0.001227,0.011461,,0.004478,0.015206,0.01469,
"""ACSEmployment""","""unconstrained""","""ROC_mitigation""","{null,0.566667}","""{""epsilon"":null,""theta"":0.5666…",1000,1.0,1.0,,2.3527e9,0.850331,0.8523,,0.030046,0.024786,0.808943,,,8.5239e8,0.001451,0.013384,,0.006512,0.026654,0.017511,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""ACSEmployment""","""unconstrained""","""ROC_mitigation""","{null,0.566667}","""{""epsilon"":null,""theta"":0.5666…",1000,0.0,1.0,,2.3527e9,0.849843,0.8508,,0.033356,0.025856,0.807317,,,9.0409e8,0.001416,0.015073,,0.004329,0.031852,0.020698,
"""ACSEmployment""","""unconstrained""","""randomized_response""","{3.0,null}","""{""epsilon"":3.0,""theta"":null}""",1000,0.0,1.0,,2.3527e9,0.851766,0.8546,,0.024316,0.04187,0.812195,,,9.0409e8,0.000948,0.011327,,0.004618,0.019096,0.011371,
"""ACSEmployment""","""unconstrained""","""randomized_response""","{6.0,null}","""{""epsilon"":6.0,""theta"":null}""",1000,0.0,1.0,,2.3527e9,0.851867,0.8534,,0.021968,0.041121,0.813415,,,9.0409e8,0.00166,0.01226,,0.003881,0.021818,0.013436,
"""ACSEmployment""","""unconstrained""","""randomized_response""","{1.0,null}","""{""epsilon"":1.0,""theta"":null}""",1000,0.0,1.0,,2.3527e9,0.852945,0.8562,,0.02371,0.042683,0.81626,,,9.0409e8,0.000983,0.012377,,0.004176,0.026491,0.013062,


In [66]:
# Mean minimum conditional accuracy as a function of detection_tpr: one panel
# per strategy, one line per strategy parameterisation.
px.line(
    data_frame=manipulation.sort("detection_tpr").with_columns(
        # Shrink the per-group standard deviation by sqrt(5) before using it as
        # the error bar; the column keeps its "_std" name.
        # NOTE(review): 5 is presumably the number of runs per group, which
        # would make this a standard error of the mean - confirm.
        pl.col("min_conditional_accuracy_std").truediv(sqrt(5))
    ),
    facet_col="strategy",
    color="strategy_json",
    x="detection_tpr",
    y="min_conditional_accuracy",
    error_y="min_conditional_accuracy_std",
)

In [67]:
# Mean audit-side demographic parity as a function of detection_tpr: one panel
# per strategy, one line per strategy parameterisation.
px.line(
    data_frame=manipulation.sort("detection_tpr").with_columns(
        # Shrink the per-group standard deviation by sqrt(5) before using it as
        # the error bar; the column keeps its "_std" name.
        # NOTE(review): 5 is presumably the number of runs per group, which
        # would make this a standard error of the mean - confirm.
        pl.col("demographic_parity_audit_std").truediv(sqrt(5))
    ),
    facet_col="strategy",
    color="strategy_json",
    x="detection_tpr",
    y="demographic_parity_audit",
    error_y="demographic_parity_audit_std",
)

# Finding datasets with the largest base rate difference

In [68]:
# Load the disparity records from the generated JSON-lines file; the plot below
# reads its "year", "base_rate", "state", "group" and "len" columns.
base_rate = pl.read_ndjson("../generated/disparities.jsonl")

In [69]:
# Base rate over the years: one line per state, one panel per group.
px.line(
    base_rate,
    x="year",
    y="base_rate",
    color="state",
    facet_col="group",
    # `hover_data` is documented as a list (or dict) of column names. Passing
    # the column name inside a list avoids depending on how a given plotly
    # version treats a bare string argument.
    hover_data=["len"],
)