In [17]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from matplotlib import cbook

warnings.filterwarnings("ignore")
%matplotlib inline

In [18]:
# Task identifier; it is interpolated into the evaluation and test-data paths built in the loading cell.
task_data = "income"

In [19]:
# scores loading
import numpy as np
import pandas as pd

# Root folder of the stored evaluation artefacts and path of the processed test split for the task.
DATA_PATH = "../../src/data/evaluation"
TEST_PATH = f"../../src/data/acs_{task_data}/processed/acs_{task_data}_test.csv"

# One results directory per approach (the constant names are kept as spelled in the notebook).
BASELINE = f"{DATA_PATH}/baseline/{task_data}"
SEPARATION = f"{DATA_PATH}/hardt2016/{task_data}"
INDENPENDENCE = f"{DATA_PATH}/kamiran_calders2012/{task_data}"
SUFFICIENCY = f"{DATA_PATH}/pleiss2017/{task_data}/calib_weighted"


def _read_scores(npy_path):
    """Load the single object stored in ``npy_path`` and unwrap it with ``.item()``."""
    # NOTE(review): allow_pickle=True unpickles the file content; only load files from a trusted source.
    return np.load(npy_path, allow_pickle=True).item()


# Prediction CSVs written for each approach.
base_pred = pd.read_csv(f"{BASELINE}/XGBClassifier_predictions.csv")
sep_pred = pd.read_csv(f"{SEPARATION}/XGBClassifier_separation_predictions.csv")
ind_pred = pd.read_csv(f"{INDENPENDENCE}/XGBClassifier_independence_predictions.csv")
suf_pred = pd.read_csv(f"{SUFFICIENCY}/XGBClassifier_sufficiency_predictions.csv")

# Overall and conditional score objects, read in the same order for every approach.
base_scores = _read_scores(f"{BASELINE}/XGBClassifier_scores.npy")
base_scores_cond = _read_scores(f"{BASELINE}/XGBClassifier_conditional_scores.npy")

sep_scores = _read_scores(f"{SEPARATION}/XGBClassifier_scores_separation.npy")
sep_scores_cond = _read_scores(f"{SEPARATION}/XGBClassifier_conditional_scores_separation.npy")

ind_scores = _read_scores(f"{INDENPENDENCE}/XGBClassifier_scores_independence.npy")
ind_scores_cond = _read_scores(f"{INDENPENDENCE}/XGBClassifier_conditional_scores_independence.npy")

suf_scores = _read_scores(f"{SUFFICIENCY}/XGBClassifier_scores_sufficiency.npy")
suf_scores_cond = _read_scores(f"{SUFFICIENCY}/XGBClassifier_conditional_scores_sufficiency.npy")

# Processed test split of the selected task.
df_test = pd.read_csv(TEST_PATH)

In [20]:
# data loading as dataframes
# Each score mapping becomes a frame with one row per top-level key (orient="index");
# the overall and the conditional scores of an approach are converted together.

df_base, df_base_cond = (
    pd.DataFrame.from_dict(mapping, orient="index") for mapping in (base_scores, base_scores_cond)
)

df_sep, df_sep_cond = (
    pd.DataFrame.from_dict(mapping, orient="index") for mapping in (sep_scores, sep_scores_cond)
)

df_ind, df_ind_cond = (
    pd.DataFrame.from_dict(mapping, orient="index") for mapping in (ind_scores, ind_scores_cond)
)

df_suf, df_suf_cond = (
    pd.DataFrame.from_dict(mapping, orient="index") for mapping in (suf_scores, suf_scores_cond)
)

In [21]:
def get_confidence_interval(scores, confidence=0.95):
    """
    Student-t confidence interval for the mean of ``scores``.

    :param scores: array-like of numeric observations (list, numpy array, pandas Series, ...);
        at least two observations are needed for the standard error to be defined
    :param confidence: confidence level of the interval, between 0 and 1 (default 0.95)
    :return: tuple ``(lower, upper)`` with the bounds of the interval around the sample mean
    """
    from scipy import stats

    # Convert once so that plain Python lists work too (they have no .mean()).
    values = np.asarray(scores, dtype=float)

    mean = values.mean()
    sem = stats.sem(values)
    # The level is passed positionally because its keyword name differs between SciPy versions.
    ci = stats.t.interval(confidence, len(values) - 1, loc=mean, scale=sem)
    return ci

In [None]:
# Resolution used for the figures created in this notebook.
mpl.rcParams.update({"figure.dpi": 100})

# Qualitative palettes: Dark2 colours the line plots, Set3 fills the box plots.
colors, box_colors = plt.get_cmap("Dark2"), plt.get_cmap("Set3")

# Last expression of the cell: display the Set3 colormap.
box_colors

## Performance evaluation metrics

In [23]:
# performance evaluation metrics
# Score frames in a fixed order: baseline, separation, independence, sufficiency.
_score_frames = (df_base, df_sep, df_ind, df_suf)

# One series per approach for every metric column that is plotted later.
bal_acc_base, bal_acc_sep, bal_acc_ind, bal_acc_suf = (frame["BAL_ACC"] for frame in _score_frames)

ppv_base, ppv_sep, ppv_ind, ppv_suf = (frame["PPV"] for frame in _score_frames)

# TPR is plotted as "recall" further down.
recall_base, recall_sep, recall_ind, recall_suf = (frame["TPR"] for frame in _score_frames)

f1_macro_base, f1_macro_sep, f1_macro_ind, f1_macro_suf = (frame["F1_MACRO"] for frame in _score_frames)

# x-axis values: 1..number of rows in the baseline frame instead of a hard-coded 1..10,
# so the curves still line up if the number of stored iterations changes
# (all four frames are plotted against the same x values and must have that many rows).
x_values = list(range(1, len(df_base) + 1))

In [None]:
# Show which conditional metric columns are available (e.g. the UNP_* / PRIV_* pairs used by the box plots).
df_base_cond.columns

In [None]:
# EXAMPLE
def barplot_annotate_brackets(
    num1, num2, data, center, height, yerr=None, dh=0.05, barh=0.05, fs=None, maxasterix=None, ax=None
):
    """
    Annotate barplot with p-values.

    Draws a bracket over the bars ``num1`` and ``num2`` and writes ``data`` (or the
    asterisks derived from it) centred above the bracket.

    :param num1: number of left bar to put bracket over
    :param num2: number of right bar to put bracket over
    :param data: string to write or number for generating asterixes
    :param center: centers of all bars (like plt.bar() input)
    :param height: heights of all bars (like plt.bar() input)
    :param yerr: yerrs of all bars (like plt.bar() input); list or numpy array, one value per bar
    :param dh: height offset over bar / bar + yerr in axes coordinates (0 to 1)
    :param barh: bar height in axes coordinates (0 to 1)
    :param fs: font size
    :param maxasterix: maximum number of asterixes to write (for very small p-values)
    :param ax: axes to draw on; defaults to the current axes (``plt.gca()``)
    """
    if ax is None:
        ax = plt.gca()

    if isinstance(data, str):
        text = data
    else:
        # * is p < 0.05
        # ** is p < 0.005
        # *** is p < 0.0005
        # etc.
        text = ""
        p = 0.05

        while data < p:
            text += "*"
            p /= 10.0

            if maxasterix and len(text) == maxasterix:
                break

        if len(text) == 0:
            text = "n. s."

    lx, ly = center[num1], height[num1]
    rx, ry = center[num2], height[num2]

    # Explicit None/size check: the truth value of a numpy array is ambiguous,
    # so ``if yerr:`` raised for array input.
    if yerr is not None and np.size(yerr) > 0:
        ly += yerr[num1]
        ry += yerr[num2]

    # dh and barh are given as fractions of the axes height; convert them to data units.
    ax_y0, ax_y1 = ax.get_ylim()
    dh *= ax_y1 - ax_y0
    barh *= ax_y1 - ax_y0

    # The bracket starts dh above the taller of the two bars (including its error bar).
    y = max(ly, ry) + dh

    barx = [lx, lx, rx, rx]
    bary = [y, y + barh, y + barh, y]
    mid = ((lx + rx) / 2, y + barh)

    ax.plot(barx, bary, c="black")

    kwargs = dict(ha="center", va="bottom")
    if fs is not None:
        kwargs["fontsize"] = fs

    ax.text(*mid, text, **kwargs)


# Demo: three bars with one bracket per pair of bars.
heights = [1.8, 2, 3]
bars = np.arange(len(heights))

plt.figure()
plt.bar(bars, heights, align="center")
# Fix the y-range first: the bracket offsets are fractions of the current axes height.
plt.ylim(0, 5)

# (left bar, right bar, p-value or literal text, extra keyword arguments)
for left, right, annotation, extra in (
    (0, 1, 0.1, {}),
    (1, 2, 0.001, {}),
    (0, 2, "p < 0.0075", {"dh": 0.2}),
):
    barplot_annotate_brackets(left, right, annotation, bars, heights, **extra)

In [None]:
# Plotting the curves
# The font and marker sizes are applied through rc_context so that they are already
# in effect when the axes are created and are restored afterwards, instead of being
# written to the global rcParams after the axes exist and leaking into later figures.
with plt.rc_context({"axes.labelsize": 8, "lines.markersize": 4, "ytick.labelsize": 8}):
    fig, axs = plt.subplots(2, 2)

    # Adjusting space between subplots
    plt.subplots_adjust(hspace=0.4, wspace=0.4)

    # (legend label, marker) per approach; the position in this list selects the Dark2 colour.
    approaches = [("Baseline", "o"), ("Separation", "s"), ("Independence", "^"), ("Sufficiency", "*")]

    # (axes, series in approach order, title, y-label) for each of the four panels.
    panels = [
        (
            axs[0, 0],
            (bal_acc_base, bal_acc_sep, bal_acc_ind, bal_acc_suf),
            "bal_acc over 10 iterations",
            "balanced accuracy",
        ),
        (
            axs[0, 1],
            (ppv_base, ppv_sep, ppv_ind, ppv_suf),
            "ppv over 10 iterations",
            "ppv (precision)",
        ),
        (
            axs[1, 0],
            (recall_base, recall_sep, recall_ind, recall_suf),
            "tpr over 10 iterations",
            "tpr (recall)",
        ),
        (
            axs[1, 1],
            (f1_macro_base, f1_macro_sep, f1_macro_ind, f1_macro_suf),
            "f1-macro over 10 iterations",
            "f1-macro",
        ),
    ]

    for panel_ax, panel_series, panel_title, panel_ylabel in panels:
        for color_idx, ((label, marker), series) in enumerate(zip(approaches, panel_series)):
            panel_ax.plot(x_values, series, label=label, marker=marker, color=colors(color_idx))

        # Adding title and labels
        panel_ax.set_title(panel_title, fontsize=10)
        panel_ax.set_xlabel("iterations")
        panel_ax.set_ylabel(panel_ylabel)

    # Adding a single legend at the top; every panel uses the same four styles, so the
    # labelled lines of the first panel serve as explicit legend handles.
    handles, labels = axs[0, 0].get_legend_handles_labels()
    fig.legend(
        handles,
        labels,
        loc="upper center",
        bbox_to_anchor=(0.55, 1.05),
        ncol=4,
        fontsize=7,
    )

    # Display the plot
    plt.tight_layout()
    # plt.savefig("../assets/performance_evaluation_values.png", dpi=300, bbox_inches="tight")
    plt.show()

In [None]:
# Plotting the curves
# The font and marker sizes are applied through rc_context so that they are already
# in effect when the axes are created and are restored afterwards, instead of being
# written to the global rcParams after the axes exist and leaking into later figures.
with plt.rc_context({"axes.labelsize": 10, "lines.markersize": 4, "ytick.labelsize": 10}):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4))

    # Adjusting space between subplots
    plt.subplots_adjust(hspace=0.4, wspace=0.4)

    # (legend label, marker) per approach; the position in this list selects the Dark2 colour.
    approaches = [("Baseline", "o"), ("Separation", "s"), ("Independence", "^"), ("Sufficiency", "*")]

    # (axes, series in approach order, title, y-label) for the two panels.
    panels = [
        (
            ax1,
            (bal_acc_base, bal_acc_sep, bal_acc_ind, bal_acc_suf),
            "bal_acc over 10 iterations with XGBClassifier",
            "balanced accuracy",
        ),
        (
            ax2,
            (f1_macro_base, f1_macro_sep, f1_macro_ind, f1_macro_suf),
            "f1-macro over 10 iterations",
            "f1-macro",
        ),
    ]

    for panel_ax, panel_series, panel_title, panel_ylabel in panels:
        for color_idx, ((label, marker), series) in enumerate(zip(approaches, panel_series)):
            panel_ax.plot(x_values, series, label=label, marker=marker, color=colors(color_idx))

        # Adding title and labels
        panel_ax.set_title(panel_title, fontsize=10)
        panel_ax.set_xlabel("ten folds iterations")
        panel_ax.set_ylabel(panel_ylabel)
        # set tick values interval (same fixed tick positions on both panels)
        panel_ax.set_yticks(np.arange(0.745, 0.795, 0.005))
        panel_ax.yaxis.grid(True)

    # Adding a single legend at the top; both panels use the same four styles, so the
    # labelled lines of the first panel serve as explicit legend handles.
    handles, labels = ax1.get_legend_handles_labels()
    fig.legend(
        handles,
        labels,
        loc="upper center",
        bbox_to_anchor=(0.55, 1.10),
        ncol=4,
        fontsize=10,
    )

    # Display the plot
    plt.tight_layout()
    # plt.savefig("../assets/performance_eval_values.png", dpi=300, bbox_inches="tight")
    plt.show()

In [None]:
# Box plots of the per-group accuracy (UNP_ACC vs PRIV_ACC), one panel per approach.
from matplotlib.lines import Line2D

legend_labels = ["Females", "Males"]

# Each tuple holds (UNP_* values, PRIV_* values); they are drawn left and right in every panel.
base = (df_base_cond["UNP_ACC"].values, df_base_cond["PRIV_ACC"].values)
sep = (df_sep_cond["UNP_ACC"].values, df_sep_cond["PRIV_ACC"].values)
ind = (df_ind_cond["UNP_ACC"].values, df_ind_cond["PRIV_ACC"].values)
suf = (df_suf_cond["UNP_ACC"].values, df_suf_cond["PRIV_ACC"].values)

fig, ax = plt.subplots(1, 4, figsize=(10, 4.5))

num_groups = len(base)
group_width = 1
box_width = group_width / 2
positions = np.arange(num_groups)

box_properties = {
    "patch_artist": True,
    "showfliers": False,
    "medianprops": {"color": "black"},
    "whiskerprops": {"color": "black"},
    "capprops": {"color": "black"},
    "flierprops": {"markeredgecolor": "black"},
}

# Both boxes of a panel are placed around positions[i].
i = 1
# Horizontal extent of the significance bracket: the centres of the two boxes plus a small margin.
x1 = positions[i] - box_width / 2 - 0.02
x2 = positions[i] + box_width / 2 + 0.02

# (group values, x-label, gap between the largest value and the bracket, bracket height)
# Adjust the gap and height values based on your plot scale.
panels = [
    (base, "baseline", 0.001, 0.001),
    (sep, "threshold_opt", 0.001, 0.001),
    (ind, "reweighting", 0.001, 0.001),
    (suf, "calibration", 0.001, 0.001),
]
color = "#3D3D3D"

for panel_ax, (groups, xlabel, gap, h) in zip(ax, panels):
    stats1 = cbook.boxplot_stats([groups[0]])
    stats2 = cbook.boxplot_stats([groups[1]])
    bp1 = panel_ax.bxp(stats1, positions=[positions[i] - box_width / 2], widths=box_width, **box_properties)
    bp2 = panel_ax.bxp(stats2, positions=[positions[i] + box_width / 2], widths=box_width, **box_properties)

    for patch in bp1["boxes"]:
        patch.set_facecolor(box_colors(3))
    for patch in bp2["boxes"]:
        patch.set_facecolor(box_colors(4))

    panel_ax.set_xticks([])
    panel_ax.yaxis.grid(True)
    panel_ax.set_xlabel(xlabel)
    panel_ax.set_ylabel("error rates scale")
    panel_ax.set_title("acc across groups", fontsize=10)

    # Plot the significance bar just above the largest value of the two groups.
    # NOTE(review): the "***" marker is a hard-coded literal; no test is computed in this cell.
    y = max(groups[0].max(), groups[1].max()) + gap
    panel_ax.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.0, c=color)
    panel_ax.text((x1 + x2) * 0.5, y + h, "***", ha="center", va="bottom", color="black")

# Pass the two box patches of the last panel as explicit handles: with labels only,
# matplotlib pairs the labels with the axes' artists in creation order, which does not
# put the second label on the second box.
first_legend = ax[3].legend(
    handles=[bp1["boxes"][0], bp2["boxes"][0]],
    labels=legend_labels,
    bbox_to_anchor=(1.04, 1),
    title="sensitive groups",
    loc="upper left",
)

# Invisible handles: this legend only lists the text of the significance levels.
legend_elements = [
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="* p < 0.05"),
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="** p < 0.01"),
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="*** p < 0.001"),
]

# Adding the custom legend to the plot
# Add the second legend to the same axis, but outside of the plot
second_legend = ax[3].legend(
    handles=legend_elements,
    loc="upper left",
    bbox_to_anchor=(1.05, 0.75),
    ncol=1,
    title="significance levels",
    fontsize=8,
)
# Creating the second legend replaced the first one on the axes; add it back as an artist.
ax[3].add_artist(first_legend)

plt.tight_layout()
# plt.savefig("../assets/boxplot_acc.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Box plots of the per-group true positive rate (UNP_TPR vs PRIV_TPR), one panel per approach.
from matplotlib.lines import Line2D

legend_labels = ["Females", "Males"]

# Each tuple holds (UNP_* values, PRIV_* values); they are drawn left and right in every panel.
base = (df_base_cond["UNP_TPR"].values, df_base_cond["PRIV_TPR"].values)
sep = (df_sep_cond["UNP_TPR"].values, df_sep_cond["PRIV_TPR"].values)
ind = (df_ind_cond["UNP_TPR"].values, df_ind_cond["PRIV_TPR"].values)
suf = (df_suf_cond["UNP_TPR"].values, df_suf_cond["PRIV_TPR"].values)

fig, ax = plt.subplots(1, 4, figsize=(10, 4.5))

num_groups = len(base)
# Defined here as well so the cell does not depend on a variable left over from an earlier cell.
group_width = 1
box_width = group_width / 2
positions = np.arange(num_groups)

box_properties = {
    "patch_artist": True,
    "showfliers": False,
    "medianprops": {"color": "black"},
    "whiskerprops": {"color": "black"},
    "capprops": {"color": "black"},
    "flierprops": {"markeredgecolor": "black"},
}

# Both boxes of a panel are placed around positions[i].
i = 1
# Horizontal extent of the significance bracket: the centres of the two boxes plus a small margin.
x1 = positions[i] - box_width / 2 - 0.02
x2 = positions[i] + box_width / 2 + 0.02

# (group values, x-label, gap between the largest value and the bracket, bracket height)
# Adjust the gap and height values based on your plot scale.
panels = [
    (base, "baseline", 0.002, 0.001),
    (sep, "threshold_opt", 0.001, 0.001),
    (ind, "reweighting", 0.001, 0.0009),
    (suf, "calibration", 0.0025, 0.001),
]
color = "#3D3D3D"

for panel_ax, (groups, xlabel, gap, h) in zip(ax, panels):
    stats1 = cbook.boxplot_stats([groups[0]])
    stats2 = cbook.boxplot_stats([groups[1]])
    bp1 = panel_ax.bxp(stats1, positions=[positions[i] - box_width / 2], widths=box_width, **box_properties)
    bp2 = panel_ax.bxp(stats2, positions=[positions[i] + box_width / 2], widths=box_width, **box_properties)

    for patch in bp1["boxes"]:
        patch.set_facecolor(box_colors(3))
    for patch in bp2["boxes"]:
        patch.set_facecolor(box_colors(4))

    panel_ax.set_xticks([])
    panel_ax.yaxis.grid(True)
    panel_ax.set_xlabel(xlabel)
    panel_ax.set_ylabel("error rates scale")
    panel_ax.set_title("tpr across groups", fontsize=10)

    # Plot the significance bar just above the largest value of the two groups.
    # NOTE(review): the "***" marker is a hard-coded literal; no test is computed in this cell.
    y = max(groups[0].max(), groups[1].max()) + gap
    panel_ax.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.0, c=color)
    panel_ax.text((x1 + x2) * 0.5, y + h, "***", ha="center", va="bottom", color="black")

# Pass the two box patches of the last panel as explicit handles: with labels only,
# matplotlib pairs the labels with the axes' artists in creation order, which does not
# put the second label on the second box.
first_legend = ax[3].legend(
    handles=[bp1["boxes"][0], bp2["boxes"][0]],
    labels=legend_labels,
    bbox_to_anchor=(1.04, 1),
    title="sensitive groups",
    loc="upper left",
)

# Invisible handles: this legend only lists the text of the significance levels.
legend_elements = [
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="* p < 0.05"),
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="** p < 0.01"),
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="*** p < 0.001"),
]

# Adding the custom legend to the plot
# Add the second legend to the same axis, but outside of the plot
second_legend = ax[3].legend(
    handles=legend_elements,
    loc="upper left",
    bbox_to_anchor=(1.05, 0.75),
    ncol=1,
    title="significance levels",
    fontsize=8,
)
# Creating the second legend replaced the first one on the axes; add it back as an artist.
ax[3].add_artist(first_legend)
plt.tight_layout()
# plt.savefig("../assets/boxplot_tpr.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Box plots of the per-group false negative rate (UNP_FNR vs PRIV_FNR), one panel per approach.
from matplotlib.lines import Line2D

legend_labels = ["Females", "Males"]

# Each tuple holds (UNP_* values, PRIV_* values); they are drawn left and right in every panel.
base = (df_base_cond["UNP_FNR"].values, df_base_cond["PRIV_FNR"].values)
sep = (df_sep_cond["UNP_FNR"].values, df_sep_cond["PRIV_FNR"].values)
ind = (df_ind_cond["UNP_FNR"].values, df_ind_cond["PRIV_FNR"].values)
suf = (df_suf_cond["UNP_FNR"].values, df_suf_cond["PRIV_FNR"].values)

fig, ax = plt.subplots(1, 4, figsize=(10, 4.5))

num_groups = len(base)
# Defined here as well so the cell does not depend on variables left over from earlier cells.
group_width = 1
box_width = group_width / 2
positions = np.arange(num_groups)

box_properties = {
    "patch_artist": True,
    "showfliers": False,
    "medianprops": {"color": "black"},
    "whiskerprops": {"color": "black"},
    "capprops": {"color": "black"},
    "flierprops": {"markeredgecolor": "black"},
}

# Both boxes of a panel are placed around positions[i]; the index used to be inherited
# from the previous cell and is now set explicitly.
i = 1
# Horizontal extent of the significance bracket: the centres of the two boxes plus a small margin.
x1 = positions[i] - box_width / 2 - 0.02
x2 = positions[i] + box_width / 2 + 0.02

# (group values, x-label, gap between the largest value and the bracket, bracket height)
# Adjust the gap and height values based on your plot scale.
panels = [
    (base, "baseline", 0.002, 0.001),
    (sep, "threshold_opt", 0.001, 0.001),
    (ind, "reweighting", 0.001, 0.0009),
    (suf, "calibration", 0.0025, 0.001),
]
color = "#3D3D3D"

for panel_ax, (groups, xlabel, gap, h) in zip(ax, panels):
    stats1 = cbook.boxplot_stats([groups[0]])
    stats2 = cbook.boxplot_stats([groups[1]])
    bp1 = panel_ax.bxp(stats1, positions=[positions[i] - box_width / 2], widths=box_width, **box_properties)
    bp2 = panel_ax.bxp(stats2, positions=[positions[i] + box_width / 2], widths=box_width, **box_properties)

    for patch in bp1["boxes"]:
        patch.set_facecolor(box_colors(3))
    for patch in bp2["boxes"]:
        patch.set_facecolor(box_colors(4))

    panel_ax.set_xticks([])
    panel_ax.yaxis.grid(True)
    panel_ax.set_xlabel(xlabel)
    panel_ax.set_ylabel("error rates scale")
    panel_ax.set_title("fnr across groups", fontsize=10)

    # Plot the significance bar just above the largest value of the two groups.
    # NOTE(review): the "***" marker is a hard-coded literal; no test is computed in this cell.
    y = max(groups[0].max(), groups[1].max()) + gap
    panel_ax.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.0, c=color)
    panel_ax.text((x1 + x2) * 0.5, y + h, "***", ha="center", va="bottom", color="black")

# Pass the two box patches of the last panel as explicit handles: with labels only,
# matplotlib pairs the labels with the axes' artists in creation order, which does not
# put the second label on the second box.
first_legend = ax[3].legend(
    handles=[bp1["boxes"][0], bp2["boxes"][0]],
    labels=legend_labels,
    bbox_to_anchor=(1.04, 1),
    title="sensitive groups",
    loc="upper left",
)

# Invisible handles: this legend only lists the text of the significance levels.
legend_elements = [
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="* p < 0.05"),
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="** p < 0.01"),
    Line2D([0], [0], marker="None", color="none", lw=1.0, label="*** p < 0.001"),
]

# Adding the custom legend to the plot
# Add the second legend to the same axis, but outside of the plot
second_legend = ax[3].legend(
    handles=legend_elements,
    loc="upper left",
    bbox_to_anchor=(1.05, 0.75),
    ncol=1,
    title="significance levels",
    fontsize=8,
)
# Creating the second legend replaced the first one on the axes; add it back as an artist.
ax[3].add_artist(first_legend)

plt.tight_layout()
# plt.savefig("../assets/boxplot_fnr.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Box plot of the DI column for the four approaches on a single axes.
# DI values in the order baseline, separation, independence, sufficiency.
metric = (df_base["DI"].values, df_sep["DI"].values, df_ind["DI"].values, df_suf["DI"].values)
tick_labels = ["baseline", "threshold_opt", "reweighting", "calibration"]

fig, ax = plt.subplots(figsize=(5, 4.5))

num_groups = len(metric)
# Defined here as well so the cell does not depend on a variable left over from an earlier cell.
group_width = 1
box_width = group_width / 4
positions = np.arange(num_groups)

box_properties = {
    "patch_artist": True,
    "showfliers": False,
    "medianprops": {"color": "black"},
    "whiskerprops": {"color": "black"},
    "capprops": {"color": "black"},
    "flierprops": {"markeredgecolor": "black"},
}

# Draw every box centred on its own tick position. Previously the first box was shifted
# to the left and the other three to the right of their ticks, so boxes and tick labels
# were not aligned consistently.
for position, values in zip(positions, metric):
    ax.bxp(cbook.boxplot_stats([values]), positions=[position], widths=box_width, **box_properties)

# Set the tick labels after drawing so that they replace the ticks managed by bxp.
ax.set_xticks(positions, labels=tick_labels)
ax.yaxis.grid(True)
ax.set_ylabel("error rates scale")
ax.set_title("disparate impact", fontsize=10)

# No significance bracket is drawn on this figure.

plt.tight_layout()
# plt.savefig("../assets/boxplot_di.png", dpi=300, bbox_inches="tight")
plt.show()