In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load the validation results before previewing them so this cell also works on a
# fresh kernel (a bare `validation` here raises NameError under Restart & Run All,
# because the frame is otherwise only loaded in a later cell).
validation = pd.read_pickle('results/occ2-3-29-validation.pkl')
validation

In [None]:
def avg_cats(data, col):
    """Average the numeric columns of ``data`` within each distinct value of ``col``.

    Returns the grouped means as a DataFrame indexed by the values of ``col``;
    non-numeric columns are left out (``numeric_only=True``).
    """
    per_category = data.groupby(col)
    return per_category.mean(numeric_only=True)

## NOTE: This is an implementation modifying an example from https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
def is_pareto_efficient_simple(data, xcol, ycol, xlowerisbetter=True, ylowerisbetter=False):
    """Return a boolean mask marking the Pareto-efficient rows of ``data``.

    Only the two columns ``xcol`` and ``ycol`` are considered. Each is treated as a
    cost to minimise; a column whose ``*lowerisbetter`` flag is False is negated
    first so that larger original values count as cheaper.

    Returns a NumPy bool array with one entry per row of ``data`` (positional order).
    """
    xcost = data[xcol] if xlowerisbetter else -data[xcol]
    ycost = data[ycol] if ylowerisbetter else -data[ycol]
    costs = np.array([xcost, ycost]).T

    n_points = costs.shape[0]
    is_efficient = np.ones(n_points, dtype = bool)
    for idx in range(n_points):
        if not is_efficient[idx]:
            # Already eliminated by an earlier point; nothing to compare against.
            continue
        candidate = costs[idx]
        survivors = costs[is_efficient]
        # Among the surviving points, keep those strictly cheaper than the candidate
        # in at least one of the two costs.
        is_efficient[is_efficient] = np.any(survivors < candidate, axis=1)
        # The candidate is never strictly cheaper than itself, so restore it explicitly.
        is_efficient[idx] = True
    return is_efficient

def plot_across_tol(df, k, method_names, axis1, axis2, xlabel=None, ylabel=None, title=None, reverse_x = False, legend=True):
    """Plot column ``axis2`` against column ``axis1`` for each method at one value of ``k``.

    Rows of ``df`` are filtered to ``df['k'] == k`` and then split by ``df['name']``.
    "Baseline" and "DebiasClip" are drawn as scatter points, every other method as
    a line. The figure is drawn on the current pyplot axes and shown immediately.

    Parameters:
        df: DataFrame with at least the columns 'k', 'name', ``axis1`` and ``axis2``.
        k: value of the 'k' column to select.
        method_names: iterable of method names to draw, in plotting order.
        axis1, axis2: column names used for the x and y data.
        xlabel, ylabel: axis labels; default to ``axis1`` / ``axis2`` when None.
        title: plot title; a generic "Graph of ..." title is used when None.
        reverse_x: invert the x axis when True.
        legend: draw a legend when True.
    """
    rows_at_k = df[df['k'] == k]
    point_methods = ("Baseline", "DebiasClip")

    for method in method_names:
        method_rows = rows_at_k[rows_at_k['name'] == method]
        xs, ys = method_rows[axis1], method_rows[axis2]
        # Point methods get markers; all other methods get a connected line.
        draw = plt.scatter if method in point_methods else plt.plot
        draw(xs, ys, label=method)

    if legend:
        plt.legend()
    plt.xlabel(axis1 if xlabel is None else xlabel)
    plt.ylabel(axis2 if ylabel is None else ylabel)
    plt.title(f"Graph of {axis2} over {axis1} @ {k}" if title is None else title)
    if reverse_x:
        plt.gca().invert_xaxis()
    plt.grid()
    plt.show()


In [None]:

# Precision vs. bias on the occ2 validation results: one row of panels per bias
# metric, one column per k. The x axis is reversed (set_xlim(.7, 0)).
validation = pd.read_pickle('results/occ2-3-29-validation.pkl')

methods = validation['name'].unique()
print(methods)

ks = [10, 25, 50, 100]
titles = ['Gender', 'Skin-tone', 'Subgroup Bias']
metrics = ['Avg_AbsBias_gender', 'Avg_AbsBias_skintone', 'Avg_Max_MC_Bias']
# Methods drawn as a single mean point (with a fixed colour) instead of a curve.
point_colors = {'Baseline': 'black', 'DebiasClip': 'purple'}

fig, ax = plt.subplots(3, 4, figsize=(10, 7))

for i in range(3):
    title = titles[i]
    metric = metrics[i]
    for j in range(4):
        k = ks[j]
        ax[i, j].set_title(f'{title} @ {k}', weight='bold')
        ax[i, j].set_ylim(0, .75)
        ax[i, j].set_xlim(.7, 0)
        ax[i, j].grid(True)
        if j == 0:
            ax[i, j].set_ylabel('Precision', weight='bold', fontsize=12)
        if title == 'Subgroup Bias':
            ax[i, j].set_xlabel('Max Subgroup Bias', weight='bold')
        else:
            ax[i, j].set_xlabel(f'AbsBias for {title}', weight='bold')
        for method in methods:
            data = validation[(validation['name'] == method) & (validation['k'] == k)]
            if method in point_colors:
                c = point_colors[method]
                ax[i, j].scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=14, c = c)
            else:
                # Group by 'tol' once and reuse the result for both axes
                # (previously the same groupby/mean was computed twice per curve).
                by_tol = avg_cats(data, "tol")
                ax[i, j].plot(by_tol[metric], by_tol['Avg_Precision'], label=method,  linewidth=2.1)

        # Every panel plots the same methods, so the last panel's handles serve the shared legend.
        handles, labels = ax[i, j].get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=3, fontsize='large')
fig.subplots_adjust(bottom=0.18)
fig.show()

# Precision vs. max subgroup bias on `validation` for a fixed set of methods.
# Each curve method gets its per-'tol' mean line plus two fainter Pareto envelopes.
methods = ['Baseline', 'CDI_Sum_intersectional', 'CDI_Min_intersectional', 'PBM_intersectional']
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', '#bcbd22', '#17becf']
fig, ax = plt.subplots(1, 4, figsize=(10, 3))
ks = [10, 25, 50, 100]
titles = ['Subgroup Bias']
metrics = ['Avg_Max_MC_Bias']
for i in range(1):
    title = titles[i]
    metric = metrics[i]
    for j in range(4):
        k = ks[j]
        ax[j].set_title(f'{title} @ {k}', weight='bold')
        ax[j].set_ylim(0, .7)
        ax[j].set_xlim(.6, 0)
        ax[j].grid(True)
        if j == 0:
            ax[j].set_ylabel('Precision', weight='bold', fontsize=12)
        if title == 'Subgroup Bias':
            ax[j].set_xlabel('Max Subgroup Bias', weight='bold')
        else:
            ax[j].set_xlabel(f'AbsBias for {title}', weight='bold')
        for z, method in enumerate(methods):
            c = colors[z]
            data = validation[(validation['name'] == method) & (validation['k'] == k)]
            if method in ['Baseline', 'DebiasClip']:
                ax[j].scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=14, c = c)
            else:
                # Group by 'tol' once and reuse it for both axes (was computed twice).
                by_tol = avg_cats(data, "tol")
                ax[j].plot(by_tol[metric], by_tol['Avg_Precision'], label=method, c=c, linewidth=2.1)
                # bc: rows on the front for low bias / high precision.
                # wc: rows on the opposite front (high bias / low precision).
                # Both now use `metric` rather than a repeated hard-coded column name.
                bc = data.iloc[is_pareto_efficient_simple(data, metric, 'Avg_Precision')].sort_values(metric, ascending=False)
                wc = data.iloc[is_pareto_efficient_simple(data, metric, 'Avg_Precision', xlowerisbetter=False, ylowerisbetter=True)].sort_values(metric, ascending=False)
                ax[j].plot(bc[metric], bc['Avg_Precision'], label='_nolegend_', alpha=.5, c=c)
                ax[j].plot(wc[metric], wc['Avg_Precision'], label='_nolegend_', alpha=.5, c=c)

        handles, labels = ax[j].get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=4, fontsize='large')
fig.subplots_adjust(bottom=0.38)
fig.show()

# Recall vs. max subgroup bias on `validation` for a fixed set of methods.
# Each curve method gets its per-'tol' mean line plus two fainter Pareto envelopes.
methods = ['Baseline', 'CDI_Sum_intersectional', 'CDI_Min_intersectional', 'PBM_intersectional']
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', '#bcbd22', '#17becf']
fig, ax = plt.subplots(1, 4, figsize=(10, 3))
ks = [10, 25, 50, 100]
titles = ['Subgroup Bias']
metrics = ['Avg_Max_MC_Bias']
for i in range(1):
    title = titles[i]
    metric = metrics[i]
    for j in range(4):
        k = ks[j]
        ax[j].set_title(f'{title} @ {k}', weight='bold')
        ax[j].set_ylim(0, .7)
        ax[j].set_xlim(.6, 0)
        ax[j].grid(True)
        if j == 0:
            ax[j].set_ylabel('Recall', weight='bold', fontsize=12)
        if title == 'Subgroup Bias':
            ax[j].set_xlabel('Max Subgroup Bias', weight='bold')
        else:
            ax[j].set_xlabel(f'AbsBias for {title}', weight='bold')
        for z, method in enumerate(methods):
            c = colors[z]
            data = validation[(validation['name'] == method) & (validation['k'] == k)]
            if method in ['Baseline', 'DebiasClip']:
                ax[j].scatter(np.mean(data[metric]), np.mean(data['Avg_Recall']), label=method, s=14, c = c)
            else:
                # Group by 'tol' once and reuse it for both axes (was computed twice).
                by_tol = avg_cats(data, "tol")
                ax[j].plot(by_tol[metric], by_tol['Avg_Recall'], label=method, c=c, linewidth=2.1)
                # bc: rows on the front for low bias / high recall.
                # wc: rows on the opposite front (high bias / low recall).
                # Both now use `metric` rather than a repeated hard-coded column name.
                bc = data.iloc[is_pareto_efficient_simple(data, metric, 'Avg_Recall')].sort_values(metric, ascending=False)
                wc = data.iloc[is_pareto_efficient_simple(data, metric, 'Avg_Recall', xlowerisbetter=False, ylowerisbetter=True)].sort_values(metric, ascending=False)
                ax[j].plot(bc[metric], bc['Avg_Recall'], label='_nolegend_', alpha=.5, c=c)
                ax[j].plot(wc[metric], wc['Avg_Recall'], label='_nolegend_', alpha=.5, c=c)

        handles, labels = ax[j].get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=4, fontsize='large')
fig.subplots_adjust(bottom=0.38)
fig.show()


        
        


In [None]:
# Load the CelebA validation results. `all_methods` (the distinct values of the
# 'name' column) is also read by later cells.
celeba_res = pd.read_pickle('results/celeba-validation.pkl')

core_methods = all_methods = celeba_res['name'].unique()
print(all_methods)

print(celeba_res.head())



In [None]:
# 4x4 grid on `celeba_res`: one row per bias metric, one column per k.
# The x axis is reversed (set_xlim(1, 0)).
ks = [10, 25, 50, 100]
titles = ['Gender', 'Skin-tone', 'Age', 'Subgroup Bias']
metrics = ['Avg_AbsBias_Male', 'Avg_AbsBias_Pale_Skin', 'Avg_AbsBias_Young','Avg_Max_MC_Bias']
core_methods = all_methods
fig, ax = plt.subplots(4, 4, figsize=(10, 10))

for i, (title, metric) in enumerate(zip(titles, metrics)):
    for j, k in enumerate(ks):
        panel = ax[i, j]
        panel.set_title(f'{title} @ {k}', weight='bold')
        panel.set_ylim(0, 1)
        panel.set_xlim(1, 0)
        panel.grid(True)
        if j == 0:
            panel.set_ylabel('Precision', weight='bold', fontsize=12)
        x_caption = 'Max Subgroup Bias' if title == 'Subgroup Bias' else f'AbsBias for {title}'
        panel.set_xlabel(x_caption, weight='bold')
        for method in core_methods:
            data = celeba_res[(celeba_res['name'] == method) & (celeba_res['k'] == k)]
            if method == 'Baseline' or method == 'DebiasClip':
                # These two methods are drawn as one mean point with a fixed colour.
                c = 'black' if method == 'Baseline' else 'purple'
                panel.scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=14, c=c)
            else:
                # Per-'tol' means are plotted as-is; no Pareto filtering is applied here.
                avg_c = avg_cats(data, "tol")
                pareto_points = avg_c
                panel.plot(pareto_points[metric], pareto_points['Avg_Precision'], label=method, linewidth=2.1)

        handles, labels = panel.get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=3, fontsize='large')
fig.subplots_adjust(bottom=0.12)
fig.show()

# Precision vs. max subgroup bias on `celeba_res` for a fixed set of methods.
# Each curve method gets its per-'tol' mean line plus two fainter Pareto envelopes.
core_methods = ['Baseline', 'CDI_Sum_intersectional', 'CDI_Min_intersectional', 'PBM_three_attributes']
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', '#bcbd22', '#17becf']
fig, ax = plt.subplots(1, 4, figsize=(10, 3))
ks = [10, 25, 50, 100]
titles = ['Subgroup Bias']
metrics = ['Avg_Max_MC_Bias']
for i in range(1):
    title = titles[i]
    metric = metrics[i]
    for j in range(4):
        k = ks[j]
        ax[j].set_title(f'{title} @ {k}', weight='bold')
        ax[j].set_ylim(0, .85)
        ax[j].set_xlim(.7, 0)
        ax[j].grid(True)
        if j == 0:
            ax[j].set_ylabel('Precision',weight='bold', fontsize=12)
        if title == 'Subgroup Bias':
            ax[j].set_xlabel('Max Subgroup Bias', weight='bold')
        else:
            ax[j].set_xlabel(f'AbsBias for {title}', weight='bold')
        for z, method in enumerate(core_methods):
            c = colors[z]
            data = celeba_res[(celeba_res['name'] == method) & (celeba_res['k'] == k)]
            if method in ['Baseline', 'DebiasClip']:
                ax[j].scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=14, c = c)
            else:
                # Group by 'tol' once and reuse it for both axes (was computed twice).
                by_tol = avg_cats(data, "tol")
                ax[j].plot(by_tol[metric], by_tol['Avg_Precision'], label=method, c=c, linewidth=2.1)
                # bc: rows on the front for low bias / high precision.
                # wc: rows on the opposite front (high bias / low precision).
                # Both now use `metric` rather than a repeated hard-coded column name.
                bc = data.iloc[is_pareto_efficient_simple(data, metric, 'Avg_Precision')].sort_values(metric, ascending=False)
                wc = data.iloc[is_pareto_efficient_simple(data, metric, 'Avg_Precision', xlowerisbetter=False, ylowerisbetter=True)].sort_values(metric, ascending=False)
                ax[j].plot(bc[metric], bc['Avg_Precision'], label='_nolegend_', alpha=.5, c=c)
                ax[j].plot(wc[metric], wc['Avg_Precision'], label='_nolegend_', alpha=.5, c=c)

        handles, labels = ax[j].get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=4, fontsize='large')
fig.subplots_adjust(bottom=0.34)
fig.show()


In [None]:
# Combine four GIIS result files into 'results/giis-complete.pkl'.
# Rows of df1 whose method name also appears in df2, df3 or df4 are dropped, so
# those methods are represented only by the rows from the other three files.
df1 = pd.read_pickle('results/giis-3-21.pkl')
df2 = pd.read_pickle('results/giis-3-24-CLIP-euc.pkl')
df3 = pd.read_pickle('results/giis-cdi_features.pkl')
df4 = pd.read_pickle('results/giis-pbm-3-22.pkl')

names_to_drop = [
    method_name
    for other in (df2, df3, df4)
    for method_name in other['name'].unique().tolist()
]

keep_mask = ~df1['name'].isin(names_to_drop)
df1_dropped = df1[keep_mask]
print(df1_dropped['name'].unique())

giis_df = pd.concat([df1_dropped, df2, df3, df4])
print(giis_df['name'].unique())

giis_df.to_pickle('results/giis-complete.pkl')

In [None]:
# Combine four occ2 result files into 'results/occ2-complete.pkl'.
# Rows of df1 whose method name also appears in df2, df3 or df4 are dropped, so
# those methods are represented only by the rows from the other three files.
df1 = pd.read_pickle('results/occ2-3-23.pkl')
df2 = pd.read_pickle('results/occ2-3-24-CLIP.pkl')
df3 = pd.read_pickle('results/occ2-3-24-eucmethods.pkl')
df4 = pd.read_pickle('results/occ2-3-24-skintone.pkl')

# Show which methods each input file contains.
dfs = [df1, df2, df3, df4]
for df in dfs:
    print(df['name'].unique())

names_to_drop = [
    method_name
    for other in dfs[1:]
    for method_name in other['name'].unique().tolist()
]

keep_mask = ~df1['name'].isin(names_to_drop)
df1_dropped = df1[keep_mask]
print(df1_dropped['name'].unique())

occ2_df = pd.concat([df1_dropped, df2, df3, df4])
print(occ2_df['name'].unique())

occ2_df.to_pickle('results/occ2-complete.pkl')

In [None]:
# One precision-vs-bias figure per (k, bias metric) for the CDI_Sum variants,
# drawn with plot_across_tol on a reversed x axis.
occ2_df = pd.read_pickle('results/occ2-complete.pkl')

methods = ['CDI_Sum_gender', 'CDI_Sum_skintone', 'CDI_Sum_intersectional',
 'CDI_Sum_3_attr', 'CDI_Sum_4_attr']
# Named `ks` so the loop variable `k` no longer overwrites the list it iterates
# (the original `for k in k` rebound the list to its own elements).
ks = [10, 25, 50, 100]

metrics = ['Avg_AbsBias_gender', 'Avg_AbsBias_skintone', 'Avg_Max_MC_Bias']


for k in ks:
    plot_across_tol(occ2_df, k, methods, metrics[0], 'Avg_Precision', xlabel='AbsBias in Gender', ylabel='Precision', title=f'Gender @ {k}', reverse_x=True)
    plot_across_tol(occ2_df, k, methods, metrics[1], 'Avg_Precision', xlabel='AbsBias in Skintone', ylabel='Precision', title=f'Skintone @ {k}', reverse_x=True)
    plot_across_tol(occ2_df, k, methods, metrics[2], 'Avg_Precision', xlabel='Subgroup Fairness', ylabel='Precision', title=f'Subgroup @ {k}', reverse_x=True)

    

In [None]:
# Recall vs. gender bias on the merged GIIS results, one panel per k.
giis = pd.read_pickle('results/giis-complete.pkl')

print(giis['name'].unique())
methods = ['Baseline', 'CDI_Sum_gender', 'CDI_Sum_intersectional', 'CDI_Min_gender', 'CDI_Min_intersectional', 'PBM_gender',
 'PBM_intersectional','CLIP_gender', 'CLIP_intersectional', 'DebiasClip']
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

ks = [10, 25, 50, 100]

fig, ax = plt.subplots(1, 4, figsize=(10, 3.5))
for i, k in enumerate(ks):
    ax[i].set_title(f'Recall - Bias @ {k}', weight='bold')
    ax[i].set_ylim(0, .7)
    ax[i].set_xlim(.8, 0)
    ax[i].grid(True)
    # Label only the left-most panel. This used to test `j`, which at this point is
    # either undefined or left over from an earlier loop, so the label was never
    # applied to the first panel.
    if i == 0:
        ax[i].set_ylabel('Recall', weight='bold', fontsize=12)
    ax[i].set_xlabel('Gender Bias', weight='bold')
    for j, method in enumerate(methods):
        c = colors[j]
        data = giis[(giis['name'] == method) & (giis['k'] == k)]
        if method in ['Baseline', 'DebiasClip']:
            ax[i].scatter(np.mean(data['Avg_AbsBias_gender']), np.mean(data['Avg_Recall']), label=method, s=15, c=c)
        else:
            # Per-'tol' means are plotted as-is; no Pareto filtering is applied here.
            avg_c = avg_cats(data, "tol")
            pareto_points = avg_c
            ax[i].plot(pareto_points['Avg_AbsBias_gender'], pareto_points['Avg_Recall'], label=method, c=c, linewidth=2.1)

    handles, labels = ax[i].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncols=4, fontsize='large')
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.42)
fig.show()

In [None]:
# Precision vs. subgroup bias for the PBM variants: the merged occ2 results are
# extended with the rows from the five-concepts file, then drawn one panel per k.
occ2_5 = pd.read_pickle('results/occ2-pbm-five-concepts.pkl')
occ2 = pd.concat([pd.read_pickle('results/occ2-complete.pkl'), occ2_5])

methods = ['Baseline', 'PBM_gender', 'PBM_intersectional',
 'PBM_skintone', 'PBM_three_attributes', 'PBM_four_attributes', 'PBM_five_attributes']
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

ks = [10, 25, 50, 100]

fig, ax = plt.subplots(1, 4, figsize=(10, 3.5))
for i, (k, panel) in enumerate(zip(ks, ax)):
    panel.set_title(f'Precision-Subgroup @ {k}', weight='bold')
    panel.set_ylim(0, .7)
    panel.set_xlim(.8, 0)
    panel.grid(True)
    if i == 0:
        panel.set_ylabel('Precision', weight='bold', fontsize=12)
    panel.set_xlabel('Subgroup Bias', weight='bold')
    for j, (method, c) in enumerate(zip(methods, colors)):
        data = occ2[(occ2['name'] == method) & (occ2['k'] == k)]
        is_point_method = method in ('Baseline', 'DebiasClip')
        if is_point_method:
            panel.scatter(np.mean(data['Avg_Max_MC_Bias']), np.mean(data['Avg_Precision']), label=method, s=15, c=c)
        else:
            # Per-'tol' means are plotted as-is; no Pareto filtering is applied here.
            avg_c = avg_cats(data, "tol")
            pareto_points = avg_c
            panel.plot(pareto_points['Avg_Max_MC_Bias'], pareto_points['Avg_Precision'], label=method, c=c, linewidth=2.1)

    handles, labels = panel.get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncols=4, fontsize='large')
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.35)
fig.show()

# Precision vs. subgroup bias for the CLIP variants on the merged occ2 results,
# one panel per k.
occ2 = pd.read_pickle('results/occ2-complete.pkl')

methods = ['Baseline', 'CLIP_gender', 'CLIP_skintone', 'CLIP_intersectional', 'CLIP_three_attributes']
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

ks = [10, 25, 50, 100]

fig, ax = plt.subplots(1, 4, figsize=(10, 3.2))
for i, (k, panel) in enumerate(zip(ks, ax)):
    panel.set_title(f'Precision-Subgroup @ {k}', weight='bold')
    panel.set_ylim(0, .7)
    panel.set_xlim(.8, 0)
    panel.grid(True)
    if i == 0:
        panel.set_ylabel('Precision', weight='bold', fontsize=12)
    panel.set_xlabel('Subgroup Bias', weight='bold')
    for j, (method, c) in enumerate(zip(methods, colors)):
        data = occ2[(occ2['name'] == method) & (occ2['k'] == k)]
        is_point_method = method in ('Baseline', 'DebiasClip')
        if is_point_method:
            panel.scatter(np.mean(data['Avg_Max_MC_Bias']), np.mean(data['Avg_Precision']), label=method, s=15, c=c)
        else:
            # Unlike the sibling figures, the raw rows are plotted here without
            # averaging over 'tol' and without Pareto filtering.
            avg_c = data
            pareto_points = avg_c
            panel.plot(pareto_points['Avg_Max_MC_Bias'], pareto_points['Avg_Precision'], label=method, c=c, linewidth=2.1)

    handles, labels = panel.get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncols=5, fontsize='large')
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.25)
fig.show()

In [None]:
# 4x4 grid on the 'celeba-3-24' results: one row per bias metric, one column per k.
# The x axis is reversed (set_xlim(1, 0)).
celeba_res = pd.read_pickle('results/celeba-3-24.pkl')

print(celeba_res['name'].unique())

ks = [10, 25, 50, 100]
titles = ['Gender', 'Skin-tone', 'Age', 'Subgroup Bias']
metrics = ['Avg_AbsBias_Male', 'Avg_AbsBias_Pale_Skin', 'Avg_AbsBias_Young','Avg_Max_MC_Bias']
core_methods = ['Baseline', 'CDI_Sum_4_attr', 'CDI_Min_4_attr', 'PBM_four_attributes', 'DebiasClip']
fig, ax = plt.subplots(4, 4, figsize=(10, 10))

for i, (title, metric) in enumerate(zip(titles, metrics)):
    for j, k in enumerate(ks):
        panel = ax[i, j]
        panel.set_title(f'{title} @ {k}', weight='bold')
        panel.set_ylim(0, 1)
        panel.set_xlim(1, 0)
        panel.grid(True)
        if j == 0:
            panel.set_ylabel('Precision', weight='bold', fontsize=12)
        x_caption = 'Max Subgroup Bias' if title == 'Subgroup Bias' else f'AbsBias for {title}'
        panel.set_xlabel(x_caption, weight='bold')
        for method in core_methods:
            data = celeba_res[(celeba_res['name'] == method) & (celeba_res['k'] == k)]
            if method == 'Baseline' or method == 'DebiasClip':
                # These two methods are drawn as one mean point with a fixed colour.
                c = 'black' if method == 'Baseline' else 'purple'
                panel.scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=14, c=c)
            else:
                # The raw rows are plotted here without averaging over 'tol' and
                # without Pareto filtering.
                avg_c = data
                pareto_points = avg_c
                panel.plot(pareto_points[metric], pareto_points['Avg_Precision'], label=method, linewidth=2.1)

        handles, labels = panel.get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=3, fontsize='large')
fig.subplots_adjust(bottom=0.12)
fig.show()



In [None]:
# 3x4 grid on the CelebA validation results using the 'Avg_Bias_*' columns:
# one row per attribute, one column per k. Axis limits are left to matplotlib.
celeba_res = pd.read_pickle('results/celeba-validation.pkl')
ks = [10, 25, 50, 100]
titles = ['Gender', 'Skin-tone', 'Age']
metrics = ['Avg_Bias_Male', 'Avg_Bias_Pale_Skin', 'Avg_Bias_Young']
# Take the method list from the frame loaded in this cell instead of relying on
# `all_methods` from a distant earlier cell. That variable was computed from this
# same file, so the values are the same, but the cell no longer depends on hidden state.
core_methods = celeba_res['name'].unique()
fig, ax = plt.subplots(3, 4, figsize=(10, 8))

for i in range(3):
    title = titles[i]
    metric = metrics[i]
    for j in range(4):
        k = ks[j]
        ax[i, j].set_title(f'{title} @ {k}', weight='bold')
        ax[i, j].grid(True)
        if j == 0:
            ax[i, j].set_ylabel('Precision', weight='bold', fontsize=12)
        if title == 'Subgroup Bias':
            ax[i, j].set_xlabel('Max Subgroup Bias', weight='bold')
        else:
            ax[i, j].set_xlabel(f'Bias for {title}', weight='bold')
        for method in core_methods:
            data = celeba_res[(celeba_res['name'] == method) & (celeba_res['k'] == k)]
            if method in ['Baseline', 'DebiasClip']:
                # These two methods are drawn as one mean point with a fixed colour.
                if method == 'Baseline':
                    c = 'black'
                if method == 'DebiasClip':
                    c = 'purple'
                ax[i, j].scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=14, c=c)
            else:
                # Per-'tol' means are plotted as-is; no Pareto filtering is applied here.
                avg_c = avg_cats(data, "tol")
                pareto_points = avg_c
                ax[i, j].plot(pareto_points[metric], pareto_points['Avg_Precision'], label=method, linewidth=2.1)

        handles, labels = ax[i, j].get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=3, fontsize='large')
fig.subplots_adjust(bottom=0.16)
fig.show()

In [None]:
# 4x4 grid on the 'white-nonwhite' CelebA validation results: one row per bias
# metric, one column per k. The x axis is reversed (set_xlim(1, 0)).
celeba_res = pd.read_pickle('results/celeba-validation-white-nonwhite.pkl')

ks = [10, 25, 50, 100]
titles = ['Gender', 'Skin-tone', 'Age', 'Subgroup Bias']
metrics = ['Avg_AbsBias_Male', 'Avg_AbsBias_Pale_Skin', 'Avg_AbsBias_Young','Avg_Max_MC_Bias']
# NOTE(review): `all_methods` comes from an earlier cell that loaded a different
# file; confirm that this file contains the same method names.
core_methods = all_methods
fig, ax = plt.subplots(4, 4, figsize=(10, 10))

for i, (title, metric) in enumerate(zip(titles, metrics)):
    for j, k in enumerate(ks):
        panel = ax[i, j]
        panel.set_title(f'{title} @ {k}', weight='bold')
        panel.set_ylim(0, 1)
        panel.set_xlim(1, 0)
        panel.grid(True)
        if j == 0:
            panel.set_ylabel('Precision')
        x_caption = 'Max Subgroup Bias' if title == 'Subgroup Bias' else f'AbsBias for {title}'
        panel.set_xlabel(x_caption)
        for method in core_methods:
            data = celeba_res[(celeba_res['name'] == method) & (celeba_res['k'] == k)]
            if method == 'Baseline' or method == 'DebiasClip':
                # These two methods are drawn as one mean point with a fixed colour.
                c = 'black' if method == 'Baseline' else 'purple'
                panel.scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=10, c=c)
            else:
                # Per-'tol' means are plotted as-is; no Pareto filtering is applied here.
                avg_c = avg_cats(data, "tol")
                pareto_points = avg_c
                panel.plot(pareto_points[metric], pareto_points['Avg_Precision'], label=method)

        handles, labels = panel.get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=3)
fig.subplots_adjust(bottom=0.12)
fig.show()

In [None]:
# Precision vs. gender AbsBias on the merged GIIS results for the CDI variants
# listed in `methods`, one panel per k.
giis = pd.read_pickle('results/giis-complete.pkl')

print(giis['name'].unique())
methods = ['Baseline', 'CDI_Sum_intersectional', 'CDI_Sum_gender', 'CDI_TrueConcept', 'CDI_Random', 'CDI_Features']
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

ks = [10, 25, 50, 100]

fig, ax = plt.subplots(1, 4, figsize=(10, 3.5))
for i, (k, panel) in enumerate(zip(ks, ax)):
    panel.set_title(f'Precision - Bias @ {k}', weight='bold')
    panel.set_ylim(0, .7)
    panel.set_xlim(.8, 0)
    panel.grid(True)
    if i == 0:
        panel.set_ylabel('Precision', weight='bold', fontsize=12)
    panel.set_xlabel('AbsBias for Gender', weight='bold')
    for j, (method, c) in enumerate(zip(methods, colors)):
        data = giis[(giis['name'] == method) & (giis['k'] == k)]
        is_point_method = method in ('Baseline', 'DebiasClip')
        if is_point_method:
            panel.scatter(np.mean(data['Avg_AbsBias_gender']), np.mean(data['Avg_Precision']), label=method, s=15, c=c)
        else:
            # Per-'tol' means are plotted as-is; no Pareto filtering is applied here.
            avg_c = avg_cats(data, "tol")
            pareto_points = avg_c
            panel.plot(pareto_points['Avg_AbsBias_gender'], pareto_points['Avg_Precision'], label=method, c=c, linewidth=2.1)

    handles, labels = panel.get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncols=3, fontsize='large')
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.35)
fig.show()

In [None]:
# Precision vs. max subgroup bias on the 'celeba-3-24' results extended with the
# rows from 'celeba-random.pkl', one panel per k.
celeba_res = pd.read_pickle('results/celeba-3-24.pkl')
# Renamed from `random` so the variable does not shadow the standard-library module name.
celeba_random = pd.read_pickle('results/celeba-random.pkl')
celeba_res = pd.concat([celeba_res, celeba_random])
print(celeba_res['name'].unique())
core_methods = ['Baseline', 'CDI_Sum_intersectional', 'CDI_TrueConcept', 'CDI_Random', 'CDI_Features']
# Define the palette in this cell: the loop below indexes `colors`, which was
# previously only available as a leftover from whichever cell ran before.
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

fig, ax = plt.subplots(1, 4, figsize=(10, 3))
ks = [10, 25, 50, 100]
titles = ['Subgroup Bias']
metrics = ['Avg_Max_MC_Bias']
for i in range(1):
    title = titles[i]
    metric = metrics[i]
    for j in range(4):
        k = ks[j]
        ax[j].set_title(f'{title} @ {k}', weight='bold')
        ax[j].set_ylim(0, .85)
        ax[j].set_xlim(.7, 0)
        ax[j].grid(True)
        if j == 0:
            ax[j].set_ylabel('Precision',weight='bold', fontsize=12)
        if title == 'Subgroup Bias':
            ax[j].set_xlabel('Max Subgroup Bias', weight='bold')
        else:
            ax[j].set_xlabel(f'AbsBias for {title}', weight='bold')
        for z, method in enumerate(core_methods):
            c = colors[z]
            data = celeba_res[(celeba_res['name'] == method) & (celeba_res['k'] == k)]
            if method in ['Baseline', 'DebiasClip']:
                ax[j].scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=14, c = c)
            else:
                # Group by 'tol' once and reuse it for both axes (was computed twice).
                by_tol = avg_cats(data, "tol")
                ax[j].plot(by_tol[metric], by_tol['Avg_Precision'], label=method, c=c, linewidth=2.1)

        handles, labels = ax[j].get_legend_handles_labels()

fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=5, fontsize='large')
fig.subplots_adjust(bottom=0.32)
fig.show()


In [None]:
# Precision vs. 'Avg_AbsBias_for_Accurate_gender' on the merged GIIS results,
# one panel per k.
giis = pd.read_pickle('results/giis-complete.pkl')
methods = ['Baseline', 'CDI_Sum_intersectional', 'CDI_Min_intersectional', 
 'PBM_intersectional', 'CLIP_intersectional', 'DebiasClip']
colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

ks = [10, 25, 50, 100]

fig, ax = plt.subplots(1, 4, figsize=(10, 3.5))
for i, k in enumerate(ks):
    ax[i].set_title(f'Precision - Acc-Abs-Bias @ {k}', weight='bold')
    ax[i].set_ylim(0, .7)
    ax[i].set_xlim(.85, 0)
    ax[i].grid(True)
    # Label only the left-most panel. This used to test `j` (stale from an earlier
    # loop, or undefined), so the label was never applied. The label is also now
    # 'Precision', matching the plotted 'Avg_Precision' column and the panel title;
    # it previously read 'Recall'.
    if i == 0:
        ax[i].set_ylabel('Precision', weight='bold', fontsize=12)
    ax[i].set_xlabel('Abs Gender Bias in Acc.', weight='bold')
    for j, method in enumerate(methods):
        c = colors[j]
        data = giis[(giis['name'] == method) & (giis['k'] == k)]
        if method in ['Baseline', 'DebiasClip']:
            ax[i].scatter(np.mean(data['Avg_AbsBias_for_Accurate_gender']), np.mean(data['Avg_Precision']), label=method, s=15, c=c)
        else:
            # Per-'tol' means are plotted as-is; no Pareto filtering is applied here.
            avg_c = avg_cats(data, "tol")
            pareto_points = avg_c
            ax[i].plot(pareto_points['Avg_AbsBias_for_Accurate_gender'], pareto_points['Avg_Precision'], label=method, c=c, linewidth=2.1)

    handles, labels = ax[i].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncols=4, fontsize='large')
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.42)
fig.show()

In [None]:
# Validation figure: a 2x4 grid with one row per bias metric and one column per k.
validation = pd.read_pickle('results/occ2-3-29-validation.pkl')

methods = validation['name'].unique()
print(methods)

ks = [10, 25, 50, 100]
titles = ['Gender', 'Skin-tone', 'Subgroup Bias']
metrics = ['Avg_AbsBias_for_Accurate_gender', 'Avg_AbsBias_for_Accurate_skintone']

fig, ax = plt.subplots(2, 4, figsize=(10, 5.5))

# Marker colors for the two methods drawn as single points. Line methods get
# no explicit color and therefore use matplotlib's default color cycle.
point_colors = {'Baseline': 'black', 'DebiasClip': 'purple'}

for i, metric in enumerate(metrics):
    # Only the first len(metrics) titles are used.
    title = titles[i]
    for j, k in enumerate(ks):
        panel = ax[i, j]
        panel.set_title(f'{title} @ {k}', weight='bold')
        panel.set_ylim(0, .75)
        # Limits are given high-to-low, so the x-axis is drawn reversed.
        panel.set_xlim(.7, 0)
        panel.grid(True)
        if j == 0:
            panel.set_ylabel('Precision', weight='bold', fontsize=12)
        panel.set_xlabel(f'AbsBias for {title} in Acc.', weight='bold')
        for method in methods:
            data = validation[(validation['name'] == method) & (validation['k'] == k)]
            if method in point_colors:
                c = point_colors[method]
                # Single marker at the mean of this method's rows.
                panel.scatter(np.mean(data[metric]), np.mean(data['Avg_Precision']), label=method, s=14, c=c)
            else:
                # Line through the per-'tol' means.
                by_tol = avg_cats(data, "tol")
                panel.plot(by_tol[metric], by_tol['Avg_Precision'], label=method, linewidth=2.1)

        handles, labels = panel.get_legend_handles_labels()

# One shared legend, built from the handles of the last panel drawn.
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.legend(handles, labels, loc='lower center', ncols=3, fontsize='large')
fig.subplots_adjust(bottom=0.24)
fig.show()

In [None]:
# Occupations figures: 'Avg_Precision' against 'Avg_Max_MC_Bias'. Two separate
# 1x4 figures are produced, one per group of method names, with one panel per k.
occ2 = pd.read_pickle('results/occ2-complete.pkl')

colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

ks = [10, 25, 50, 100]

# Each inner list becomes its own figure, in this order.
method_groups = [
    ['Baseline', 'CDI_Sum_intersectional', 'CDI_Sum_3_attr', 'CDI_EucSum_3_attr', 'CDI_EucSum_intersectional'],
    ['Baseline', 'CDI_Min_intersectional', 'CDI_Min_3_attr', 'CDI_EucMin_3_attr', 'CDI_EucMin_intersectional'],
]

for methods in method_groups:
    fig, ax = plt.subplots(1, 4, figsize=(10, 3.5))
    for i, k in enumerate(ks):
        panel = ax[i]
        panel.set_title(f'Precision-Subgroup @ {k}', weight='bold')
        panel.set_ylim(0, .7)
        # Limits are given high-to-low, so the x-axis is drawn reversed.
        panel.set_xlim(.8, 0)
        panel.grid(True)
        if i == 0:
            panel.set_ylabel('Precision', weight='bold', fontsize=12)
        panel.set_xlabel('Subgroup Bias', weight='bold')
        for j, method in enumerate(methods):
            c = colors[j]
            data = occ2[(occ2['name'] == method) & (occ2['k'] == k)]
            if method in ('Baseline', 'DebiasClip'):
                # Single marker at the mean of this method's rows.
                panel.scatter(np.mean(data['Avg_Max_MC_Bias']), np.mean(data['Avg_Precision']), label=method, s=15, c=c)
            else:
                # Line through the per-'tol' means; every averaged point is
                # plotted (no Pareto filtering is applied).
                avg_c = avg_cats(data, "tol")
                pareto_points = avg_c
                panel.plot(pareto_points['Avg_Max_MC_Bias'], pareto_points['Avg_Precision'], label=method, c=c, linewidth=2.1)

        handles, labels = panel.get_legend_handles_labels()

    # One shared legend per figure, built from the handles of its last panel.
    fig.legend(handles, labels, loc='lower center', ncols=3, fontsize='large')
    fig.tight_layout(rect=[0, 0, 1.2, 1.2])
    fig.subplots_adjust(bottom=0.35)
    fig.show()


In [None]:
# Print, for each dataset, the share of rows taken by every value of a few attribute columns.
giis_data = pd.read_csv('datasets/gender_in_image_search/gender_labelled_images.csv')
occupations_data = pd.read_csv('datasets/occupations_2/occupations_labels.csv')
celeba_data = pd.read_csv('datasets/celeba/list_attr_celeba.csv')
celeba_primary_data = pd.read_csv('datasets/celeba/alg_testing.csv')

# (printed label, frame, column) triples, reported in this order.
share_reports = [
    ("GIIS gender", giis_data, 'image_gender'),
    ("Occupations gender", occupations_data, 'gender'),
    ("Occupations skintone", occupations_data, 'skintone'),
    ("CelebA (entire) gender", celeba_data, 'Male'),
    ("CelebA (entire) skintone", celeba_data, 'Pale_Skin'),
    ("CelebA (entire) age", celeba_data, 'Young'),
    ("CelebA (primary) gender", celeba_primary_data, 'Male'),
    ("CelebA (primary) skintone", celeba_primary_data, 'Pale_Skin'),
    ("CelebA (primary) age", celeba_primary_data, 'Young'),
]

for label, frame, column in share_reports:
    # Value counts divided by the number of rows in the frame.
    print(f"{label}: {frame[column].value_counts() / len(frame)}")

In [None]:
# Build the GIIS summary table: selected (method, tol) rows at k = 25, written to CSV.
gs_df = pd.read_pickle('results/giis-complete.pkl')
# The next two frames are loaded but not used by this cell.
occ2_df = pd.read_pickle('results/occ2-3-29-validation.pkl')
celeba_df = pd.read_pickle('results/celeba-validation.pkl')

cols = ['name', 'k', 'tol', 'Avg_Precision', 'Avg_AbsBias_gender', 'Avg_Bias_gender']

key_models = ['CDI_Sum_intersectional', 'CDI_Min_intersectional']
tols = [.02]

# Every slice below starts from the rows with k == 25.
gs_at_k = gs_df[gs_df['k'].isin([25])]

# Key models, restricted to the listed tolerance values.
gs_df1 = gs_at_k[gs_at_k['name'].isin(key_models) & gs_at_k['tol'].isin(tols)][cols]

# Baseline rows, with no 'tol' filter.
gs_df2 = gs_at_k[gs_at_k['name'] == 'Baseline'][cols]

# DebiasClip and PBM rows with tol == 0.
gs_df3 = gs_at_k[gs_at_k['name'].isin(['DebiasClip', 'PBM_intersectional']) & (gs_at_k['tol'] == 0)][cols]

# CLIP rows with tol == 200.
gs_df4 = gs_at_k[(gs_at_k['name'] == 'CLIP_intersectional') & (gs_at_k['tol'] == 200)][cols]
print(gs_df4)

# Row order in the table: Baseline, key models, DebiasClip/PBM, CLIP.
gs = pd.concat([gs_df2, gs_df1, gs_df3, gs_df4])
print(gs)
gs.to_csv('results/giis-table.csv')

In [None]:
# Extra-concepts figure: a 2x2 grid of 'Avg_Precision' against 'Avg_Max_MC_Bias'.
# Each row uses its own list of methods; each column uses one value of k.
occ2_c = pd.read_pickle('results/occ2-extra_concepts.pkl')

print(occ2_c['name'].unique())
print(occ2_c['k'].unique())

colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

ks = [25, 50]

fig, ax = plt.subplots(2, 2, figsize=(6, 6.))

# One entry per subplot row: (method names, offset added to the index into `colors`).
row_specs = [
    (['Baseline', 'CDI_Min_3_attr', 'CDI_Min_6_attr', 'CDI_Min_8_attr'], 0),
    (['Baseline', 'CDI_Sum_3_attr', 'CDI_Sum_6_attr', 'CDI_Sum_8_attr'], 3),
]

row_legends = []
for q, (methods, color_offset) in enumerate(row_specs):
    for i, k in enumerate(ks):
        panel = ax[q, i]
        panel.set_title(f'Precision - Bias @ {k}', weight='bold')
        panel.set_ylim(0, .7)
        # Limits are given high-to-low, so the x-axis is drawn reversed.
        panel.set_xlim(.8, 0)
        panel.grid(True)
        if i == 0:
            panel.set_ylabel('Precision', weight='bold', fontsize=12)
        panel.set_xlabel('Subgroup Bias', weight='bold')
        for j, method in enumerate(methods):
            c = colors[color_offset + j]
            data = occ2_c[(occ2_c['name'] == method) & (occ2_c['k'] == k)]
            if method in ('Baseline', 'DebiasClip'):
                # Single black marker at the mean of this method's rows (`c` is not used here).
                panel.scatter(np.mean(data['Avg_Max_MC_Bias']), np.mean(data['Avg_Precision']), label=method, s=15, c='black')
            else:
                # Line through the per-'tol' means; every averaged point is plotted.
                avg_c = avg_cats(data, "tol")
                pareto_points = avg_c
                panel.plot(pareto_points['Avg_Max_MC_Bias'], pareto_points['Avg_Precision'], label=method, c=c, linewidth=2.1)

    # Keep the legend entries of the last panel drawn in this row.
    row_legends.append(ax[q, i].get_legend_handles_labels())

(handles1, labels1), (handles2, labels2) = row_legends

# Append the second row's entries to the first row's, skipping the second row's first entry.
handles1 += handles2[1:]
labels1 += labels2[1:]

fig.legend(handles1, labels1, loc='lower center', ncols=3, fontsize=11)
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.25)
fig.show()


In [None]:
# 8-attribute figure: 'Avg_Precision' against 'Avg_AbsBias_gender', one panel per k.
occ2_c = pd.read_pickle('results/occ2-extra_concepts.pkl')

print(occ2_c['name'].unique())
print(occ2_c['k'].unique())

colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple', 'red', 'orange', 'green', 'blue', 'indigo', 'violet']

ks = [25, 50]

methods = ['Baseline', 'CDI_Min_8_attr', 'CDI_Sum_8_attr', 'CDI_EucMin_8_attr', 'CDI_EucSum_8_attr']

fig, ax = plt.subplots(1, 2, figsize=(6, 3.4))
for i, k in enumerate(ks):
    # NOTE(review): the title still says "Subgroup" although the x-values are
    # the gender column below -- confirm which quantity this figure should show.
    ax[i].set_title(f'Precision - Subgroup @ {k}', weight='bold')
    ax[i].set_ylim(0, .7)
    # Limits are given high-to-low, so the x-axis is drawn reversed.
    ax[i].set_xlim(.8, 0)
    ax[i].grid(True)
    if i == 0:
        ax[i].set_ylabel('Precision', weight='bold', fontsize=12)
    # The x-values are read from 'Avg_AbsBias_gender', so the axis is labelled
    # accordingly (it previously read 'Subgroup Bias').
    ax[i].set_xlabel('AbsBias in Gender', weight='bold')
    for j, method in enumerate(methods):
        c = colors[j]
        data = occ2_c[(occ2_c['name'] == method) & (occ2_c['k'] == k)]
        if method in ['Baseline', 'DebiasClip']:
            # Single black marker at the mean of this method's rows (`c` is not used here).
            ax[i].scatter(np.mean(data['Avg_AbsBias_gender']), np.mean(data['Avg_Precision']), label=method, s=15, c='black')
        else:
            # Line through the per-'tol' means; every averaged point is plotted.
            avg_c = avg_cats(data, "tol")
            ax[i].plot(avg_c['Avg_AbsBias_gender'], avg_c['Avg_Precision'], label=method, c=c, linewidth=2.1)

    handles1, labels1 = ax[i].get_legend_handles_labels()


# One shared legend, built from the handles of the last panel drawn.
fig.legend(handles1, labels1, loc='lower center', ncols=3, fontsize=9)
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.3)
fig.show()

In [None]:
# Over-concept figure: 'Avg_Precision' against 'Avg_AbsBias_gender', one panel per k.
occ2_c = pd.read_pickle('results/occ2-extra_concepts.pkl')

print(occ2_c['name'].unique())
print(occ2_c['k'].unique())

colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple', 'red', 'orange', 'green', 'blue', 'indigo', 'violet']

ks = [25, 50]

methods = ['Baseline', 'CDI_Min_overconcept', 'CDI_Sum_overconcept', 'CDI_EucMin_overconcept', 'CDI_EucSum_overconcept']

fig, ax = plt.subplots(1, 2, figsize=(6, 3.4))
# `q` is not read anywhere in this cell; the assignment is kept so the
# notebook's global state after running the cell is unchanged.
q = 0
for i, k in enumerate(ks):
    panel = ax[i]
    panel.set_title(f'Precision - Subgroup @ {k}', weight='bold')
    panel.set_ylim(0, .7)
    # Limits are given high-to-low, so the x-axis is drawn reversed.
    panel.set_xlim(.8, 0)
    panel.grid(True)
    if i == 0:
        panel.set_ylabel('Precision', weight='bold', fontsize=12)
    panel.set_xlabel('AbsBias in Gender', weight='bold')
    for j, method in enumerate(methods):
        c = colors[j]
        data = occ2_c[(occ2_c['name'] == method) & (occ2_c['k'] == k)]
        if method in ('Baseline', 'DebiasClip'):
            # Single black marker at the mean of this method's rows (`c` is not used here).
            panel.scatter(np.mean(data['Avg_AbsBias_gender']), np.mean(data['Avg_Precision']), label=method, s=15, c='black')
        else:
            # Line through the per-'tol' means; every averaged point is plotted.
            avg_c = avg_cats(data, "tol")
            pareto_points = avg_c
            panel.plot(pareto_points['Avg_AbsBias_gender'], pareto_points['Avg_Precision'], label=method, c=c, linewidth=2.1)

    handles1, labels1 = panel.get_legend_handles_labels()


# One shared legend, built from the handles of the last panel drawn.
fig.legend(handles1, labels1, loc='lower center', ncols=3, fontsize=9)
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.3)
fig.show()

In [None]:
# Gendered-queries figure: a 2x2 grid of 'Avg_Precision' against 'Avg_Bias_gender'.
# Top row: male-query results; bottom row: female-query results; one column per k.
#
# The male frame was previously read from the *female* pickle, so both rows
# showed the same data. It now reads a sibling '_male' file.
# NOTE(review): the '_male.pkl' filename is inferred from the female file's
# name -- confirm that it exists under results/.
occ2_male_queries = pd.read_pickle('results/occ2-3-31-gendered_queries_male.pkl')
occ2_female_queries = pd.read_pickle('results/occ2-3-31-gendered_queries_female.pkl')

print(occ2_male_queries)



colors = ['black', '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
 '#7f7f7f', 'purple']

ks = [25, 50]

methods = occ2_male_queries['name'].unique()

fig, ax = plt.subplots(2, 2, figsize=(6, 6.))
q = 0
for i, k in enumerate(ks):
    ax[q, i].set_title(f'Precision - Bias @ {k}', weight='bold')
    ax[q, i].set_ylim(0, .7)
    # Limits are given high-to-low, so the x-axis is drawn reversed.
    ax[q, i].set_xlim(.8, 0)
    ax[q, i].grid(True)
    if i == 0:
        ax[q, i].set_ylabel('Precision', weight='bold', fontsize=12)
    # NOTE(review): the label says 'Subgroup Bias' but the x-values are read
    # from 'Avg_Bias_gender' -- confirm the intended wording.
    ax[q, i].set_xlabel('Subgroup Bias', weight='bold')
    for j, method in enumerate(methods):
        # `methods` comes from the data, so it may hold more names than there
        # are colors; wrap around instead of raising IndexError.
        c = colors[j % len(colors)]
        data = occ2_male_queries[(occ2_male_queries['name'] == method) & (occ2_male_queries['k'] == k)]
        if method in ['Baseline', 'DebiasClip']:
            # Single black marker at the mean of this method's rows (`c` is not used here).
            ax[q, i].scatter(np.mean(data['Avg_Bias_gender']), np.mean(data['Avg_Precision']), label=method, s=15, c='black')
        else:
            # Line through the per-'tol' means; every averaged point is plotted.
            avg_c = avg_cats(data, "tol")
            ax[q, i].plot(avg_c['Avg_Bias_gender'], avg_c['Avg_Precision'], label=method, c=c, linewidth=2.1)

    handles1, labels1 = ax[q, i].get_legend_handles_labels()

# NOTE(review): the bottom row uses a fixed method list rather than the names
# found in the female-query frame -- confirm these names occur in that data.
methods = ['Baseline', 'CDI_Sum_3_attr', 'CDI_Sum_6_attr', 'CDI_Sum_8_attr']
q = 1
for i, k in enumerate(ks):
    ax[q, i].set_title(f'Precision - Bias @ {k}', weight='bold')
    ax[q, i].set_ylim(0, .7)
    ax[q, i].set_xlim(.8, 0)
    ax[q, i].grid(True)
    if i == 0:
        ax[q, i].set_ylabel('Precision', weight='bold', fontsize=12)
    ax[q, i].set_xlabel('Subgroup Bias', weight='bold')
    for j, method in enumerate(methods):
        c = colors[(3 + j) % len(colors)]
        data = occ2_female_queries[(occ2_female_queries['name'] == method) & (occ2_female_queries['k'] == k)]
        if method in ['Baseline', 'DebiasClip']:
            ax[q, i].scatter(np.mean(data['Avg_Bias_gender']), np.mean(data['Avg_Precision']), label=method, s=15, c='black')
        else:
            avg_c = avg_cats(data, "tol")
            ax[q, i].plot(avg_c['Avg_Bias_gender'], avg_c['Avg_Precision'], label=method, c=c, linewidth=2.1)

# The shared legend lists the top row's entries only, as before.
fig.legend(handles1, labels1, loc='lower center', ncols=3, fontsize=11)
fig.tight_layout(rect=[0, 0, 1.2, 1.2])
fig.subplots_adjust(bottom=0.25)
fig.show()

In [None]:
from scipy.spatial.distance import cdist
from sklearn.covariance import empirical_covariance

# Three toy 2-D point sets of four points each: the corners of the unit square,
# a copy with the first coordinate moved to .3/.7, and a copy whose last point
# is moved to (.6, .4).
data = np.asarray([[0, 0], [1, 1], [0, 1], [1, 0]])
data_miscalibrated = np.asarray([[.3, 0], [.7, 1], [.3, 1], [.7, 0]])
data_covariation_error = np.asarray([[0, 0], [1, 1], [0, 1], [.6, .4]])

# Draw each point set in its own small square figure with identical axes.
toy_sets = (
    (data, 'Concepts'),
    (data_miscalibrated, 'Miscalibrated'),
    (data_covariation_error, 'Covariation'),
)
for points, panel_title in toy_sets:
    plt.figure(figsize=(2.4, 2.4))
    plt.scatter(points[:, 0], points[:, 1])
    plt.xlabel('Attribute 1', weight='bold')
    plt.ylabel('Attribute 2', weight='bold')
    plt.title(panel_title, weight='bold')
    plt.xlim(-.05, 1.05)
    plt.ylim(-.05, 1.05)
    plt.show()

# Pairwise Euclidean distances within each point set.
for points, _ in toy_sets:
    print(cdist(points, points, 'euclidean'))
# Empirical covariance of the third set, then pairwise Mahalanobis distances for the first.
print(empirical_covariance(data_covariation_error))
print(cdist(data, data, 'mahalanobis'))

