Before executing this notebook, please train some models first, for example by running: python3 german.py 5 0.7 cuda:0 ROAD TEST_GERMAN_ROAD_10 20 10 10 10

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn import datasets as ds
import sys, os
sys.path.insert(1, os.path.join(sys.path[0], '..'))
from utils.fairness_utils import pareto_df

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Folder (relative to this notebook) that holds the result CSV files.
FOLDER = "./../results_german/"

DATASET_NAME = "GERMAN"

# Base file name (without extension) of the ROAD result files inside FOLDER.
fROAD = "TEST_" + DATASET_NAME + "_ROAD_10_DP" 

# colors
#palette = plt.cm.tab20
palette = sns.color_palette("bright", 10)

# Per-method plotting configuration. The loop below adds two entries to each
# method: "df" (results read from "<fname>.csv") and "df_rdm" (results read
# from "<fname>_rdm.csv").
METHODS_DICT = {"road": {"fname":fROAD,
                          "name": "ROAD (Ours)",
                          "linestyle":"-",
                          "color": palette[2]}
                
}


def _load_results(csv_path):
    """Read one results CSV and normalise its identifier columns.

    The "lambda" and "tau" columns are converted to strings and truncated to
    their first 4 characters; "run_id" is converted to a string.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read. It must contain the columns "lambda",
        "tau" and "run_id".

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the three columns above rewritten as strings.
    """
    results = pd.read_csv(csv_path)
    for column in ("lambda", "tau"):
        results[column] = results[column].astype('str')
        results[column] = results[column].map(lambda x: x[:4])
    results["run_id"] = results["run_id"].astype('str')
    return results


for method in METHODS_DICT:
    # Same normalisation for the main results and for the "_rdm" results.
    df_meth = _load_results(FOLDER + METHODS_DICT[method]["fname"] + ".csv")
    df_meth_rdm = _load_results(FOLDER + METHODS_DICT[method]["fname"] + "_rdm.csv")

    METHODS_DICT[method]["df"] = df_meth
    METHODS_DICT[method]["df_rdm"] = df_meth_rdm
       
# for printing labels on figures
FEATURE_NAMES = {"Global DI": "Global Fairness (DI)",
                "Global Acc": "Accuracy",
                "top1_DI": "Local Fairness (worst 1 DI)",
                "top3_DI": "Local Fairness (worst 3 DI)", 
                "q_DI_0.8": "Local Fairness (0.8 quantile)"}



# Pareto curves

## 1. Acc - Local Fairness

### 1.a Demographic subgroups

In [None]:
%%time

#### WARNING: slow because of the Pareto-front computation


# Pareto plot of Global Acc against top1_DI, at fixed (iso) Global DI
sns.set_style('whitegrid')

sns.set_palette("bright")

#### IMPORTANT: this defines the feature to "fix" and the corresponding range. E.g.: keeping only models 
# with GLOBAL DI between 0.0 and 0.05
iso_f = "Global DI"
GDIRANGE = 0.0, 0.05

### FEATURES FOR X AND Y AXES
f1, f2 = "top1_DI", "Global Acc"


legend_names = []
for key in METHODS_DICT:
    print( key)
    method = METHODS_DICT[key]    
    try:
        df = method["df"]
        # Keep only the models whose iso feature lies in [GDIRANGE[0], GDIRANGE[1]).
        df2 = df[(df[iso_f] >= GDIRANGE[0])&(df[iso_f] < GDIRANGE[1])]
        color = method["color"]
        linsty = method["linestyle"]  

    except KeyError as err:
        # Best effort: a method with a missing entry/column is skipped, but
        # say so instead of dropping it silently.
        print("Skipping " + str(key) + ": missing key " + str(err))
        continue
    ### PARETO FRONT
    dff = pareto_df(df2, f1, f2)
    # Select the rows flagged as Pareto-optimal once, and reuse for both layers.
    front = dff[dff["pareto"]==1]
    g = sns.lineplot(data=front, x=f1, y=f2, color=color, linewidth=3, label=method["name"],linestyle =linsty)
    # `marker` (matplotlib keyword) sets the glyph; seaborn's `markers` only
    # applies to a `style=` mapping, which is not used here.
    g2 = sns.scatterplot(data=front, x=f1, y=f2, color=color, legend=False, s=30, marker="o",linestyle =linsty)
    legend_names.append(method["name"])

# Use the current axes rather than `g`, which is undefined when no method
# was plotted (empty METHODS_DICT or every method skipped).
plt.gca().invert_xaxis()

plt.legend(fontsize=12)
#plt.legend(legend_names, fontsize=13, loc='lower left')


plt.xlabel('Local Unfairness (worst 1 DI)', fontsize=17)
plt.ylabel(FEATURE_NAMES[f2], fontsize=17)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.ylim((0.67, 0.82))
plt.title(DATASET_NAME + ' (Global DI <' + str(GDIRANGE[1]) + ')', fontsize=17)
plt.tight_layout()

FNAME = "../results_img/"

# Make sure the output folder exists so the slow computation above is not
# lost to a missing-directory error at save time.
os.makedirs(FNAME, exist_ok=True)
plt.savefig(FNAME + "german_paretoTop1.pdf")

In [None]:
%%time

#### WARNING: slow because of the Pareto-front computation


# Pareto plot of Global Acc against top3_DI, at fixed (iso) Global DI
sns.set_style('whitegrid')

sns.set_palette("bright")

#### IMPORTANT: this defines the feature to "fix" and the corresponding range. E.g.: keeping only models 
# with GLOBAL DI between 0.0 and 0.05
iso_f = "Global DI"
GDIRANGE = 0.0, 0.05

### FEATURES FOR X AND Y AXES
f1, f2 = "top3_DI", "Global Acc"


legend_names = []
for key in METHODS_DICT:
    print( key)
    method = METHODS_DICT[key]    
    try:
        df = method["df"]
        # Inclusive lower bound, matching the top-1 plot and the range comment
        # above: a strict `>` would drop models whose Global DI is exactly 0.
        df2 = df[(df[iso_f] >= GDIRANGE[0])&(df[iso_f] < GDIRANGE[1])]
        color = method["color"]
    except KeyError as err:
        # Best effort: a method with a missing entry/column is skipped, but
        # say so instead of dropping it silently.
        print("Skipping " + str(key) + ": missing key " + str(err))
        continue
    # Honour the per-method line style configured in METHODS_DICT, as the
    # top-1 plot does; fall back to a solid line when none is configured.
    linsty = method.get("linestyle", "-")
        
    ### PARETO FRONT
    dff = pareto_df(df2, f1, f2)
    # Select the rows flagged as Pareto-optimal once, and reuse for both layers.
    front = dff[dff["pareto"]==1]
    g = sns.lineplot(data=front, x=f1, y=f2, color=color, linewidth=3, label=method["name"], linestyle=linsty)
    # `marker` (matplotlib keyword) sets the glyph; seaborn's `markers` only
    # applies to a `style=` mapping, which is not used here.
    g2 = sns.scatterplot(data=front, x=f1, y=f2, color=color, legend=False, s=30, marker="o")
    legend_names.append(method["name"])

# Use the current axes rather than `g`, which is undefined when no method
# was plotted (empty METHODS_DICT or every method skipped).
plt.gca().invert_xaxis()

plt.legend(fontsize=12)
#plt.legend(legend_names, fontsize=13, loc='lower left')


plt.xlabel('Local Unfairness (worst 3 DI)', fontsize=17)
plt.ylabel(FEATURE_NAMES[f2], fontsize=17)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.ylim((0.67, 0.8))
plt.title(DATASET_NAME + ' (Global DI <' + str(GDIRANGE[1]) + ')', fontsize=17)
plt.tight_layout()

# Same output folder as the top-1 plot (paths in this notebook are relative
# to the parent directory, like FOLDER).
FNAME = "../results_img/"

#plt.savefig(FNAME + "german_paretoTop3.pdf")
