In [4]:
import pandas as pd
import os
from collections import defaultdict
import numpy as np

path = os.path.join("statistics", "data")
example = os.listdir(path)[0]
df_dict = {}
for file in os.listdir(path):
    df_dict[file[:-5]]=pd.read_json(os.path.join(path,file), orient="records")
print(df_dict.keys())

dict_keys(['iclr_adult_antag_ds_rl_di', 'iclr_adult_antag_ds_rl_dp', 'iclr_adult_antag_ds_rl_eodds', 'iclr_adult_antag_ds_rl_eopp', 'iclr_adult_antag_ds_rl_pe', 'iclr_adult_fixed_ds_rl_di', 'iclr_adult_fixed_ds_rl_dp', 'iclr_adult_fixed_ds_rl_eodds', 'iclr_adult_fixed_ds_rl_eopp', 'iclr_adult_fixed_ds_rl_pe', 'iclr_brazil_antag_ds_rl_di', 'iclr_brazil_antag_ds_rl_dp', 'iclr_brazil_antag_ds_rl_eodds', 'iclr_brazil_antag_ds_rl_eopp', 'iclr_brazil_antag_ds_rl_pe', 'iclr_brazil_fixed_ds_rl_di', 'iclr_brazil_fixed_ds_rl_dp', 'iclr_brazil_fixed_ds_rl_eodds', 'iclr_brazil_fixed_ds_rl_eopp', 'iclr_brazil_fixed_ds_rl_pe'])


# Check whether on average no loss in accuracy and amount of solutions found is visible for shifty when compared to other models

In [5]:
df_ex = df_dict["iclr_adult_fixed_ds_rl_dp"].copy()
pprint_map = {
    'SC'              : 'Seldonian',
    'QSC'             : 'Quasi-Seldonian',
    'SRC'             : 'Seldonian$_{GDS}$',
    'QSRC'            : 'Shifty',
    'FairlearnSVC'    : 'Fairlearn',
    'LinSVC'          : 'Baseline', #'SVC$_{linear}$',
    'SGD' 	          : 'Baseline', #'SGD',
    'SGD(hinge)'      : 'SGD$_{hinge}$',
    'SGD(log)' 	      : 'SGD$_{log}$',
    'SGD(perceptron)' : 'SGD$_{perc}$',
    'SVC' 	          : 'SVC$_{rbf}$',
    'FairConst'       : 'Fairness Constraints',
    'FairRobust'      : 'RFLearn'
}

df_ex["name"]=df_ex["name"].map(pprint_map)
deployed = defaultdict(list)
original = defaultdict(list)

def get_algo_dict():
    return defaultdict(dict)
var_dict = defaultdict(get_algo_dict)

columns = ["original_nsf", "original_acc_mean", "antagonist_acc_mean", "original_g_mean", "antagonist_g_mean"]

for dataset_constraint in df_dict:
    for algo in df_ex['name'].unique():
        df = df_dict[dataset_constraint].copy()
        df["name"]=df["name"].map(pprint_map)

        # pick algo
        df_model = df[df['name']==algo].copy()

        for var in columns:
            var_dict[dataset_constraint][algo][var] = [df_model[var].sum(),len(df_model[var])-df_model[var].isna().sum()] # Add values for calculating mean for var in constraint for certain model

        # diff
        for var in ["original_acc_mean", "antagonist_acc_mean"]:
            x = float(df_model.iloc[-1][var]-df_model.iloc[0][var])
            var_diff = f"{var}_diff"
            var_dict[dataset_constraint][algo][var_diff]= x # Calculate difference between first and last epoch for var in constraint for certain model

adult_antag =  defaultdict(dict)
adult_fixed =  defaultdict(dict)
brazil_antag = defaultdict(dict)
brazil_fixed = defaultdict(dict)

def create_mean(d):
    dd = defaultdict(get_algo_dict)
    for algo in df_ex['name'].unique():  
        for var in  columns:
            dd[algo][f"{var}_mean"] = d[algo][var]/d[algo][f"{var}_count"]
    return dd

def create_dict(d, data, type):
    d = defaultdict(get_algo_dict)
    for dataset in [i for i in var_dict if data in i]:
        if type in dataset:
            for algo in df_ex['name'].unique():   
                for var in columns:
                    d[algo][var] = var_dict[dataset][algo][var][0]
                    d[algo][f"{var}_count"] = var_dict[dataset][algo][var][1]
    dd = create_mean(d)
    return dd

for dataset in var_dict["iclr_adult_fixed_ds_rl_dp"]:
    adult_antag = create_dict(adult_antag, "adult", "antag")
    adult_fixed = create_dict(adult_antag, "adult", "fixed")
    brazil_antag = create_dict(adult_antag, "brazil", "antag")
    brazil_fixed = create_dict(adult_antag, "brazil", "fixed")



  dd[algo][f"{var}_mean"] = d[algo][var]/d[algo][f"{var}_count"]


In [6]:
def display_df(name ,dict_constraint):
    df = pd.DataFrame(dict_constraint).T
    df = df[["original_nsf_mean", "original_acc_mean_mean", "antagonist_acc_mean_mean"]].iloc[0:-1]
    print(name)
    # display(df)
    print(df.round(3).to_latex())

display_df("adult_antag", adult_antag)
display_df("adult_fixed", adult_fixed)
display_df("brazil_antag", brazil_antag)
display_df("brazil_fixed", brazil_fixed)


adult_antag
\begin{tabular}{lrrr}
\toprule
{} &  original\_nsf\_mean &  original\_acc\_mean\_mean &  antagonist\_acc\_mean\_mean \\
\midrule
Fairness Constraints &              0.000 &                   0.807 &                     0.788 \\
RFLearn              &              0.000 &                   0.810 &                     0.788 \\
Fairlearn            &              0.000 &                   0.806 &                     0.783 \\
Quasi-Seldonian      &              0.062 &                   0.807 &                     0.786 \\
Shifty               &              0.312 &                   0.802 &                     0.788 \\
Seldonian            &              0.583 &                   0.791 &                     0.785 \\
\bottomrule
\end{tabular}

adult_fixed
\begin{tabular}{lrrr}
\toprule
{} &  original\_nsf\_mean &  original\_acc\_mean\_mean &  antagonist\_acc\_mean\_mean \\
\midrule
Fairness Constraints &              0.000 &                   0.807 &                     0.783 \