In [23]:
import glob
import os
import warnings
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go


def stat_parity_rank_func(df, epoch_columns):
    """Absolute male/female gap in the share of rows whose value equals 0.

    Rows are grouped by ``gender_expression``; for each group and each
    column in ``epoch_columns`` the fraction of entries equal to 0 is
    computed. Returns ``|male - female|`` per epoch column.

    Raises ``KeyError`` if either the 'male' or 'female' group is absent.
    """
    zero_share = {
        gender: group[epoch_columns].eq(0).sum(axis=0) / len(group)
        for gender, group in df.groupby('gender_expression')
    }
    return abs(zero_share['male'] - zero_share['female'])

def stat_parity_func(df, epoch_columns):
    """Absolute difference between the male and female rate of zero entries.

    For each ``gender_expression`` group the per-column fraction of values
    equal to 0 is taken over ``epoch_columns``; the function returns the
    absolute male-minus-female difference for every epoch column.
    """
    def _zero_fraction(group):
        # Count the entries equal to 0, then normalise by the group size.
        is_zero = group[epoch_columns] == 0
        return is_zero.sum(axis=0) / group.shape[0]

    per_gender = {}
    for gender, group in df.groupby('gender_expression'):
        per_gender[gender] = _zero_fraction(group)

    gap = per_gender['male'] - per_gender['female']
    return abs(gap)

def stat_parity_ratio_func(df, epoch_columns):
    """Ratio form of the parity gap: ``|1 - male_rate / female_rate|``.

    ``*_rate`` is the per-column fraction of entries equal to 0 within the
    corresponding ``gender_expression`` group. A female rate of 0 yields
    inf/NaN through the division (no special handling).
    """
    rates = dict(
        (gender, (group[epoch_columns] == 0).sum(axis=0) / group.shape[0])
        for gender, group in df.groupby('gender_expression')
    )
    male_to_female = rates['male'] / rates['female']
    return np.abs(1 - male_to_female)

def stat_parity_ratio_rank_func(df, epoch_columns):
    """Relative male/female gap in the share of zero-valued entries.

    Computes, per epoch column, the fraction of entries equal to 0 for each
    ``gender_expression`` group and returns ``|1 - male / female|``.
    """
    share_by_gender = {}
    for gender, members in df.groupby('gender_expression'):
        n_members = members.shape[0]
        zero_counts = members[epoch_columns].eq(0).sum(axis=0)
        share_by_gender[gender] = zero_counts / n_members
    return np.abs(1 - share_by_gender['male'] / share_by_gender['female'])

def ratio_errors_func(df, epoch_columns):
    """Relative male/female gap in the share of NON-zero entries.

    For each ``gender_expression`` group the per-column fraction of entries
    different from 0 is computed; the result is ``|1 - male / female|``.
    """
    nonzero_share = {
        gender: group[epoch_columns].ne(0).sum(axis=0) / len(group)
        for gender, group in df.groupby('gender_expression')
    }
    return np.abs(1 - nonzero_share['male'] / nonzero_share['female'])

def stat_parity_from_rank_ratio_func(df, epoch_columns):
    """``|1 - male / female|`` of the per-group share of entries equal to 0.

    Groups come from the ``gender_expression`` column; shares are computed
    independently for every column listed in ``epoch_columns``.
    """
    grouped = df.groupby('gender_expression')
    hit_share = {}
    for gender, subset in grouped:
        hits = (subset[epoch_columns] == 0).sum(axis=0)
        hit_share[gender] = hits / subset.shape[0]
    ratio = hit_share['male'] / hit_share['female']
    return np.abs(1 - ratio)

def rank_func(df, epoch_columns):
    """Absolute male/female difference of the per-group column means.

    For each ``gender_expression`` group the values in ``epoch_columns`` are
    summed and divided by the group size; returns ``|male - female|``.
    """
    mean_by_gender = {
        gender: group[epoch_columns].sum(axis=0) / len(group)
        for gender, group in df.groupby('gender_expression')
    }
    return abs(mean_by_gender['male'] - mean_by_gender['female'])

def rank_ratio_func(df, epoch_columns):
    """Relative male/female gap of the per-group column means.

    Each ``gender_expression`` group contributes ``sum / group size`` for
    every column in ``epoch_columns``; returns ``|1 - male / female|``.
    """
    averages = {}
    for gender, members in df.groupby('gender_expression'):
        totals = members[epoch_columns].sum(axis=0)
        averages[gender] = totals / members.shape[0]
    return np.abs(1 - averages['male'] / averages['female'])

def acc_func(df, epoch_columns):
    """Per-column sum of ``epoch_columns`` divided by the number of rows.

    With boolean "prediction is correct" columns this is the accuracy.
    """
    n_rows = len(df)
    column_totals = df[epoch_columns].sum(axis=0)
    return column_totals / n_rows

def acc_from_rank_func(df, epoch_columns):
    """Fraction of rows whose value equals 0, per column in ``epoch_columns``."""
    is_zero = df[epoch_columns].eq(0)
    return is_zero.sum(axis=0) / len(df)

def err_from_rank_func(df, epoch_columns):
    """Fraction of rows whose value differs from 0, per column in ``epoch_columns``.

    This is the complement of ``acc_from_rank_func`` (an error rate, not an
    accuracy).
    """
    is_nonzero = df[epoch_columns].ne(0)
    return is_nonzero.sum(axis=0) / len(df)

def _resolve_head(s):
    if s is None:
        return 'fault'
    if 'CosFace'.lower() in s.lower():
        return 'CosFace'
    elif 'ArcFace'.lower() in s.lower():
        return 'ArcFace'
    elif 'MagFace'.lower() in s.lower():
        return 'MagFace'
    return 'fault'
    
def _resolve_opt(s):
    if 'AdamW'.lower() in s.lower():
        return 'AdamW'
    if 'Adam'.lower() in s.lower():
        return 'Adam'
    if 'SGD'.lower() in s.lower():
        return 'SGD'
    if 'RMSProp'.lower() in s.lower():
        return 'RMSProp'
    return 'fault'


def get_name_details(f):
    """Split an experiment path/name into ``(experiment, model, head, opt)``.

    * ``experiment``: basename without extension, with every 'config_'
      occurrence removed.
    * ``model``: text before the first underscore of the basename stem.
      NOTE(review): this is taken from the stem *before* 'config_' is
      stripped, so a 'config_*.yaml' input yields model == 'config' --
      confirm that this is intended.
    * ``head``: result of ``_resolve_head`` on the whole (slash-trimmed) path.
    * ``opt``: always the empty string here.

    A single trailing '/' is ignored. An empty string raises ``IndexError``.
    """
    path = f[:-1] if f[-1] == '/' else f
    stem = os.path.splitext(os.path.basename(path))[0]
    experiment = stem.replace('config_', '')
    model = stem.split("_")[0]
    head = _resolve_head(path)
    opt = ""
    return experiment, model, head, opt


def analyze_files(files, metadata, ratio=False, error=False, epochs=None):
    """Compute per-experiment accuracy and gender disparity from prediction CSVs.

    Each file is read with ``pd.read_csv``; every column except ``ids`` is
    treated as an epoch column holding predictions. After merging with
    ``metadata`` the epoch columns are replaced by booleans
    (prediction == ``label``) and summarised.

    Parameters
    ----------
    files : iterable of str
        CSV paths. Files that cannot be read are skipped with a warning.
    metadata : pandas.DataFrame
        Merged onto each CSV (on the shared columns). The merged frame must
        provide ``label`` and ``gender_expression``.
    ratio, error : bool
        Select the disparity metric: ``stat_parity_func`` (default),
        ``stat_parity_ratio_func`` (ratio) or ``ratio_errors_func``
        (ratio and error).
    epochs :
        Accepted for signature parity with the other analyze_* helpers;
        not used by this function.

    Returns
    -------
    (acc_df, acc_disp_df) : wide frames indexed by experiment name with
    columns ``epoch_0`` .. ``epoch_99``.

    NOTE(review): the epoch columns are booleans here (True == correct), so
    the ``== 0`` / ``!= 0`` tests inside the disparity helpers count
    *incorrect* / *correct* rows respectively -- confirm that the
    ``error`` flag selects the intended quantity.
    """
    all_epochs = ['epoch_'+str(e) for e in range(100)]
    acc_df = pd.DataFrame(columns=all_epochs)
    acc_disp_df = pd.DataFrame(columns=all_epochs)
    for f in files:
        try:
            df = pd.read_csv(f)
        except Exception as exc:
            # Best-effort: keep going, but do not hide the skipped file and
            # do not swallow KeyboardInterrupt/SystemExit like a bare except.
            warnings.warn(f"analyze_files: skipping unreadable file {f!r}: {exc}")
            continue
        epoch_columns = df.drop('ids',axis=1).columns
        df = metadata.merge(df)
        # Turn predictions into correctness flags, column by column.
        df[epoch_columns] = df[epoch_columns].apply(lambda x: x == df['label'])
        acc = acc_func(df, epoch_columns)
        experiment = get_name_details(f)[0]
        acc_df.loc[experiment] = acc
        if ratio:
            if error:
                acc_disp = ratio_errors_func(df, epoch_columns)
            else:
                acc_disp = stat_parity_ratio_func(df, epoch_columns)
        else:
            acc_disp = stat_parity_func(df, epoch_columns)
        acc_disp_df.loc[experiment] = acc_disp
    return acc_df, acc_disp_df

def analyze_files_pd(pd_dict_list, metadata, ratio=False, error=False, epochs=None):
    """Summarise rank tables that are already loaded as DataFrames.

    Parameters
    ----------
    pd_dict_list : list of dict
        Each entry has an ``'experiment'`` name and a ``'df'`` rank table.
    metadata : pandas.DataFrame
        Merged onto every rank table (on the shared columns); the merged
        frame must provide ``gender_expression``.
    ratio, error : bool
        ``error`` switches the first output from the share of zero entries
        to the share of non-zero entries; ``ratio`` switches both disparity
        outputs from absolute differences to ``|1 - male/female|``.
    epochs : list of str, optional
        Column names to analyse; defaults to ``epoch_0`` .. ``epoch_99``.

    Returns
    -------
    (acc_df, acc_ratio_df, rank_df) : wide frames indexed by experiment.
    """
    if epochs is None:
        epochs = ['epoch_'+str(e) for e in range(100)]

    # The metric functions depend only on the flags, so pick them once.
    accuracy_metric = err_from_rank_func if error else acc_from_rank_func
    if ratio:
        disparity_metric = ratio_errors_func if error else stat_parity_ratio_rank_func
        rank_metric = rank_ratio_func
    else:
        disparity_metric = stat_parity_rank_func
        rank_metric = rank_func

    acc_df = pd.DataFrame(columns=epochs)
    acc_ratio_df = pd.DataFrame(columns=epochs)
    rank_df = pd.DataFrame(columns=epochs)

    for entry in pd_dict_list:
        experiment = entry['experiment']
        ranks = entry['df']
        # Only analyse the requested epochs that this table actually has.
        epoch_columns = list(set(ranks.columns).intersection(epochs))
        merged = metadata.merge(ranks)

        acc_df.loc[experiment] = accuracy_metric(merged, epoch_columns)
        acc_ratio_df.loc[experiment] = disparity_metric(merged, epoch_columns)
        rank_df.loc[experiment] = rank_metric(merged, epoch_columns)
    return acc_df, acc_ratio_df, rank_df


def analyze_rank_files(files, metadata, ratio=False, error=False, epochs=None):
    """Load rank CSVs and return long-format accuracy/disparity/rank tables.

    Each path in ``files`` is read with ``pd.read_csv``; the experiment name
    is the name of the file's parent directory (second-to-last '/'-separated
    component). Files that cannot be read are skipped with a warning.

    ``metadata``, ``ratio``, ``error`` and ``epochs`` are forwarded to
    ``analyze_files_pd``; its three wide outputs are each converted to long
    format by ``prepare`` (columns ``index``, ``epoch``, ``Metric``).
    """
    pd_dict_list = []
    for f in files:
        try:
            df = pd.read_csv(f)
        except Exception as exc:
            # Best-effort: keep going, but report the skipped file and let
            # KeyboardInterrupt/SystemExit propagate (a bare except would not).
            warnings.warn(f"analyze_rank_files: skipping unreadable file {f!r}: {exc}")
            continue
        experiment = f.split('/')[-2]
        pd_dict_list.append({'experiment': experiment, 'df': df})

    acc_df, acc_ratio_df, rank_df = analyze_files_pd(pd_dict_list, metadata, ratio=ratio, error=error, epochs=epochs)
    return prepare(acc_df), prepare(acc_ratio_df), prepare(rank_df)

def analyze_pickle_files(pickle_files, metadata, ratio=False, error=False, epochs=None):
    """Build rank tables from per-epoch pickles and summarise them.

    ``pickle_files`` is an iterable of groups of paths (one group per
    experiment). Within a group each pickle is read, its columns are
    prefixed with ``Epoch_<epoch>_`` and the frames are joined on their
    index. Only columns containing 'nearest_by_id' are kept and renamed to
    ``epoch_<epoch>``; the index becomes the ``ids`` column.

    The epoch is the last '_'-separated token of the path (up to its first
    '.') when that token is all digits, otherwise the token before it.
    The experiment name is the parent directory of the last file of the
    group. Groups that produce no rows are skipped.

    NOTE(review): ``pd.read_pickle`` can execute arbitrary code; only use
    this on trusted files.

    Returns the three ``analyze_files_pd`` outputs in long format
    (see ``prepare``).
    """
    pd_dict_list = []
    for model_pickle_files in pickle_files:
        df = pd.DataFrame()
        for f in model_pickle_files:
            tokens = f.split('_')
            last_token = tokens[-1].split('.')[0]
            epoch = last_token if last_token.isdigit() else tokens[-2].split('.')[0]

            pickle_df = pd.read_pickle(f)
            pickle_df.columns = [f'Epoch_{epoch}_'+c for c in pickle_df.columns]
            # The first non-empty frame seeds the table; later ones are joined on the index.
            if len(df):
                df = df.merge(pickle_df, left_index=True, right_index=True)
            else:
                df = pickle_df

        if len(df) == 0:
            continue

        nearest_columns = [name for name in df.columns if 'nearest_by_id' in name]
        df = df[nearest_columns]
        df.columns = ['epoch_'+name.split('_')[1] for name in df.columns]
        df = df.reset_index().rename(columns={'index': 'ids'})
        experiment = f.split('/')[-2]
        pd_dict_list.append({'experiment': experiment, 'df': df})
    acc_df, acc_ratio_df, rank_df = analyze_files_pd(pd_dict_list, metadata, ratio=ratio, error=error, epochs=epochs)
    return prepare(acc_df), prepare(acc_ratio_df), prepare(rank_df)


def plot_df(acc_df, acc_disp_df, rank_df = None, title = ''):
    """Plot accuracy, disparity and (optionally) rank curves over epochs.

    Parameters
    ----------
    acc_df, acc_disp_df : pandas.DataFrame
        Wide frames (one row per experiment, columns named ``epoch_<n>``).
    rank_df : pandas.DataFrame, optional
        Same layout; adds a third 'Rank' facet when given.
    title : str
        Figure title.

    The frames are melted to long format, tagged with a ``measurement``
    label, stacked, stripped of NaN rows and drawn as one plotly line
    figure with a facet row per measurement. The figure is shown; nothing
    is returned.
    """
    def prepare(df):
        # Wide -> long: one row per (experiment, epoch); values go to 'Accuracy'.
        df = pd.melt(df.reset_index(), id_vars='index')
        df = df.rename(columns={'variable':'epoch', 'value':'Accuracy'})
        df.epoch = df.epoch.apply(lambda x: int(x.split('_')[1]))
        return df

    acc_df = prepare(acc_df)
    acc_df['measurement'] = 'Accuracy'
    acc_disp_df = prepare(acc_disp_df)
    acc_disp_df['measurement'] = 'Disparity'
    frames = [acc_df, acc_disp_df]

    if rank_df is not None:
        rank_df = prepare(rank_df)
        rank_df['measurement'] = 'Rank'
        frames.append(rank_df)

    # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
    df = pd.concat(frames).dropna()

    fig = px.line(df, x='epoch', y='Accuracy', color='index', facet_row='measurement', title=title)
    # Each facet gets its own y-range, and facet labels show only the value after '='.
    fig.update_yaxes(matches=None)
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    fig.update_layout(yaxis_title="Disparity")

    fig.show()
    
    
def whatIsPareto(df, x_inc=False, y_inc=False):
    """Flag the rows of a two-column frame that lie on the Pareto front.

    Parameters
    ----------
    df : pandas.DataFrame (or 2-D array-like) with exactly two columns (x, y).
    x_inc : bool
        True if a larger x is better, False if a smaller x is better.
    y_inc : bool
        True if a larger y is better, False if a smaller y is better.

    Returns
    -------
    numpy.ndarray of floats, one entry per row: 1 if no other row is
    strictly better in BOTH coordinates, else 0.

    Notes
    -----
    * Comparisons are strict, so ties never dominate each other.
    * Rows containing NaN are never dominated (every comparison with NaN is
      False) and are therefore flagged 1; callers drop them afterwards.
    * A single-row input returns ``[1.]``. The previous row-by-row
      ``np.apply_along_axis`` scan raised ``ValueError`` in that case
      because the "all other rows" array was empty.

    Raises
    ------
    ValueError
        If a non-empty input does not have exactly two columns.
    """
    values = np.asarray(df)
    n_points = values.shape[0]
    isPareto = np.zeros(n_points)
    if n_points == 0:
        return isPareto
    if values.ndim != 2 or values.shape[1] != 2:
        raise ValueError('whatIsPareto expects exactly two columns (x, y)')

    x = values[:, 0]
    y = values[:, 1]
    # better[i, j] is True when row j is strictly better than row i.
    # A row is never strictly better than itself, so the diagonal is harmless.
    if x_inc:
        x_better = x[np.newaxis, :] > x[:, np.newaxis]
    else:
        x_better = x[np.newaxis, :] < x[:, np.newaxis]
    if y_inc:
        y_better = y[np.newaxis, :] > y[:, np.newaxis]
    else:
        y_better = y[np.newaxis, :] < y[:, np.newaxis]

    dominated = np.logical_and(x_better, y_better).astype(bool).any(axis=1)
    isPareto[~dominated] = 1
    return isPareto

def preparePareto(df, x_inc=False, y_inc=False):
    """Return the Pareto-front rows of ``df`` sorted by its first column.

    ``x_inc`` / ``y_inc`` are forwarded to ``whatIsPareto`` and state
    whether larger values are better on the respective axis.
    """
    on_front = whatIsPareto(df, x_inc=x_inc, y_inc=y_inc) == 1
    first_column = df.columns[0]
    return df[on_front].sort_values(first_column)

def prepare(df):
    """Melt a wide per-epoch frame into long format.

    Input: one row per experiment (in the index), columns named
    ``<prefix>_<n>``. Output columns: ``index`` (the former row label),
    ``epoch`` (the integer ``n`` parsed from the column name) and
    ``Metric`` (the cell value).
    """
    long_df = (
        df.reset_index()
          .melt(id_vars='index')
          .rename(columns={'variable': 'epoch', 'value': 'Metric'})
    )
    long_df['epoch'] = long_df['epoch'].map(lambda name: int(name.split('_')[1]))
    return long_df

def merge(df1, df2):
    """Join two long-format frames on ``index`` and ``epoch``.

    When both inputs carry a ``Metric`` column, pandas suffixes them
    ``Metric_x`` / ``Metric_y``; these are renamed to ``Accuracy`` (from
    ``df1``) and ``Disparity`` (from ``df2``). Other columns keep their names.
    """
    renamed = {'Metric_x': 'Accuracy', 'Metric_y': 'Disparity'}
    joined = pd.merge(df1, df2, on=["index", "epoch"])
    return joined.rename(columns=renamed)

def drop_models(df_list, models):
    """Return copies of the frames in ``df_list`` without the given models.

    A row is removed when its ``index`` column value is listed in ``models``.
    The output list has the same length and order as ``df_list``.
    """
    return [frame[~frame['index'].isin(models)] for frame in df_list]


def find_yaml_folder(yaml):
    '''
    Given a yaml file name such as
         'config_inception_resnet_v2_CosFace_RMSProp.yaml'
    return the checkpoint folder of the matching experiment, e.g.
         './Phase1B/inception_resnet_v2_CosFace_RMSProp'
    or '' when no folder matches.

    Folders are searched under the 'R2' checkpoint directory when 'R2'
    occurs in the yaml name, otherwise under 'Phase1B' plus
    'timm_explore_few_epochs'. A folder matches when its experiment name
    (see get_name_details) equals the yaml's, ignoring case; the first
    match in glob order is returned.
    '''
    experiment_name = yaml.replace('config_','').replace('.yaml','')
    phase = 'R2' if 'R2' in yaml else 'Phase1B'

    candidates = glob.glob('/cmlscratch/sdooley1/merge_timm/FR-NAS/Checkpoints/{}/*/'.format(phase))
    if phase == 'Phase1B':
        candidates.extend(glob.glob('/cmlscratch/sdooley1/merge_timm/FR-NAS/Checkpoints/timm_explore_few_epochs/*/'))

    matches = [
        folder for folder in candidates
        if get_name_details(experiment_name)[0].lower() == get_name_details(folder)[0].lower()
    ]
    return matches[0] if matches else ''

def get_finished_models_Phase1B():
    '''
    Return a sorted list of the models which we are including in Phase1B.

    A config yaml counts as finished when find_yaml_folder locates a
    checkpoint folder for it. A model is kept when at least 6 of its
    configs are finished. 'vit_large_patch16_224' and 'cait_xs24_384' are
    always excluded. When both 'vgg19' and 'vgg19_bn' are present their
    positions are swapped so that vgg19_bn comes first.

    NOTE(review): glob is called without recursive=True, so '**' behaves
    like '*' and only yaml files exactly one directory below configs/ and
    configs_multi/ are found -- confirm that this is intended.
    '''
    yaml_paths = (glob.glob('/cmlscratch/sdooley1/merge_timm/FR-NAS/configs/**/*.yaml')
                  + glob.glob('/cmlscratch/sdooley1/merge_timm/FR-NAS/configs_multi/**/*.yaml'))
    finished = []
    for yaml_orig in yaml_paths:
        yaml = os.path.basename(yaml_orig)
        if find_yaml_folder(yaml):
            finished.append(yaml)

    cn = Counter(get_name_details(x)[1] for x in finished)
    final_models = sorted(k for k, v in cn.items() if v >= 6)
    for excluded in ('vit_large_patch16_224', 'cait_xs24_384'):
        if excluded in final_models:
            final_models.remove(excluded)

    # make sure vgg_bn goes before vgg; skip the swap when either model is
    # missing (list.index would otherwise raise ValueError).
    if 'vgg19' in final_models and 'vgg19_bn' in final_models:
        a, b = final_models.index('vgg19'), final_models.index('vgg19_bn')
        final_models[b], final_models[a] = final_models[a], final_models[b]

    return final_models


def get_pareto_hps_head_opt(stable_df, col='Accuracy'):
    """List the Pareto-optimal models for every (optimizer, head) pair.

    For each optimizer in ('adamw', 'sgd') and head in ('ArcFace',
    'CosFace', 'MagFace'), the rows of ``stable_df`` with that ``opt`` and
    ``head`` are reduced to the Pareto front of (``col`` larger-is-better,
    ``Disparity`` smaller-is-better). Each returned entry is the
    newline-joined, de-duplicated and sorted list of ``model`` names on
    that front, in optimizer-major order.
    """
    row = []
    combos = [(opt, head) for opt in ['adamw', 'sgd'] for head in ['ArcFace','CosFace','MagFace']]
    for opt, head in combos:
        subset = stable_df[(stable_df['opt'] == opt) & (stable_df['head'] == head)]
        on_front = whatIsPareto(subset[[col,'Disparity']], True, False).astype(bool)
        front = subset[on_front].dropna().sort_values(col, ascending=False)
        names = front['model'].to_string(header=False,index=False).split('\n')
        row.append('\n'.join(list(np.unique([name.strip() for name in names]))))
    return row

def get_pareto_hps_opt(stable_df, col='Accuracy'):
    """List the Pareto-optimal models for each optimizer ('adamw', 'sgd').

    Rows of ``stable_df`` with the given ``opt`` are reduced to the Pareto
    front of (``col`` larger-is-better, ``Disparity`` smaller-is-better).
    Each returned entry is the newline-joined, de-duplicated and sorted
    list of ``model`` names on that front.
    """
    row = []
    for opt in ['adamw', 'sgd']:
        subset = stable_df[stable_df['opt'] == opt]
        on_front = whatIsPareto(subset[[col,'Disparity']], True, False).astype(bool)
        front = subset[on_front].dropna().sort_values(col, ascending=False)
        names = front['model'].to_string(header=False,index=False).split('\n')
        row.append('\n'.join(list(np.unique([name.strip() for name in names]))))
    return row

def get_pareto_hps_head(stable_df, col='Accuracy'):
    """List the Pareto-optimal models for each head (ArcFace, CosFace, MagFace).

    Rows of ``stable_df`` with the given ``head`` are reduced to the Pareto
    front of (``col`` larger-is-better, ``Disparity`` smaller-is-better).
    Each returned entry is the newline-joined, de-duplicated and sorted
    list of ``model`` names on that front.
    """
    row = []
    for head in ['ArcFace', 'CosFace', 'MagFace']:
        subset = stable_df[stable_df['head'] == head]
        on_front = whatIsPareto(subset[[col,'Disparity']], True, False).astype(bool)
        front = subset[on_front].dropna().sort_values(col, ascending=False)
        names = front['model'].to_string(header=False,index=False).split('\n')
        row.append('\n'.join(list(np.unique([name.strip() for name in names]))))
    return row

def anova_hp_accuracy(df, col = 'Accuracy'):
    """ANOVA and Tukey HSD of ``col`` against head and optimizer.

    Adds ``model``, ``head`` and ``opt`` columns to the caller's ``df``
    (in place), derived from its ``index`` column via ``get_name_details``
    after removing '_rank_by_id_val'. The frame is then joined with the
    module-level ``meta`` table on model == model_name, NaNs are replaced
    by the string '0' and ``col`` is cast to float.

    Prints and returns ``(anova_table, tukey_head, tukey_opt)``.

    NOTE(review): ``meta``, ``ols``, ``sm`` and ``pairwise_tukeyhsd`` are
    not defined in the visible part of the notebook; presumably they come
    from statsmodels imports and an earlier cell -- confirm.
    """
    def _name_part(name, position):
        # Strip the rank-file suffix before parsing the experiment name.
        return get_name_details(name.replace('_rank_by_id_val',''))[position]

    df['model'] = df['index'].apply(lambda name: _name_part(name, 1))
    df['head'] = df['index'].apply(lambda name: _name_part(name, 2))
    df['opt'] = df['index'].apply(lambda name: _name_part(name, 3).lower())

    df = df.merge(meta, left_on='model', right_on='model_name')
    df = df.fillna('0')
    df[col] = df[col].astype(float)

    # Linear model with head and optimizer as categorical factors.
    formula = col+' ~ head + opt'
    lm = ols(formula, data=df).fit()
    print(sm.stats.anova_lm(lm))

    tukey_head = pairwise_tukeyhsd(endog=df[col], groups=df['head'], alpha=0.05)
    print(tukey_head)
    tukey_opt = pairwise_tukeyhsd(endog=df[col], groups=df['opt'], alpha=0.05)
    print(tukey_opt)

    return sm.stats.anova_lm(lm), tukey_head, tukey_opt

def anova_hp_disp(df, col = 'Accuracy'):
    """ANOVA and Tukey HSD of ``Disparity`` against head and optimizer.

    Adds ``model``, ``head`` and ``opt`` columns to the caller's ``df``
    (in place), derived from its ``index`` column via ``get_name_details``
    after removing '_rank_by_id_val'. The frame is then joined with the
    module-level ``meta`` table on model == model_name, NaNs are replaced
    by the string '0' and ``Disparity`` is cast to float.

    ``col`` is accepted but not used; the analysed column is always
    'Disparity'. Prints and returns ``(anova_table, tukey_head, tukey_opt)``.

    NOTE(review): ``meta``, ``ols``, ``sm`` and ``pairwise_tukeyhsd`` are
    not defined in the visible part of the notebook; presumably they come
    from statsmodels imports and an earlier cell -- confirm.
    """
    def _name_part(name, position):
        # Strip the rank-file suffix before parsing the experiment name.
        return get_name_details(name.replace('_rank_by_id_val',''))[position]

    df['model'] = df['index'].apply(lambda name: _name_part(name, 1))
    df['head'] = df['index'].apply(lambda name: _name_part(name, 2))
    df['opt'] = df['index'].apply(lambda name: _name_part(name, 3).lower())

    df = df.merge(meta, left_on='model', right_on='model_name')
    df = df.fillna('0')
    df['Disparity'] = df['Disparity'].astype(float)

    # Linear model with head and optimizer as categorical factors.
    lm = ols('Disparity ~ head + opt', data=df).fit()
    print(sm.stats.anova_lm(lm))

    disparity = df['Disparity']
    tukey_head = pairwise_tukeyhsd(endog=disparity, groups=df['head'], alpha=0.05)
    print(tukey_head)
    tukey_opt = pairwise_tukeyhsd(endog=disparity, groups=df['opt'], alpha=0.05)
    print(tukey_opt)

    return sm.stats.anova_lm(lm), tukey_head, tukey_opt

In [24]:
# Collect the '*_rank_by_id_val.csv' file of every experiment under Checkpoints_search.
# NOTE(review): without recursive=True, glob treats '**' like '*', so this matches
# exactly one directory level (consistent with the paths listed below). The absolute
# cluster path makes the notebook non-portable.
rank_files = glob.glob('/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/Checkpoints_search/**/*_rank_by_id_val.csv')

In [25]:
# Show the matched rank files.
rank_files

['/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/Checkpoints_search/densenet_333/densenet_333_rank_by_id_val.csv',
 '/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/Checkpoints_search/densenet_444/densenet_444_rank_by_id_val.csv',
 '/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/Checkpoints_search/densenet_555/densenet_555_rank_by_id_val.csv',
 '/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/Checkpoints_search/densenet_666/densenet_666_rank_by_id_val.csv',
 '/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/Checkpoints_search/dpn107_CosFace_SGD_333/dpn107_CosFace_SGD_333_rank_by_id_val.csv',
 '/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/Checkpoints_search/dpn107_CosFace_SGD_444/dpn107_CosFace_SGD_444_rank_by_id_val.csv',
 '/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/Checkpoints_search/dpn107_CosFace_SGD_555/dpn107_CosFace_SGD_555_rank_by_id_val.csv',
 '/work/dlclarge2/sukthank-ZCP_Competition/N

In [26]:
# Validation-set metadata that the analysis helpers merge onto each rank table.
# The helpers group by 'gender_expression', so this file presumably provides that
# column plus a join key shared with the rank CSVs -- TODO confirm.
metadata = pd.read_csv('/work/dlclarge2/sukthank-ZCP_Competition/NeurIPS2023/FR-NAS/celeba/val_identities_gender-expression_seed_222.csv')

In [27]:
# Only the 'epoch_100' column of each rank table is analysed.
epochs = ['epoch_100']

In [28]:
# Default mode (ratio=False, error=False): share of rank-0 rows, its absolute
# male/female gap, and the absolute male/female gap of the mean rank (long format).
acc_df, acc_disp_df, rank_df = analyze_rank_files(rank_files, metadata, epochs=epochs)

In [31]:
# Inspect the per-experiment disparity table.
acc_disp_df

Unnamed: 0,index,epoch,Metric
0,densenet_333,100,0.079885
1,densenet_444,100,0.147197
2,densenet_555,100,0.140257
3,densenet_666,100,0.079885
4,dpn107_CosFace_SGD_333,100,0.040597
5,dpn107_CosFace_SGD_444,100,0.034835
6,dpn107_CosFace_SGD_555,100,0.040204
7,dpn107_CosFace_SGD_666,100,0.03824
8,dpn107_MagFace_SGD_444,100,0.102803
9,dpn107_MagFace_SGD_444_act,100,0.078444


In [33]:
# Ratio mode: disparities are |1 - male/female| instead of absolute differences.
_, acc_disp_ratio_df, rank_ratio_df = analyze_rank_files(rank_files, metadata, ratio=True, epochs=epochs)
# Ratio mode on the share of non-zero ranks (error=True); err_df is not used
# by the cells visible below.
err_df, error_ratio_df, _ = analyze_rank_files(rank_files, metadata, ratio=True, error=True, epochs=epochs)
# Turn the rank-0 share computed earlier into its complement (an error rate).
# NOTE(review): this overwrites acc_df in place, so re-running this cell alone
# flips the values back -- the cell is not idempotent.
acc_df['Metric'] = 1 - acc_df['Metric']

In [34]:
# Assemble one table with all metrics per (index, epoch).
# The first merge() sees two 'Metric' columns and names them Accuracy/Disparity;
# every later merge brings in a single 'Metric' column that is renamed right after.
celeba = merge(acc_df, acc_disp_df)
celeba = merge(celeba, rank_df)
# 'Accuracy' holds 1 - accuracy at this point (see the in-place update of acc_df), hence "Error".
celeba = celeba.rename(columns={"Metric": "Rank Disparity", "Accuracy": "Error"})
celeba = merge(celeba, acc_disp_ratio_df)
celeba = celeba.rename(columns={"Metric": "Ratio"})
celeba = merge(celeba, rank_ratio_df)
celeba = celeba.rename(columns={"Metric": "Rank Ratio"})
celeba = merge(celeba, error_ratio_df)
celeba = celeba.rename(columns={"Metric": "Error Ratio"})
# Persist the combined table next to the notebook.
celeba.to_csv("celeba_val_phase2.csv", index=False)

In [35]:
# Preview the combined metrics table.
celeba.head()

Unnamed: 0,index,epoch,Error,Disparity,Rank Disparity,Ratio,Rank Ratio,Error Ratio
0,densenet_333,100,0.204819,0.079885,2.018727,0.105774,0.136002,0.326378
1,densenet_444,100,0.237166,0.147197,10.876113,0.213566,0.367779,0.473662
2,densenet_555,100,0.218897,0.140257,13.028156,0.197274,0.576479,0.485274
3,densenet_666,100,0.173782,0.079885,1.087611,0.101599,0.121236,0.373775
4,dpn107_CosFace_SGD_333,100,0.050288,0.040597,2.875196,0.04368,0.538126,0.575139


In [43]:
def plot_figure(metric_name, metric_suffix, split, df):
    """Scatter mean error against a mean fairness metric, with error bars.

    Parameters
    ----------
    metric_name : str
        Metric to plot; ``df`` must contain ``f'{metric_name}_mean'`` and
        ``f'{metric_name}_std'``.
    metric_suffix, split : str
        Only referenced by the commented-out image export.
    df : pandas.DataFrame
        Must contain ``Accuracy_mean``, ``Accuracy_std`` and ``Model``
        besides the metric columns.

    The x data comes from ``Accuracy_mean`` although the axis is titled
    "Error". The Pareto front (smaller is better on both axes) is drawn as
    a dashed gray polyline. The figure is shown; nothing is returned.
    """
    palette = ['#e6194B', '#3cb44b', '#ffe119','#f58231','#42d4f4',
               '#f032e6','#fabed4','#469990','#aaffc3','#000075','#e6194B','#9a6324','#dcbeff', '#42d4f4']
    model_families = ['DPN','ReXNet', 'Other', 'TNT', 'Inception', 'HRNet', 'EseVoVNet', 'VGG19', 'ResNet-RS',
                      'DenseNet', 'DPN_MagFace', 'DPN_CosFace', 'SMAC', 'Swin_Transformer']
    # Fixed colour per model family so figures stay comparable.
    color_map = dict(zip(model_families, palette))

    metric_mean = f'{metric_name}_mean'
    fig = px.scatter(df,
                     x='Accuracy_mean',
                     y=metric_mean,
                     error_x = "Accuracy_std",
                     error_y = f"{metric_name}_std",
                     color="Model",
                     color_discrete_map=color_map,
                     template="simple_white",
                     width=1200, height= 1000
                    )
    if metric_name == 'Rank Disparity':
        # Zoom in on the populated region for this metric.
        fig.update_layout(
            xaxis_range=[0,0.15],
            yaxis_range=[-.01,.6]
        )

    body_font = dict(family="Times New Roman", size=38, color="Black")
    fig.update_layout(
        xaxis_title="Error",
        yaxis_title=metric_name,
        legend_title="Models",
        font=body_font
    )
    fig.update_traces(marker=dict(size=20))

    legend_font = dict(family="Times New Roman", size=28, color="Black")
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        font=legend_font
    ))

    # Connect consecutive Pareto-front points (sorted by the x column).
    front = np.array(preparePareto(df[['Accuracy_mean', metric_mean]], False, False).dropna())
    for k in range(len(front) - 1):
        start, end = front[k], front[k + 1]
        fig.add_shape(type='line',
                      x0=start[0], y0=start[1], x1=end[0], y1=end[1],
                      line=dict(color='gray',width=4), line_dash='dash',
                      xref='x', yref='y')
    # plotly.io.write_image(fig, f'_RQ2_{split}_vgg_{metric_suffix}.png', format='png')
    fig.show()

In [44]:
# NOTE(review): this call fails with the ValueError shown below. plot_figure reads
# 'Accuracy_mean' / '<metric>_mean' / '<metric>_std' / 'Model' columns, while `celeba`
# only has index, epoch, Error, Disparity, Rank Disparity, Ratio, Rank Ratio, Error Ratio.
# It presumably needs a per-model mean/std aggregate instead -- TODO confirm.
plot_figure('Rank Disparity', 'rankdisparity', 'val', celeba)

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['index', 'epoch', 'Error', 'Disparity', 'Rank Disparity', 'Ratio', 'Rank Ratio', 'Error Ratio'] but received: Accuracy_mean

In [38]:
# Preview the combined metrics table (unchanged by the failed plot call).
celeba.head()

Unnamed: 0,index,epoch,Error,Disparity,Rank Disparity,Ratio,Rank Ratio,Error Ratio
0,densenet_333,100,0.204819,0.079885,2.018727,0.105774,0.136002,0.326378
1,densenet_444,100,0.237166,0.147197,10.876113,0.213566,0.367779,0.473662
2,densenet_555,100,0.218897,0.140257,13.028156,0.197274,0.576479,0.485274
3,densenet_666,100,0.173782,0.079885,1.087611,0.101599,0.121236,0.373775
4,dpn107_CosFace_SGD_333,100,0.050288,0.040597,2.875196,0.04368,0.538126,0.575139


In [39]:
def plot_validation(metric_name, metric_suffix, fairness_df, 
                    dataset='VGGFace2', show_all=False, epoch=100):
    """Scatter error against a fairness metric for one epoch and mark the Pareto front.

    Parameters
    ----------
    metric_name : str
        Column of ``fairness_df`` plotted on the y axis.
    metric_suffix : str
        Only referenced by the commented-out image export.
    fairness_df : pandas.DataFrame
        Needs ``index``, ``epoch``, ``Error`` and ``metric_name`` columns.
    dataset : str
        Used in the figure title ("<dataset> Validation Set").
    show_all : bool
        False keeps rows with Error < 0.3; True keeps rows with
        Error < 1 - 1/(7636*2) (presumably "better than chance" -- TODO confirm
        what 7636 stands for).
    epoch : int
        Epoch to plot.

    Returns
    -------
    The filtered frame (NaN rows dropped) with an added ``model`` column,
    sorted by model. ``fairness_df`` itself is not modified.

    NOTE(review): ``plotted_models`` and ``colors`` are read from the
    enclosing (notebook) scope; in the visible cells they only exist as
    locals of ``plot_figure`` -- make sure they are defined globally before
    calling this. ``go`` is ``plotly.graph_objects``.
    """
    df = fairness_df
    df = df[df['epoch'] == epoch]
    df = df.dropna()
    if not show_all:
        df = df[df['Error']<0.3]
    else:
        df = df[df['Error']<1-1/(7636*2)]

    # Work on an owned copy: assigning a column to a filtered slice triggers
    # pandas' SettingWithCopyWarning and may not stick.
    df = df.copy()
    df['model'] = df['index'].apply(lambda x: get_name_details(x)[1])
    df = df.sort_values(by=['model'])

    fig = px.scatter(df, x='Error', 
                     y=metric_name, 
                     template="simple_white", 
                     width=1000, height= 500
                )

    if not show_all:
        fig.update_layout(
            xaxis_range=[0,0.32],
        )
    
    fig.update_layout(
        yaxis_title=metric_name,
        title={
                'text' : f'{dataset} Validation Set',
                'x':0.5,
                'xanchor': 'center'
            },
        font=dict(
            family="Times New Roman",
            size=30,
            color="Black"
        )
    )
    # Red reference line at y = 0.
    fig.add_shape(type='line',
                    x0=0,y0=0,x1=1,y1=0,
                    line=dict(color='Red',),
                    xref='x',yref='y'
    )
    # Dashed polyline through the Pareto front (smaller is better on both axes).
    p = np.array(preparePareto(df[['Error',metric_name]], False, False).dropna())
    for x, y in zip(p[:-1], p[1:]):
        fig.add_shape(type='line',
                    x0=x[0],y0=x[1],x1=y[0],y1=y[1],
                    line=dict(color='gray',width=4),line_dash='dash',
                    xref='x',yref='y')

    # Recover the full rows of the Pareto points by joining on their coordinates.
    pareto_df = pd.merge(df, pd.DataFrame(p), right_on=[0,1], left_on=["Error",metric_name])

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="top",
        y=-.49,
        xanchor="center",
        x=0.5,
        font=dict(
            family="Times New Roman",
            size=24,
            color="Black"
        )
        )
    )

    for model in set(pareto_df['model']):
        # match model name to color via the first three letters (case-insensitive);
        # raises ValueError when no plotted model shares that prefix
        i = [x.lower()[:3] for x in plotted_models].index(model.lower()[:3])
        name = plotted_models[i]
        color = colors[i]
        on_front = pareto_df[pareto_df['model'] == model]
        fig.add_trace(go.Scatter(
            x=on_front['Error'],
            y=on_front[metric_name],
            mode='markers',
            name = name,
            marker = dict(color=color,size=15),
        ))

    fig.show()
    # plotly.io.write_image(fig, f'RQ1_main_{dataset}_{metric_suffix}_17052023.pdf', format='pdf')
    return df

In [45]:
# Preview the combined metrics table once more.
celeba.head()

Unnamed: 0,index,epoch,Error,Disparity,Rank Disparity,Ratio,Rank Ratio,Error Ratio
0,densenet_333,100,0.204819,0.079885,2.018727,0.105774,0.136002,0.326378
1,densenet_444,100,0.237166,0.147197,10.876113,0.213566,0.367779,0.473662
2,densenet_555,100,0.218897,0.140257,13.028156,0.197274,0.576479,0.485274
3,densenet_666,100,0.173782,0.079885,1.087611,0.101599,0.121236,0.373775
4,dpn107_CosFace_SGD_333,100,0.050288,0.040597,2.875196,0.04368,0.538126,0.575139


In [46]:
# Empty frame with one mean/std column pair per metric, to be filled per model.
# NOTE(review): these column names ("Error Mean", "Rank Disparity Std", ...) differ from
# the "<metric>_mean" / "<metric>_std" / "Model" names that plot_figure reads -- confirm
# which convention the downstream cells expect.
celeba_val_phase2_mean = pd.DataFrame(columns=["model_name","Error Mean","Error Std","Rank Disparity Mean","Rank Disparity Std","Rank Ratio Mean","Rank Ratio Std","Ratio Mean","Ratio Std"])