In [1]:
import pandas as pd
import numpy as np
import os
from analysis import *
import glob
import string
import plotly.graph_objects as go
import plotly
from functools import reduce

In [2]:
def reduce_results(results, split, write=False):
    """Average the per-seed accuracy and disparity tables of one data split.

    Args:
        results: sequence of dicts with the keys 'split', 'acc_df' and 'disp_df'.
            The two frames of every entry are merged on an index level named
            'index'.
        split: only entries whose 'split' equals this value are aggregated.
        write: when True, the table is also saved to 'vggface_phase1_{split}.csv'.

    Returns:
        DataFrame with the columns 'Accuracy_mean', 'Accuracy_std',
        'Disparity_mean', 'Disparity_std' and 'Model'. Despite their names the
        '*_std' columns hold the standard error of the mean (``DataFrame.sem``).
        Rows are restricted to index values present in every merged frame,
        because ``pd.merge`` defaults to an inner join.

    Raises:
        ValueError: if no entry of ``results`` belongs to ``split``.
    """
    def _merged(key):
        # One wide frame per metric: a column for every seed, rows aligned on 'index'.
        frames = [entry[key] for entry in results if entry['split'] == split]
        if not frames:
            raise ValueError(f"no results available for split {split!r}")
        # Every input frame uses the same column name, so give each one a unique
        # suffix; otherwise the default '_x'/'_y' merge suffixes collide as soon
        # as more than three frames are chained.
        distinct = [frame.add_suffix(f'_{pos}') for pos, frame in enumerate(frames)]
        return reduce(lambda left, right: pd.merge(left, right, on='index'), distinct)

    def _model_label(name):
        # Keep the part of the run name that precedes the head name ('...Face');
        # DPN runs keep the head name and get the 'dpn107' prefix normalised.
        if 'dpn' in name:
            return name[:name.find('Face') + 4].replace('dpn107', 'DPN')
        return name[:name.find('Face') - 4]

    # Merge once per metric and reuse the result for both statistics.
    acc = _merged('acc_df')
    disp = _merged('disp_df')

    df = pd.DataFrame(columns = ['Accuracy_mean', 'Accuracy_std', 'Disparity_mean', 'Disparity_std'])
    df['Accuracy_mean'] = acc.mean(axis=1)
    df['Disparity_mean'] = disp.mean(axis=1)
    df['Accuracy_std'] = acc.sem(axis=1)
    df['Disparity_std'] = disp.sem(axis=1)
    df['Model'] = [_model_label(name) for name in df.index]
    if write:
        df.to_csv(f'vggface_phase1_{split}.csv')
    return df

In [3]:
def _error_and_disparity(pickle_files, metadata, epochs, metric_function_output, metric_args):
    """Run ``analyze_pickle_files`` twice and return (error frame, disparity frame).

    The first call (no extra options) yields a 3-item result whose first item is
    the accuracy frame; its 'Metric' column is converted in place to
    ``1 - accuracy``. The second call forwards ``metric_args`` and the item at
    position ``metric_function_output`` of its result is used as the disparity
    frame.
    """
    acc_df, _, _ = analyze_pickle_files(pickle_files, metadata, epochs=epochs)
    disp_df = analyze_pickle_files(pickle_files, metadata, epochs=epochs, **metric_args)[metric_function_output]
    acc_df['Metric'] = 1 - acc_df['Metric']
    return acc_df, disp_df


def make_tables(metric_function_output, **args):
    """Build the aggregated validation and test tables over all training seeds.

    For each split ('val', 'test') and seed (111, 222, 333) the prediction
    pickles under 'vggface2_train_{seed}/' are analysed in two groups: model
    directories that do not start with 'C', and the SMAC 'Checkpoint*'
    directories. The per-seed frames are then reduced with ``reduce_results``.

    Args:
        metric_function_output: position of the disparity frame inside the
            result returned by ``analyze_pickle_files``.
        **args: extra keyword options forwarded to ``analyze_pickle_files`` for
            the disparity computation only.

    Returns:
        Tuple ``(df_val, df_test)`` as produced by ``reduce_results``.
    """
    results = []
    for split in ['val', 'test']:
        metadata = pd.read_csv(f'vggface2_{split}_identities_gender.csv')
        # NOTE(review): '[!ema]' is a one-character class. It rejects any file
        # whose last character before '_{split}.pkl' is 'e', 'm' or 'a'; it is
        # not a test for the suffix 'ema'. Confirm this matches the file naming.
        split_pattern = f'/*[!ema]_{split}.pkl'
        for seed in [111, 222, 333]:
            # Every model directory contributes one list of pickle paths.
            pickle_files = [
                glob.glob(model + split_pattern)
                for model in glob.glob(f'vggface2_train_{seed}/[!C]*')
            ]
            acc_df_vgg, disp_df = _error_and_disparity(
                pickle_files, metadata, ['epoch_11'], metric_function_output, args)

            #SMAC models
            pickle_files = []
            for model in glob.glob(f'vggface2_train_{seed}/Checkpoint*'):
                matches = glob.glob(model + split_pattern)
                # Fall back to every pickle in the directory when none is
                # split-specific.
                pickle_files.append(matches if matches else glob.glob(model + '/*.pkl'))
            # The SMAC runs are read at 'epoch_10' rather than 'epoch_11'.
            acc_df_vgg_smac, disp_df_smac = _error_and_disparity(
                pickle_files, metadata, ['epoch_10'], metric_function_output, args)

            acc_df_vgg = pd.concat([acc_df_vgg, acc_df_vgg_smac])
            disp_df = pd.concat([disp_df, disp_df_smac])
            results.append({
                'split': split,
                'seed': seed,
                'acc_df': acc_df_vgg[['index', 'Metric']].set_index('index'),
                'disp_df': disp_df[['index', 'Metric']].set_index('index'),
            })
    df_val = reduce_results(results, 'val')
    df_test = reduce_results(results, 'test')
    return df_val, df_test

In [4]:
def plot_figure(metric_name, metric_suffix, split, df):
    """Scatter error against a fairness metric, draw the Pareto front, save and show.

    Args:
        metric_name: y-axis title. The value 'Rank Disparity' additionally pins
            the axis ranges to fixed limits.
        metric_suffix: suffix used in the output file name.
        split: data split name used in the output file name.
        df: frame with the columns 'Accuracy_mean', 'Accuracy_std',
            'Disparity_mean', 'Disparity_std' and 'Model'.

    Side effects:
        Writes '_RQ2_{split}_vgg_{metric_suffix}.png' to the working directory
        and displays the figure.

    Relies on the module-level ``color_map`` and on ``preparePareto``, neither of
    which is defined inside this function (``preparePareto`` presumably comes
    from the ``analysis`` star import - confirm).
    """
    # The notebook never imports plotly.express itself; import it here so the
    # function does not depend on what ``from analysis import *`` exposes.
    import plotly.express as px

    fig = px.scatter(
        df,
        x='Accuracy_mean',
        y='Disparity_mean',
        error_x="Accuracy_std",
        error_y="Disparity_std",
        color="Model",
        color_discrete_map=color_map,
        template="simple_white",
        width=1200,
        height=1000,
    )
    if metric_name == 'Rank Disparity':
        fig.update_layout(
            xaxis_range=[0, 0.15],
            yaxis_range=[-.01, .6],
        )
    fig.update_layout(
        xaxis_title="Error",
        yaxis_title=metric_name,
        legend_title="Models",
        font=dict(family="Times New Roman", size=38, color="Black"),
    )
    fig.update_traces(marker=dict(size=20))
    # Horizontal legend placed just above the plotting area, right-aligned.
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        font=dict(family="Times New Roman", size=28, color="Black"),
    ))

    # Connect consecutive Pareto points with dashed grey segments.
    pareto = np.array(preparePareto(df[['Accuracy_mean', 'Disparity_mean']], False, False).dropna())
    for start, end in zip(pareto[:-1], pareto[1:]):
        fig.add_shape(type='line',
                      x0=start[0], y0=start[1], x1=end[0], y1=end[1],
                      line=dict(color='gray', width=4), line_dash='dash',
                      xref='x', yref='y')
    plotly.io.write_image(fig, f'_RQ2_{split}_vgg_{metric_suffix}.png', format='png')
    fig.show()

In [5]:
# Palette and model families, paired by position.
# NOTE(review): '#e6194B' and '#42d4f4' each occur twice, so DPN / DPN_MagFace
# and Inception / Swin_Transformer are drawn in the same colour.
colors = [
    '#e6194B', '#3cb44b', '#ffe119', '#f58231', '#42d4f4',
    '#f032e6', '#fabed4', '#469990', '#aaffc3', '#000075',
    '#e6194B', '#9a6324', '#dcbeff', '#42d4f4',
]
plotted_models = [
    'DPN', 'ReXNet', 'Other', 'TNT', 'Inception',
    'HRNet', 'EseVoVNet', 'VGG19', 'ResNet-RS', 'DenseNet',
    'DPN_MagFace', 'DPN_CosFace', 'SMAC', 'Swin_Transformer',
]
# Model family -> hex colour, consumed by plot_figure.
color_map = dict(zip(plotted_models, colors))

In [6]:
def phase2(metric_name, metric_suffix, metric_function_output, plot=False, **args):
    """Build the phase-2 tables, relabel models by plot family and optionally plot.

    Args:
        metric_name: y-axis title handed to ``plot_figure``.
        metric_suffix: file-name suffix handed to ``plot_figure``.
        metric_function_output: forwarded to ``make_tables``.
        plot: when True, a figure is produced for the 'val' and the 'test' table.
        **args: forwarded to ``make_tables``.

    Returns:
        Tuple ``(df_val, df_test)`` whose 'Model' column holds the plot family.

    Raises:
        KeyError: if a table contains a model that has no entry in the family
            lookup.
    """
    # Raw model name -> family shown in the legend.
    # NOTE(review): the original listed the 'Edges_301' checkpoint twice; if a
    # different checkpoint was intended for the second entry, add it here.
    lookup = {
        'Checkpoints_Edges_301_LR_0.13828312564892567_Head': 'SMAC',
        'Checkpoints_Edges_258_LR_0.1404172769842098_Head': 'SMAC',
        'Checkpoints_Edges_248_LR_0.09532880096168164_Head': 'SMAC',
        'coat_lite_small': 'Other',
        'convit_base': 'Other',
        'cspdarknet53': 'Other',
        'dla102x2': 'Other',
        'DPN_ArcFace': 'Other',
        'DPN_MagFace': 'DPN_MagFace',
        'DPN_CosFace': 'DPN_CosFace',
        'ese_vovnet39b': 'EseVoVNet',
        'hrnet_w64': 'HRNet',
        'jx_nest_base': 'Other',
        'rexnet_200': 'ReXNet',
        'swin_base_patch4_window7_224': 'Swin_Transformer',
        'tf_efficientnet_b7_ns': 'Other',
        'tnt_s_patch16_224': 'TNT',
        'twins_svt_large': 'Other'
    }

    df_val, df_test = make_tables(metric_function_output, **args)
    for split, table in (('val', df_val), ('test', df_test)):
        # Fail with the offending names instead of a bare KeyError from a lambda.
        unknown = sorted(set(table['Model']) - set(lookup))
        if unknown:
            raise KeyError(f'no plot family defined for {split} models: {unknown}')
        table['Model'] = table['Model'].map(lookup)
    if plot:
        plot_figure(metric_name, metric_suffix, 'val', df_val)
        plot_figure(metric_name, metric_suffix, 'test', df_test)
    return df_val, df_test

In [13]:
# Alternative metric configurations; swap one in for the active call below.
#   phase2('Rank Disparity', '', 2)
#   phase2('Disparity', 'disparity', 1)
#   phase2('Rank Ratio', 'rank_ratio', 2, ratio=True)
#   phase2('Ratio', 'ratio', 1, ratio=True)
# Active configuration: error ratio.
df_val, df_test = phase2(
    metric_name='Error Ratio',
    metric_suffix='error_ratio',
    metric_function_output=1,
    ratio=True,
    error=True,
)

In [14]:
# Persist both tables. The file names are fixed to the error-ratio metric, so
# they need updating whenever a different phase2 configuration is run above.
for table, csv_path in (
    (df_val, 'vggface_phase2pareto_errorratio_val.csv'),
    (df_test, 'vggface_phase2pareto_errorratio_test.csv'),
):
    table.to_csv(csv_path)