In [1]:
%load_ext autoreload
%autoreload 2

In [9]:
from glob import glob
import os
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import numpy as np
import functools
from copy import deepcopy
import math

from util.metrics_utils import *
import warnings
warnings.filterwarnings('ignore')

In [10]:
# Root folder with the final results. The ranking cell globs
# '<img_path>/<scenario discriminator>_*/material/<parquet file>' below it.
img_path = '/media/nas/datasets/MIRAGE_2020/FSCIL_approaches/hf-project/final_results/results/images'

# Key used inside the result file names -> label used for the ranked quantities.
metrics = {'f1_score': 'F1 Score'}

# metric_dict:          ranked quantity -> parquet file it is read from.
# lower_is_better_dict: ranked quantity -> True when smaller values rank first.
metric_dict = {}
lower_is_better_dict = {}

for metric, metric_label in metrics.items():
    # The three drop-style quantities are all read from the same "intransigence" file.
    drop_file = 'per_class_metrics_metrics_%s_macro intransigence-material.parquet' % metric

    metric_dict[metric_label] = 'per_class_metrics_metrics_%s_macro-material.parquet' % metric
    metric_dict[metric_label + ' Forgetting'] = (
        'per_episode_metrics_metrics_forgetting_%s_per episode-material.parquet' % metric)
    for suffix in (' Intransigence', ' Drop', ' Drop Old'):
        metric_dict[metric_label + suffix] = drop_file

    # Only the plain score is "higher is better".
    lower_is_better_dict[metric_label] = False
    for suffix in (' Forgetting', ' Intransigence', ' Drop', ' Drop Old'):
        lower_is_better_dict[metric_label + suffix] = True

# Scenario label (LaTeX, as shown in plots) -> list of result-folder discriminators.
# Aggregated scenarios are currently disabled:
#    '$CV$': ['coervionba20inr5stp5', 'coervionba20in10stp3', 'coervionba20in20stp2'],
#    '$TC$': ['tricclonba34inr1stp7', 'tricclonba34inr2stp4', 'tricclonba34inr3stp3'],
scenario_discrs_dict = {
    label: [discr]
    for label, discr in [
        ('$CV_{5}$', 'coervionba20inr5stp5'),
        ('$CV_{10}$', 'coervionba20in10stp3'),
        ('$CV_{20}$', 'coervionba20in20stp2'),
        ('$TC_{1}$', 'tricclonba34inr1stp7'),
        ('$39_{1}$', '393911ba39inr1stp2'),
        ('$38_{2}$', 'tricclonba38inr2stp2'),
        ('$TC_{2}$', 'tricclonba34inr2stp4'),
        ('$TC_{3}$', 'tricclonba34inr3stp3'),
    ]
}

# Scenario label -> human-readable description used as the figure title.
# Aggregated scenarios are currently disabled:
#     '$CV$': 'Computer Vision (CV) - Base: 20; Incr: 5, 10, and 20 [Rank @ 40 Apps]',
#     '$TC$': 'Traffic Classification (TC) - Base: 34; Incr: 1, 2, and 3 [Rank @ 40 Apps]',
scenario_descr_dict = {
    label: '%s - Base: %d; Incr: %d [Rank @ 40 Apps]' % (domain, base, incr)
    for label, domain, base, incr in [
        ('$CV_{5}$', 'Computer Vision (CV)', 20, 5),
        ('$CV_{10}$', 'Computer Vision (CV)', 20, 10),
        ('$CV_{20}$', 'Computer Vision (CV)', 20, 20),
        ('$TC_{1}$', 'Traffic Classification (TC)', 34, 1),
        ('$39_{1}$', 'Traffic Classification (TC)', 39, 1),
        ('$38_{2}$', 'Traffic Classification (TC)', 38, 2),
        ('$TC_{2}$', 'Traffic Classification (TC)', 34, 2),
        ('$TC_{3}$', 'Traffic Classification (TC)', 34, 3),
    ]
}

# Analysis name -> approaches ranked together in that analysis.
# Previous selections, kept for reference:
#     'incr': ['icarl', 'icarlp', 'bic', 'il2m', 'lwf', 'lucir', 'ewc', 'eeil', 'ssil', 'wu2022', 'chen2021', 'joint'],
#     'naive': ['freezing', 'finetuning', 'backbonefreezing', 'jointft', 'jointmem', 'joint']
selected_approaches_dict = {
    'incr': [
        'icarl', 'icarlp', 'bic', 'il2m', 'lwfgkd', 'lucir',
        'ewc', 'eeil', 'ssil', 'chen2021', 'scratch',
    ],
    'naive': [
        'freezing', 'finetuning', 'backbonefreezing', 'jointft',
        'jointmem', 'backbonefreezingmem', 'scratch',
    ],
    'kd': ['lwf', 'lwfgkd', 'scratch'],
}

In [11]:
# Build, for every analysis in `selected_approaches_dict`, a long-format table with one
# row per (scenario, seed, increment, last app, metric, approach) holding the approach's
# value and its rank at the 40-application step. 'scratch' is kept in the table as a
# reference but never receives a rank (its rank is NaN and it does not consume a position).

# Legacy switch: when True, metric names get a 'c' prefix and values become 100 - value.
complement = False

df_rank = {}
for analysis in selected_approaches_dict:
    rank_rows = []
    for scenario_discrs in tqdm(scenario_discrs_dict):
        tmp_rank_rows = []
        for metric in metric_dict:
            files = []
            for scenario_discr in scenario_discrs_dict[scenario_discrs]:
                files.extend(glob('%s/%s_*/material/%s' % (img_path, scenario_discr, metric_dict[metric])))

            if not files:
                # Report every discriminator that was searched, not only the last one.
                print('No files found for %s %s' % (
                    ', '.join(scenario_discrs_dict[scenario_discrs]), metric_dict[metric]))
                continue

            df = pd.concat([pd.read_parquet(file) for file in files], axis=0)
            # Translate the stored approach names through appr_dict_r (unknown names raise KeyError).
            df['Approach'] = df['Approach'].apply(lambda x: appr_dict_r[x])
            df = df[df['Approach'].isin(selected_approaches_dict[analysis])]

            # Keep only the step with 40 applications and the class subset the metric refers to.
            # The drop-style metrics share one file whose value column is 'F1 Score Drop';
            # it is renamed so that the column matches the metric name used below.
            at_40_apps = df['#Apps'] == 40
            if 'Forgetting' in metric:
                df_40 = df[at_40_apps & (df['Type'] == 'Old')]
            elif 'Intransigence' in metric:
                df_40 = df[at_40_apps & (df['Type'] == 'New')].rename(
                    columns={'F1 Score Drop': 'F1 Score Intransigence'})
            elif 'Drop Old' in metric:
                df_40 = df[at_40_apps & (df['Type'] == 'Old')].rename(
                    columns={'F1 Score Drop': 'F1 Score Drop Old'})
            else:
                df_40 = df[at_40_apps & (df['Type'] == 'All')]

            # Files without a 'Last App' column get the placeholder -1.
            if 'Last App' not in df_40:
                df_40 = df_40.assign(**{'Last App': -1})

            for g, d in df_40.groupby(['Seed', 'Increment', 'Last App']):
                # Seed 0 with the placeholder 'Last App' is skipped.
                # NOTE(review): the reason for this exclusion is not visible here - confirm.
                if g[0] == 0 and g[2] == -1:
                    continue

                tmp_d = d.drop_duplicates('Approach').sort_values(
                    metric, ascending=lower_is_better_dict[metric])
                row = {'Scenario': scenario_discrs, 'Seed': g[0], 'Increment': g[1], 'Last App': g[2],
                       'Metric': ('c' if complement else '') + metric}
                next_rank = 0
                for appr, value in zip(tmp_d['Approach'].values, tmp_d[metric].values):
                    value = float(value)
                    if appr != 'scratch':
                        next_rank += 1
                        rank = next_rank
                    else:
                        rank = np.nan
                    tmp_rank_rows.append({**row, 'Approach': appr, 'Rank': rank,
                                          'Value [%]': 100 - value if complement else value})

        if not tmp_rank_rows:
            # No result file was found for this scenario: there is nothing to average, and
            # grouping an empty, column-less frame would raise KeyError.
            continue

        # 'Average' pseudo-metric: mean value over all metrics, ranked in descending order.
        # NOTE(review): with complement=False this averages a higher-is-better score with
        # lower-is-better drops (see lower_is_better_dict) - confirm this is intended.
        tmp_df = pd.DataFrame(tmp_rank_rows)
        for g, d in tmp_df.groupby(['Seed', 'Increment', 'Last App']):
            tmp_d = d.groupby(['Approach'])['Value [%]'].mean().reset_index().sort_values(
                'Value [%]', ascending=False)
            row = {'Scenario': scenario_discrs, 'Seed': g[0], 'Increment': g[1], 'Last App': g[2],
                   'Metric': 'Average'}
            next_rank = 0
            for appr, value in zip(tmp_d['Approach'].values, tmp_d['Value [%]'].values):
                if appr != 'scratch':
                    next_rank += 1
                    rank = next_rank
                else:
                    rank = np.nan
                tmp_rank_rows.append({**row, 'Approach': appr, 'Rank': rank, 'Value [%]': float(value)})

        rank_rows.extend(tmp_rank_rows)

    df_rank[analysis] = pd.DataFrame(rank_rows)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:06<00:00,  1.29it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:04<00:00,  1.68it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:03<00:00,  2.45it/s]


In [12]:
# Sanity check: list the metric names present in the ranking table of the 'incr' analysis.
df_rank['incr']['Metric'].unique()

array(['F1 Score', 'F1 Score Forgetting', 'F1 Score Intransigence',
       'F1 Score Drop', 'F1 Score Drop Old', 'Average'], dtype=object)

In [13]:
# Inspect the column layout of the 'incr' ranking table.
df_rank['incr'].columns

Index(['Scenario', 'Seed', 'Increment', 'Last App', 'Metric', 'Approach',
       'Rank', 'Value [%]'],
      dtype='object')

In [24]:
# Export, for the selected scenarios, the mean 'Value [%]' of every (approach, metric)
# pair to one CSV per (scenario, analysis). The 'kd' analysis is not exported.
selected_scenario = ['$39_{1}$', '$CV_{20}$']
csv_path = os.path.join(img_path, 'radarplot_final')
# The folder is otherwise only created by a later plotting cell; create it here so the
# export also works on a fresh kernel.
os.makedirs(csv_path, exist_ok=True)
for approach in selected_approaches_dict.keys():
    if approach == 'kd':
        continue
    rank_table = df_rank[approach]
    for scenario in selected_scenario:
        # Compute only the mean (previously taken from a full describe()); the resulting
        # columns are 'Approach', 'Metric' and 'mean', as before.
        mean_values = (
            rank_table[rank_table['Scenario'] == scenario]
            .groupby(['Approach', 'Metric'])['Value [%]']
            .mean()
            .rename('mean')
            .reset_index()
        )
        mean_values.to_csv(os.path.join(csv_path, scenario + '_' + approach + '.csv'))

In [None]:
# Summary statistics per approach for the plain F1 Score of the 'naive' analysis,
# restricted to the first selected scenario.
selected_scenario = ['$39_{1}$', '$CV_{20}$']
naive_rank = df_rank['naive']
in_first_scenario = naive_rank['Scenario'] == selected_scenario[0]
is_f1_score = naive_rank['Metric'] == 'F1 Score'
naive_rank[in_first_scenario & is_f1_score].groupby('Approach').describe()

In [None]:
# Work on a copy so that `df_rank` keeps the raw ranking tables.
_df_rank = deepcopy(df_rank)

# Persist all analyses, concatenated, for later reuse.
os.makedirs('data', exist_ok=True)
pd.concat(list(_df_rank.values())).to_parquet('data/ranking_data.parquet')

# Make sure every (scenario, metric) group contains a 'scratch' entry: where it is missing,
# one placeholder row (NaN rank and value) is added per (seed, increment, last app).
for analysis in _df_rank:
    df_list = []
    for (scenario, metric_name), gr_df in tqdm(_df_rank[analysis].groupby(['Scenario', 'Metric'])):
        if 'scratch' in gr_df['Approach'].unique():
            df_list.append(gr_df)
            continue
        for (seed, increment, last_app), sel_df in gr_df.groupby(['Seed', 'Increment', 'Last App']):
            # Append via concat instead of `sel_df.loc[len(sel_df)] = ...`: the groups keep the
            # index labels of the full table, so the label `len(sel_df)` may already belong to
            # a row of the group, which would then be overwritten rather than extended.
            scratch_row = pd.DataFrame([{
                'Scenario': scenario, 'Seed': seed, 'Increment': increment, 'Last App': last_app,
                'Metric': metric_name, 'Approach': 'scratch', 'Rank': np.nan, 'Value [%]': np.nan,
            }], columns=sel_df.columns)
            df_list.append(pd.concat([sel_df, scratch_row], ignore_index=True))
    # A fresh RangeIndex avoids duplicated labels in the combined table.
    _df_rank[analysis] = pd.concat(df_list, ignore_index=True)


In [None]:
# List the approaches present in the 'kd' table of `_df_rank`.
_df_rank['kd']['Approach'].unique()

In [None]:
# Mean of the numeric columns per (metric, approach) for scenario $39_{1}$ of the 'kd' analysis.
# numeric_only=True keeps the string column 'Scenario' out of the aggregation; without it
# pandas >= 2.0 raises instead of silently dropping non-numeric columns.
_df_rank['kd'][_df_rank['kd']['Scenario'] == '$39_{1}$'].groupby(['Metric', 'Approach']).mean(numeric_only=True)

In [None]:
# _df_rank = deepcopy(df_rank)
# for key in _df_rank:
#     _df_rank[key].loc[:, ['Metric', 'Value [%]']] = _df_rank[key].loc[:, ['Metric', 'Value [%]']].apply(
#         lambda x: x if x['Metric'] == 'F1 Score' else {'Metric': 'c' + x['Metric'], 'Value [%]': 100 - x['Value [%]']},
#         axis=1, result_type='expand')

In [None]:
# def compute_rank_per_dataset(ser, ascending=True):
#     ser_sorted = ser.sort_values(ascending=ascending)
#     ser_ranked = pd.Series(np.arange(1, ser.shape[0]+1), index=ser_sorted.index.tolist())
#     ser_ranked = ser_ranked.loc[ser.index]
#     return ser_ranked

In [None]:
# __df = _df_rank['incr'][(_df_rank['incr']['Scenario'].isin(['$39_{1}$']))].groupby(['Approach', 'Scenario', 'Last App'])['Value [%]'].mean().reset_index()
# __df_m = __df.pivot(index='Last App', columns='Approach', values='Value [%]')
# __df_r = __df_m.apply(compute_rank_per_dataset, axis=1, ascending=False)
# ser_avg_rank = __df_r.mean(axis=0)

# print(ser_avg_rank.sort_values())

# __r = []
# __df_g = __df.groupby('Last App')
# for g, d in __df_g:
#     for r, appr in enumerate(d.sort_values('Value [%]', ascending=False)['Approach'].values):
#         __r.append({'Dataset': g, 'Approach': appr, 'Rank': r + 1})

# print(pd.DataFrame(__r).groupby('Approach')['Rank'].mean().sort_values())

# print(pd.DataFrame(_df_rank['incr'][
#     (_df_rank['incr']['Scenario'].isin(['$39_{1}$'])) &
#     (_df_rank['incr']['Metric'] == 'Average')
# ]).groupby('Approach')['Rank'].mean().sort_values())

# print(__df.groupby('Approach')['Value [%]'].mean().sort_values(ascending=False))


In [None]:
# List the approaches present in the 'incr' table of `_df_rank`.
_df_rank['incr']['Approach'].unique()

In [None]:
# _df_rank['incr'][(_df_rank['incr']['Approach'] == 'joint') & (_df_rank['incr']['Scenario'] == '$39_{1}$')]

In [None]:
# df_mean_rank_per_appr = _df_rank.groupby(['Scenario', 'Metric', 'Approach', 'Increment'])['Rank'].mean().reset_index()
# df_mean_rank_per_appr = df_mean_rank_per_appr.groupby(['Scenario', 'Approach', 'Increment'])['Rank'].mean().reset_index()
# df_std_rank_per_appr = _df_rank.groupby(['Scenario', 'Approach', 'Increment'])['Rank'].std().reset_index()
# df_mean_rank_per_appr.loc[:, 'StdDev'] = df_std_rank_per_appr['Rank']

# df_mean_rank_per_appr.loc[df_mean_rank_per_appr['Approach'].isin(['wu2022', 'chen2021', 'icarlp']), 'Type'] = 'Net'
# df_mean_rank_per_appr.loc[~df_mean_rank_per_appr['Approach'].isin(['wu2022', 'chen2021', 'icarlp']), 'Type'] = 'Vis'

# cv20_filt = df_mean_rank_per_appr['Scenario'] == '$CV_{20}$'
# tc1_filt = df_mean_rank_per_appr['Scenario'] == '$39_{1}$'

# print(df_mean_rank_per_appr.loc[cv20_filt, 'Scenario'].unique())
# print(df_mean_rank_per_appr.loc[tc1_filt, 'Scenario'].unique())
# print(df_mean_rank_per_appr[tc1_filt])
# print(df_mean_rank_per_appr[cv20_filt])

In [None]:
# metric = 'F1 Score'
# discr = 'c'
# r = 'Value [%]'
# metrics_list = ['%s' % metric,
#         discr + '%s Intransigence' % metric,
#         discr + '%s Forgetting' % metric,
#         discr + '%s Drop' % metric,
#         discr + '%s Drop Old' % metric]
# order_metrics_list = [2, 3, 0, 1, 4]
# sorting_per_scenario = {}
# for analysis in selected_approaches_dict:
#     sorting_per_scenario[analysis]={}
#     df = _df_rank[analysis]
#     for scenario in df['Scenario'].unique():
#         approaches = df['Approach'].unique()
#         approaches_values_arr = np.zeros(approaches.shape[0])
#         for i in range(approaches.shape[0]):
#             df_g = df[(df['Approach'] == approach) & (df['Scenario'] == scenario)].groupby(['Metric'])
#             appr_df_tmp_list = []
#             for g, d in df_g:
#                 fact = 0
#                 appr_df_tmp_list.append([g, abs(fact - d[r].mean()), d[r].std()])
#             appr_df_tmp = pd.DataFrame(columns=['Metric', 'Value', 'Standard'], data=appr_df_tmp_list)
#             appr_metrics_values = list(appr_df_tmp[appr_df_tmp['Metric'].isin(metrics_list)]['Value'].values)
#             #print(appr_metrics_values)
#             appr_metrics_values = [appr_metrics_values[i] for i in order_metrics_list]
#             #print(appr_metrics_values)
#             approaches_values_arr = sum([appr_metrics_values[i] * appr_metrics_values[(i+1) % len(order_metrics_list)] for i in range(len(appr_metrics_values))])
        
# #         print(approaches)
# #         print([approaches_values_arr[i] for i in np.argsort(approaches_values_arr)])
# #         input()
#         sorting_per_scenario[analysis][scenario] = []

In [None]:
#plot_spasa_di_ranking_spider(r='Rank', metric='acc', r_range=(12, 1), autoticks=True,savefig=os.path.join(ranking_path, 'rank'), plot_std=False)

In [None]:
#plot_spasa_di_ranking_spider(r='Rank', metric='f1', r_range=(12, 1), autoticks=True, plot_std=False)
#plot_spasa_di_ranking_spider(r='Rank', metric='f1', r_range=(12, 1), autoticks=True,savefig=os.path.join(ranking_path, 'rank'), plot_std=True, top=3)

In [None]:
# List the metric names present in the 'incr' table of `_df_rank`.
_df_rank['incr']['Metric'].unique()

In [None]:
import plotly.graph_objects as go
from plotly.io import write_image
from copy import copy
from util.colour_mapper import *
import plotly.io as pio
import math
# Colour lookup returned by the project's colour_mapper(); the radar plot uses it as
# `colors_dict` and indexes it by approach name.
palette = colour_mapper()
# Whether approaches are ordered by radar-polygon area; reassigned by the plotting loop further down.
ranking_by_area = False

# Line dash style of every approach in the radar plots:
# analysis -> scenario label -> approach -> plotly dash name.
# Approaches missing from a mapping fall back to 'solid' in the plotting function.
linestyles = {
    'incr': {
        '$38_{2}$': dict(
            chen2021='solid',
            bic='dash',
            lwfgkd='dot',
            ewc='solid',
            lucir='dash',
            icarlp='dot',
            eeil='dash',
            il2m='dot',
        ),
        '$39_{1}$': dict(
            bic='solid',
            chen2021='dash',
            lwfgkd='dot',
            icarlp='solid',
            ewc='dash',
            eeil='dot',
            lucir='solid',
            il2m='dash',
        ),
        '$CV_{20}$': dict(
            chen2021='solid',
            bic='dash',
            lucir='dot',
            lwf='solid',
            ssil='dash',
            eeil='solid',
            icarlp='dash',
            il2m='dot',
        ),
    },
    'naive': {
        '$38_{2}$': dict(
            jointmem='solid',
            backbonefreezingmem='dash',
            finetuning='solid',
            jointft='dash',
            backbonefreezing='solid',
            freezing='dash',
        ),
        '$39_{1}$': dict(
            backbonefreezingmem='solid',
            jointmem='dash',
            finetuning='solid',
            jointft='dash',
            backbonefreezing='solid',
            freezing='dash',
        ),
        '$CV_{20}$': dict(
            jointmem='solid',
            backbonefreezingmem='dash',
            finetuning='dash',
            jointft='dot',
            freezing='dash',
            backbonefreezing='dot',
        ),
    },
    'kd': {
        '$39_{1}$': dict(lwf='solid', lwfgkd='dash'),
        '$CV_{20}$': dict(lwf='solid', lwfgkd='dash'),
    },
}


def plot_spasa_di_ranking_spider(data, r='Rank', metric='f1', r_range=(12, 1), autoticks=False, plot_std=False, savefig=None,
                                width=850, height=580, dpi=300, top=None, fill=None, show_annotations=False, best_mode='max',
                                show_title=True, axis_angle=0, selected_approaches=None, legend=True, sorted_approaches=None,
                                order_metrics = [2, 3, 0, 1, 4], dashes={}, selected_scenario=None, min_max_norm=True, fig_format='pdf'):
    """Draw one radar (spider) chart per scenario, with one polygon per approach.

    Each axis is a metric; the radius is the absolute mean of column `r` over all rows of
    that (scenario, approach, metric). The 'Overall' metric takes part in the min-max
    normalisation and in the 'Scratch Overall' printout, but is not drawn as an axis.

    Relies on the module-level names `palette`, `appr_dict`, `scenario_descr_dict` and
    `scenario_discrs_dict`. Side effects: every `appr_dict` entry whose key contains 'lwf'
    is relabelled to 'LwF', each figure is shown, summaries are printed, and an image is
    written per scenario when `savefig` is given.

    Parameters
    ----------
    data : DataFrame with columns 'Scenario', 'Approach', 'Metric' and the column named by
        `r` ('Rank' is also needed when `sorted_approaches` is not given).
    r : column to plot, e.g. 'Rank' or 'Value [%]'. Any other value makes the metric names
        be looked up with a 'c' prefix.
    metric : substring matched against the lower-cased metric names to select the axes;
        also part of the output file name.
    r_range : range of the radial axis.
    autoticks, dpi : accepted for backward compatibility, currently unused.
    plot_std : also draw dotted mean +/- standard deviation outlines.
    savefig : path prefix of the saved figures, or None to skip saving.
    width, height : figure size passed to the layout.
    top : plot only the first `top` approaches (None or a value >= their number: all).
    fill : fill the polygons; only takes effect together with `plot_std`.
    show_annotations : add best / worst approach and value to every axis label.
    best_mode : 'max' if the largest mean is the best one, anything else for the smallest.
    show_title : put the scenario description in the title (otherwise it is printed).
    axis_angle : angle of the radial axis.
    selected_approaches : approaches to keep; None keeps all of them.
    legend : whether to show the legend.
    sorted_approaches : optional mapping scenario -> ordered list of approaches; when
        omitted, approaches are ordered by their mean rank on the 'Average' metric.
    order_metrics : permutation applied to the plotted axes (alphabetical order of the
        original metric names, 'Overall' removed). It must match the number of axes: with
        the three axes defined below the five-element default raises IndexError, so
        callers are expected to pass e.g. [2, 0, 1].
    dashes : mapping scenario -> approach -> plotly dash style (default 'solid').
    selected_scenario : scenarios to plot; None or empty plots every scenario in `data`.
    min_max_norm : rescale every metric with the min / max of the per-approach means.
    fig_format : image format, also used as the file extension.
    """

    def to_plotly_color(mpl_color, alpha=1):
        """Convert an RGB(A) tuple with channels in [0, 1] into a plotly 'rgba(...)' string."""
        return 'rgba(%d,%d,%d,%.1f)' % (tuple(v * 255 for v in mpl_color[:3]) + (alpha,))

    # NOTE(review): presumably maps approach name -> matplotlib-style colour tuple; confirm
    # against util.colour_mapper.
    colors_dict = palette
    # One integer marker symbol per approach, in order of first appearance.
    markers_dict = {appr: idx for idx, appr in enumerate(data['Approach'].unique())}

    discr = '' if r == 'Rank' or r == 'Value [%]' else 'c'

    # Metric name in `data` -> axis label. Forgetting is intentionally left out:
    #     discr + 'F1 Score Forgetting': discr + 'Forgetting',
    metrics_dict = {
        'F1 Score': 'Overall',
        discr + 'F1 Score Intransigence': discr + 'DropNew',
        discr + 'F1 Score Drop': discr + 'DropAverage',
        discr + 'F1 Score Drop Old': discr + 'DropOld',
    }
    # Fix LwF-GKD label (if it exists) to LwF
    for appr in list(appr_dict.keys()):
        if 'lwf' in appr:
            appr_dict[appr] = 'LwF'

    r_metrics_dict = {label: key for key, label in metrics_dict.items()}
    metrics = [label for key, label in metrics_dict.items() if metric in key.lower()]

    scenarios = selected_scenario if selected_scenario else data['Scenario'].unique()
    for scenario in scenarios:

        fig = go.Figure()

        if sorted_approaches:
            approaches = list(sorted_approaches[scenario])
        else:
            average_rows = data[(data['Scenario'] == scenario) & (data['Metric'] == 'Average')]
            approaches = list(average_rows.groupby('Approach')['Rank'].mean().sort_values(ascending=True).index)
        # None means "no filtering" (it used to raise TypeError).
        if selected_approaches is not None:
            approaches = [a for a in approaches if a in selected_approaches]
        plotted = approaches[:top] if top is not None and top < len(approaches) else approaches

        best_by_metric = {}
        worst_by_metric = {}
        best_by_value = {}
        worst_by_value = {}
        if show_annotations:
            annotated = data[
                (data['Scenario'] == scenario) &
                (data['Metric'].isin([r_metrics_dict[m] for m in metrics])) &
                (data['Approach'].isin(approaches))]
            # Scalar group key: the group label is the metric name on every pandas version.
            for met, dfmet in annotated.groupby('Metric'):
                if top:
                    dfmet = dfmet[dfmet['Approach'].isin(approaches[:top])]
                means = dfmet.groupby('Approach')[r].mean().sort_values()
                # Exclude scratch by name (it is only sorted last when its mean is NaN) and keep
                # names and values filtered together so that they cannot get out of step.
                keep = np.array([a != 'scratch' and a in appr_dict for a in means.index], dtype=bool)
                means = means[keep].dropna()
                if means.empty:
                    continue
                best_pos, worst_pos = (-1, 0) if best_mode == 'max' else (0, -1)
                label = metrics_dict[met]
                best_by_metric[label] = means.index[best_pos]
                worst_by_metric[label] = means.index[worst_pos]
                best_by_value[label] = '{}%'.format(round(means.iloc[best_pos], 1))
                worst_by_value[label] = '{}%'.format(round(means.iloc[worst_pos], 1))

            print('Best per metric:', best_by_metric)
            print('Worst per metric:', worst_by_metric)
            print('Best per value:', best_by_value)
            print('Worst per value:', worst_by_value)

        if min_max_norm:
            # The loop variable must not be called `metric`: it would overwrite the parameter
            # that is later used to build the output file name.
            metric_avg_vals_dict = {metric_key: [] for metric_key in metrics_dict}
            for approach in plotted:
                approach_means = data[
                    (data['Approach'] == approach) & (data['Scenario'] == scenario)
                ].groupby('Metric')[r].mean()
                for metric_key in metrics_dict:
                    # A metric without rows for this approach counts as NaN.
                    metric_avg_vals_dict[metric_key].append(approach_means.get(metric_key, float('nan')))
            min_max_dict = {}
            for metric_key, label in metrics_dict.items():
                finite = [val for val in metric_avg_vals_dict[metric_key] if not math.isnan(val)]
                min_max_dict[label] = (min(finite), max(finite)) if finite else (float('nan'), float('nan'))

        for approach in plotted:
            approach_rows = data[(data['Approach'] == approach) & (data['Scenario'] == scenario)]
            # One record per mapped metric, in alphabetical order of the original metric names
            # (the order `order_metrics` refers to).
            records = [
                {'Metric': metrics_dict[name], 'Value': abs(grp[r].mean()), 'Standard': grp[r].std()}
                for name, grp in approach_rows.groupby('Metric')
                if name in metrics_dict
            ]
            if not records:
                # Nothing to draw for this approach in this scenario.
                continue
            df_tmp = pd.DataFrame(records, columns=['Metric', 'Value', 'Standard'])

            if approach == 'scratch':
                print('Scratch Overall: %s' % df_tmp[df_tmp['Metric'] == 'Overall']['Value'].values[0])

            if min_max_norm:
                for label, (lowest, highest) in min_max_dict.items():
                    rows = df_tmp['Metric'] == label
                    df_tmp.loc[rows, 'Value'] = (df_tmp.loc[rows, 'Value'] - lowest) / (highest - lowest)

            # Masking overall metric in plot
            df_tmp = df_tmp[df_tmp['Metric'] != 'Overall']
            axes = df_tmp[df_tmp['Metric'].isin(metrics)]

            radious = list(axes['Value'].values)
            radious = [radious[i] for i in order_metrics]
            radious += [radious[0]]  # close the polygon

            thetas = list(axes['Metric'].values)
            thetas = [thetas[i] for i in order_metrics]

            if show_annotations:
                thetas = [
                    '<b>%s</b><br>&#9786;%s: %s<br>&#9785;%s: %s' % (
                        '<br>'.join(t.split('-')), appr_dict[best_by_metric[t]], best_by_value[t],
                        appr_dict[worst_by_metric[t]], worst_by_value[t])
                    if t in best_by_metric else '<b>%s</b>' % '<br>'.join(t.split('-'))
                    for t in thetas]
            thetas += [thetas[0]]

            # Approaches whose values are all NaN are not drawn.
            if all(math.isnan(val) for val in radious):
                continue

            fig.add_trace(
                go.Scatterpolar(
                    r=radious,
                    theta=thetas,
                    fill='toself' if fill and plot_std else 'none',
                    line={'color': to_plotly_color(colors_dict[approach], 1.), 'width':5 if approach != 'scratch' else 0,
                          'dash': dashes.get(scenario, {}).get(approach, 'solid')},
                    fillcolor=to_plotly_color(colors_dict[approach], .15),
                    marker_symbol=markers_dict[approach] if approach != 'scratch' else 'star',
                    marker_size=15,
                    name=appr_dict[approach]
                )
            )

            if plot_std:
                radious_std = list(axes['Standard'].values)
                # Same permutation as the means (this used to be a hard-coded [2, 3, 0, 1, 4]).
                radious_std = [radious_std[i] for i in order_metrics]
                radious_std += [radious_std[0]]

                radious_std_p = [m + d for (m, d) in zip(radious, radious_std)]
                radious_std_n = [m - d for (m, d) in zip(radious, radious_std)]

                fig.add_trace(
                    go.Scatterpolar(
                        r=radious_std_p,
                        theta=thetas,
                        fill='none',
                        line={'color': to_plotly_color(colors_dict[approach], .7), 'dash':'dot', 'width':3},
                        fillcolor=to_plotly_color(colors_dict[approach], .15),
                        marker_symbol=markers_dict[approach],
                        marker_size=0,
                        showlegend=False
                    )
                )

                fig.add_trace(
                    go.Scatterpolar(
                        r=radious_std_n,
                        theta=thetas,
                        fill='none',
                        line={'color': to_plotly_color(colors_dict[approach], .7), 'dash':'dot', 'width':3},
                        # White; channels are expected in [0, 1] by to_plotly_color.
                        fillcolor=to_plotly_color((1., 1., 1.), .3),
                        marker_symbol=markers_dict[approach],
                        marker_size=0,
                        showlegend=False
                    )
                )

        title_text = 'Scenario %s' % scenario_descr_dict[scenario]
        if not show_title:
            print(title_text)
            title_text = None

        update_layout_dict = dict(
            title_text=title_text,
            font=dict(
                family='Arial',
                size=18,
                color='black'
            ),
            width=width,
            height=height,
            showlegend=legend,
        )

        update_polars_dict = dict(
            radialaxis=dict(
                range=r_range,
                angle = axis_angle,
                gridcolor='#bbbbbb',
                tickfont=dict(
                    family='Arial Black',
                    size=18,
                    color='black'
                ),
                dtick=10 if not min_max_norm else 0.1
            ),
            bgcolor='rgba(250, 250, 250, 200)',
            angularaxis_rotation=-20
        )

        fig.update_layout(**update_layout_dict)
        fig.update_polars(**update_polars_dict)

        fig.show()

        if savefig is not None:
            # How to suppress textbox ""Loading [MathJax]/extensions/MathMenu.js"" into pdf images
            # https://github.com/plotly/plotly.py/issues/3469
            pio.full_figure_for_development(fig, warn=False)
            file_stem = '_'.join(
                [savefig] + scenario_discrs_dict[scenario] + [metric] +
                ['%s'%('Top%s'%(top if top is not None and top<len(approaches) else ''))])
            # The extension follows the requested format (it used to be '.pdf' for every format).
            write_image(fig, '%s.%s' % (file_stem, fig_format), format=fig_format, engine='kaleido')


In [None]:
def sort_approaches_by_area(df, metrics_list, order_metrics_list, r='Value [%]'):
    """Order the approaches of every scenario by the area of their radar polygon.

    For each (scenario, approach) the absolute mean of column `r` is computed per metric.
    The metrics listed in `metrics_list` are taken in alphabetical order, permuted with
    `order_metrics_list`, and the sum of the products of neighbouring values (cyclically)
    is used as the area score.

    Parameters
    ----------
    df : DataFrame with columns 'Scenario', 'Approach', 'Metric' and the column `r`.
    metrics_list : metric names that form the axes of the polygon.
    order_metrics_list : permutation (list of positions) giving the order of the axes.
    r : column holding the values; it used to be read from a global variable named `r`.

    Returns
    -------
    dict mapping each scenario to the list of approaches sorted by increasing score;
    approaches with a NaN score come last.

    Raises
    ------
    IndexError if an approach has fewer of the requested metrics than
    `order_metrics_list` refers to.
    """
    sorting_per_scenario = {}
    approaches = df['Approach'].unique()
    n_axes = len(order_metrics_list)
    for scenario in df['Scenario'].unique():
        areas = np.zeros(approaches.shape[0])
        for idx, approach in enumerate(approaches):
            rows = df[(df['Approach'] == approach) & (df['Scenario'] == scenario)]
            # Scalar group key, so the index holds plain metric names on every pandas version.
            means = rows.groupby('Metric')[r].mean().abs()
            values = list(means[means.index.isin(metrics_list)].values)
            values = [values[j] for j in order_metrics_list]
            areas[idx] = sum(values[j] * values[(j + 1) % n_axes] for j in range(len(values)))
        # np.argsort already places NaN scores at the end, so no extra handling is needed.
        sorting_per_scenario[scenario] = [approaches[j] for j in np.argsort(areas)]
    return sorting_per_scenario

In [None]:
# import os

# metric = 'F1 Score'
# discr = 'c'
# r = 'Value [%]'
# metrics_list = ['%s' % metric,
#         discr + '%s Intransigence' % metric,
#         discr + '%s Forgetting' % metric,
#         discr + '%s Drop' % metric,
#         discr + '%s Drop Old' % metric]
# analysis_list = ['incr', 'naive']
# selected_scenario = ['$38_{2}$', '$39_{1}$', '$CV_{20}$']
# order_metrics_list = [2, 3, 0, 1, 4]
# legend = True
# ranking_path = os.path.join(img_path, 'ranking_normalized')
# if not os.path.exists(ranking_path):
#     os.makedirs(ranking_path)
# for analysis in analysis_list:
#         df = _df_rank[analysis][_df_rank[analysis]['Scenario'].isin(selected_scenario)]
#         sorted_approaches = sort_approaches_by_area(df, metrics_list, order_metrics_list)
#         plot_spasa_di_ranking_spider(df,
#             r='Value [%]', metric='f1', r_range=(-.1, 1), autoticks=True, plot_std=False, top=None,
#             fill=True, show_annotations=True, savefig=os.path.join(ranking_path, 'radar_metric_%s' % analysis),
#             show_title=False, axis_angle=90, selected_approaches=selected_approaches_dict[analysis], legend=legend,
#                                     sorted_approaches=sorted_approaches, order_metrics=order_metrics_list, dashes=linestyles[analysis],
#                                     selected_scenario=selected_scenario, min_max_norm=True)

In [None]:
# _df_rank['incr'].loc[(_df_rank['incr']['Approach'] == 'bic') & (_df_rank['incr']['Metric'] == 'F1 Score'), 'Value [%]'] = np.nan
# import os

# for analysis in selected_approaches_dict:
#     ranking_by_area = False
#     legend = True
    
#     ranking_path = os.path.join(img_path, 'ranking%s%s' % ('_by_area' if ranking_by_area else '', '_noleg' if not legend else ''))
#     if not os.path.exists(ranking_path):
#         os.makedirs(ranking_path)
        
#     plot_spasa_di_ranking_spider(_df_rank[analysis],
#         r='Value [%]', metric='f1', r_range=(0, 100), autoticks=True, plot_std=False, top=None,
#         fill=True, show_annotations=True, savefig=os.path.join(ranking_path, 'radar_metric_%s' % analysis),
#         show_title=False, axis_angle=90, selected_approaches=selected_approaches_dict[analysis],
#                                  ranking_by_area=ranking_by_area, legend=legend, dashes=linestyles[analysis])
#     plot_spasa_di_ranking_spider(_df_rank,
#         r='Rank', metric='f1', r_range=(len(selected_approaches_dict[analysis]) + 1, 1), autoticks=True, plot_std=False,
#         top=None, fill=True, show_annotations=True, savefig=os.path.join(ranking_path, 'radar_rank_%s' % analysis),
#         best_mode='min', show_title=False, axis_angle=90, selected_approaches=selected_approaches_dict[analysis])

In [None]:
# _df_rank['incr'].loc[(_df_rank['incr']['Approach'] == 'bic') & (_df_rank['incr']['Metric'] == 'F1 Score'), 'Value [%]'] = np.nan
import os

# ---------------------------------------------------------------------------
# Radar ("spider") plots of the per-approach metric values.
# Relies on names defined in earlier cells: img_path, _df_rank,
# selected_approaches_dict, appr_dict_alph_sorted, linestyles,
# sort_approaches_by_area and plot_spasa_di_ranking_spider.
# ---------------------------------------------------------------------------
metric = 'F1 Score'
discr = ''  # prefix prepended to every derived metric name (empty: no prefix)
r = 'Value [%]'  # column of the ranking frame passed as the `r` argument of the plot helper

# Metrics handed to the area-based sorting. The 'Forgetting' variant is
# currently excluded; to restore the five-metric layout re-insert
#     discr + '%s Forgetting' % metric
# right after the 'Intransigence' entry.
metrics_list = [
    '%s' % metric,
    discr + '%s Intransigence' % metric,
    discr + '%s Drop' % metric,
    discr + '%s Drop Old' % metric,
]

# selected_scenario = ['$38_{2}$', '$39_{1}$', '$CV_{20}$']
selected_scenario = ['$39_{1}$', '$CV_{20}$']

# ToDo: Generalize order_metrics_list in order to test all area combinations
# order_metrics_list = [2, 3, 0, 1, 4]
# NOTE(review): three indices are given here while metrics_list holds four
# entries - confirm this matches what the plotting/sorting helpers expect.
order_metrics_list = [2, 0, 1]

# ranking_by_area_list = [False, True]
ranking_by_area_list = [False]
legend = True
sorted_approaches = None
analysis_list = ['incr', 'naive']

for ranking_by_area in ranking_by_area_list:
    # Each ranking mode writes its figures to a dedicated sub-folder of img_path.
    if ranking_by_area:
        fig_dir = 'ranking_by_area%s' % ('' if legend else '_noleg')
    else:
        fig_dir = 'radarplot_final'
    fig_path = os.path.join(img_path, fig_dir)
    # exist_ok=True avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(fig_path, exist_ok=True)

    for analysis in analysis_list:
        # Restrict the ranking frame of this analysis to the scenarios being plotted.
        df = _df_rank[analysis][_df_rank[analysis]['Scenario'].isin(selected_scenario)]

        if ranking_by_area:
            print('Sorting approaches by area')
            sorted_approaches = sort_approaches_by_area(df, metrics_list, order_metrics_list)
            print(sorted_approaches)
        else:
            # Same ordering for every scenario: iteration order of
            # appr_dict_alph_sorted, filtered to the approaches selected for this analysis.
            alph_sorted = [key for key in appr_dict_alph_sorted
                           if key in selected_approaches_dict[analysis]]
            # list(...) gives each scenario its own independent copy.
            sorted_approaches = {scenario: list(alph_sorted) for scenario in selected_scenario}

        plot_spasa_di_ranking_spider(
            df,
            r=r, metric='f1', r_range=(95, -5), autoticks=True, plot_std=False, top=None,
            fill=True, show_annotations=True,
            savefig=os.path.join(fig_path, 'radar_metric_%s' % analysis),
            show_title=False, axis_angle=90,
            selected_approaches=selected_approaches_dict[analysis], legend=legend,
            sorted_approaches=sorted_approaches, order_metrics=order_metrics_list,
            dashes=linestyles[analysis], selected_scenario=selected_scenario,
            min_max_norm=False, best_mode='min', fig_format='pdf')


In [None]:
# plot_spasa_di_ranking_spider(r='Rank', metric='f1', r_range=(len(selected_approaches) + 1, 1), autoticks=True, plot_std=True, top=3, fill=False, show_annotations=True, savefig=os.path.join(ranking_path, 'radar_rank'), best_mode='min', show_title=False, axis_angle=90)

In [None]:
# _df_rank

In [None]:
import itertools
import math

# ---------------------------------------------------------------------------
# Check whether the area-based ranking of the approaches depends on the order
# in which the metrics are passed to sort_approaches_by_area.
# Relies on metrics_list and selected_scenario from the radar-plot cell, and
# on _df_rank / sort_approaches_by_area defined in earlier cells.
# ---------------------------------------------------------------------------
n_metrics = len(metrics_list)
index_list = list(range(n_metrics))

# itertools.permutations yields tuples in lexicographic order of the input
# positions, so the first n!/n (= (n-1)!) tuples are exactly the orderings
# that keep index 0 in first place. islice stops after that many instead of
# materialising all n! permutations, and // keeps the count an exact integer.
# NOTE(review): presumably the first metric is pinned because the remaining
# orderings are rotations of these - confirm against sort_approaches_by_area.
perm_list = list(itertools.islice(itertools.permutations(index_list),
                                  math.factorial(n_metrics) // n_metrics))

are_ranking_equals_per_analysis = {}
for analysis in ['incr', 'naive']:
    df = _df_rank[analysis][_df_rank[analysis]['Scenario'].isin(selected_scenario)]

    # One result per metric ordering; each result is indexed by scenario below.
    ranking_all_perm_list = [sort_approaches_by_area(df, metrics_list, list(perm))
                             for perm in perm_list]

    # Regroup: scenario -> list of rankings, one per metric ordering.
    ranking_per_scenario_dict = {
        scenario: [ranking_all_scenario[scenario] for ranking_all_scenario in ranking_all_perm_list]
        for scenario in selected_scenario
    }

    # A scenario is order-invariant when every ranking equals the first one.
    are_ranking_equals_per_analysis[analysis] = {
        scenario: all(rankings[0] == other for other in rankings[1:])
        for scenario, rankings in ranking_per_scenario_dict.items()
    }

print(are_ranking_equals_per_analysis)

# Approaches Comparison by Metric Value Drop from Previous

In [None]:
# m=df_rank[(df_rank['Metric'].isin(['F1 Score All', 'F1 Score Drop All'])) & (df_rank['Scenario'] == '$CV_{5}$')].groupby(['Approach', 'Increment', 'Metric', 'Scenario'])[['Rank', 'Value [%]','Drop from Previous [%]']].mean()
# m[m['Rank']<3]

In [None]:
# plot_spasa_di_ranking(y='Drop from Previous [%]', ylim=(None, None))

# Approaches Comparison by Metric Value

In [None]:
# plot_spasa_di_ranking(y='Value [%]', ylim=(None, 100))

# Approaches Comparison by Ranking

In [None]:
# plot_spasa_di_ranking(y='Rank', ylim=(0, 11))

In [None]:
# import os
# import portalocker as pl
# import json

# def get_ts(df_filename):
#     return '-'.join(df_filename.split('/')[-1].split('-')[1:]).split('_')[0]


# def get_args_dict(df_filename):
#     ts = get_ts(df_filename)
#     try:
#         with open('/'.join(df_filename.split('/')[:-2] + ['args-%s.txt' % ts])) as fin:
#             return json.loads(fin.read())
#     except FileNotFoundError as _:
#         with open('/'.join(df_filename.split('/')[:-1] + ['args-%s.txt' % ts])) as fin:
#             return json.loads(fin.read())

# def preprocess_metrics(metric, metr_dict, mety_dict, df_filenames, plot_path, analysis, override=True, appr_dict=None,
#                        nc_incr_tasks=None):
#     metric, metric_type = metric
#     print('Preprocessing %s %s %s' % (analysis, metric, metric_type))
    
# #     if metric_type != 'intransigence':
# #         return pd.DataFrame()

#     material_path = os.path.join(plot_path, 'material')
#     with pl.Lock('lock'):
#         if not os.path.exists(material_path):
#             os.makedirs(material_path)
            
#     if nc_incr_tasks is None:
#         nc_incr_tasks = [0] * len(df_filenames)

#     discr = mety_dict[metric_type]
#     normal_discr = mety_dict['normal'][analysis]
#     try:
#         discr = discr[analysis]
#     except:
#         pass

#     average_analysis = analysis in ['all', 'new', 'old']
#     normal_metric_col = ' '.join([metr_dict[metric], normal_discr]).strip()

#     preprocessed_fn = os.path.join(material_path,
#                                    '%s_metrics_%s%s-material.parquet' % (analysis, metric, discr.lower()))
#     if os.path.exists(preprocessed_fn) and override is None:
#         override = input('File "%s" exists: override [Y, n]? ' % preprocessed_fn).lower() != 'n'
#     if not os.path.exists(preprocessed_fn) or override:  # Generate or Override
#         common_columns = ['Seed', 'Network', 'Approach', 'Episode', 'Batch Size', 'Patience', 'Increment']
#         if average_analysis:
#             df = pd.DataFrame(columns=common_columns + [normal_metric_col])
#         else:  # if analysis == 'per_episode':
#             df = pd.DataFrame(columns=common_columns + ['Task', normal_metric_col])
#         for df_filename, nc_incr_task in tqdm(list(zip(df_filenames, nc_incr_tasks))):
#             args_dict = get_args_dict(df_filename)
#             assert args_dict.get('nc_incr_tasks', nc_incr_task) > 0, 'Please set --nc-incr-tasks argument.'
#             row = {'Seed': args_dict['seed'], 'Network': args_dict['network'],
#                    'Approach': '%s' % appr_dict[args_dict['approach']], 'Batch Size': args_dict['batch_size'],
#                    'Patience': args_dict['lr_patience'],
#                    'Out Features Size': args_dict.get('out_features_size', 200),
#                    'Momentum': args_dict.get('momentum', .0)}
#             tmp_df = pd.read_parquet(df_filename)
#             tmp_df[metric] = tmp_df[metric].apply(lambda x: [np.nan if v is None else v for v in x])
#             if 'nc_first_task' in tmp_df:
#                 tmp_df_g = tmp_df.groupby(['nc_first_task', 'nc_incr_tasks'])
#             else:
#                 tmp_df_g = [((args_dict['nc_first_task'], args_dict['nc_incr_tasks']), tmp_df)]
#             for g, tmp_d in tmp_df_g:
#                 values = [[v0 * 100 for v0 in v] for v in tmp_d[metric].values]
#                 _nc_first_task = g[0]
#                 _nc_incr_tasks = g[1]
#                 for ep, value in enumerate(values):
#                     row.update({'Episode': ep,
#                                 'Increment': _nc_incr_tasks,
#                                 '#Apps': _nc_first_task + _nc_incr_tasks * ep})
#                     if average_analysis:
#                         if 'all' in analysis:
#                             row.update({normal_metric_col: value[0], 'Type': 'All'})
#                             df = df.append(row, ignore_index=True)
#                         else:
#                             for val, type in zip(value, ['Old', 'New']):
#                                 row.update({normal_metric_col: val, 'Type': type})
#                                 df = df.append(row, ignore_index=True)
#                     else:  # if analysis == 'per_episode':
#                         for task, val in enumerate(value):
#                             row.update({'Task': task, normal_metric_col: val})
#                             df = df.append(row, ignore_index=True)
#         assert len(df) > 0
#         if metric_type in ['expectation', 'intransigence']:
#             df_UB = df[df['Approach'] == 'Scratch'].reset_index(drop=True)
#             df = df[df['Approach'] != 'Scratch'].reset_index(drop=True)
#             metric_col = ' '.join([metr_dict[metric], discr]).strip()
#             columns = [col for col in list(df.columns) if
#                        col not in [normal_metric_col, 'Approach', 'Momentum',
# #                                    'Episode', 'Increment', 'Task'
#                                   ]]
#             episode_index = columns.index('Episode')
#             try:
#                 task_index = columns.index('Task')
#             except ValueError as _:
#                 pass
#             df[metric_col] = np.nan
#             df_g = df.groupby(columns)
#             groups = list(df_g.groups)
#             del df_g
#             for group in groups:
#                 df_filter = functools.reduce(lambda x, y: x & y, [df[col] == g for col, g in zip(columns, group)])
#                 df_UB_filter = functools.reduce(lambda x, y: x & y, [df_UB[col] == g for col, g in zip(columns, group)])
#                 if not len(df_UB.loc[df_UB_filter]):
#                     print('WARNING: group (%s) does not present Upperbound model.' % ', '.join([str(v) for v in group]))
#                     continue
#                 else:
#                     ub_value = df_UB.loc[df_UB_filter, normal_metric_col].values[0]
#                     df.loc[df_filter, metric_col] = df.loc[df_filter, normal_metric_col].apply(
#                         (lambda x: x / ub_value * 100) if metric_type == 'expectation' else (lambda x: ub_value - x)
#                     )
#         df.to_parquet(preprocessed_fn)
#     else:
#         print('WARNING: using already computed material.')
#         df = pd.read_parquet(preprocessed_fn)

#     if '#Apps' in df:
#         min_classes = df[df['Approach'] != appr_dict['scratch']]['#Apps'].min()
#         df = df[df['#Apps'] >= min_classes]

#     print(df)
#     return df

In [None]:
# res_path = '/media/nas/datasets/MIRAGE_2020/FSCIL_approaches/hf-project/results'
# analysis = 'per_episode_metrics'

# ms = ['accuracy_score', 'forgetting_accuracy_score', 'accuracy_score', 'forgetting_f1_score']
# m_dict = {'accuracy_score':'Accuracy', 'forgetting_accuracy_score':'Accuracy Forgetting',
#           'forgetting_f1_score':'F1 Score Forgetting'}
# ts = ['normal', 'normal', 'intransigence', 'normal']
# t_dict = {'normal':{analysis:'Per Episode'}, 'intransigence':{analysis:'Per Episode Intransigence'}}
# # appr_dict = {'icarl': 'iCaRL', 'icarlo': 'iCaRL-original', 'icarlp': 'iCaRL+', 'bic': 'BiC', 'il2m': 'IL2M',
# #                  'lwf': 'LwF', 'finetuning': 'FineTuning', 'lucir': 'LUCIR', 'ewc': 'EWC', 'joint': 'Joint',
# #                  'scratch': 'Scratch', 'freezing': 'Fixed-Repr', 'eeil': 'EEIL', 'ssil': 'SS-IL'}

# per_episode_dfs = dict()

# for scenario_discrs in scenario_discrs_dict:
#     per_episode_dfs[scenario_discrs] = dict()
#     for img_d in scenario_discrs_dict[scenario_discrs]:
#         per_episode_dfs[scenario_discrs][img_d] = dict()
#         exp_name = '*'.join([img_d[i:i+2] for i in range(0,len(img_d),2)])
#         filenames = glob('%s/*%s/**/*%s.parquet' % (res_path, exp_name, analysis), recursive=True)
#         filenames.extend(glob('%s_UB/*%s/**/*%s.parquet' % (res_path, 'scratch', analysis), recursive=True))
#         plot_path = glob('%s/%s_*/' % (img_path, img_d))[0]
#         for m, t in zip(ms[-1:], ts[-1:]):
#             df = preprocess_metrics((m, t), m_dict, t_dict, filenames,
#                                     plot_path, analysis, False, appr_dict)
#             per_episode_dfs[scenario_discrs][img_d]['%s_%s' % (m, t)] = df


In [None]:
# tmp_df = per_episode_dfs['CV']['can5ba20inr5stp5']['accuracy_score_normal']
# tmp_df[(tmp_df['Episode']==2) & (tmp_df['Task']==2) & (tmp_df['Seed']==0) & (tmp_df['Increment']==5)]

In [None]:
# tmp_df = per_episode_dfs['CV']['can5ba20inr5stp5']['forgetting_accuracy_score_normal']
# tmp_df[(tmp_df['Episode']==2) & (tmp_df['Task']==2) & (tmp_df['Seed']==0) & (tmp_df['Increment']==5)]

In [None]:
# tmp_df = per_episode_dfs['CV']['can5ba20inr5stp5']['accuracy_score_intransigence']
# tmp_df[(tmp_df['Episode']==2) & (tmp_df['Task']==2) & (tmp_df['Seed']==0) & (tmp_df['Increment']==5)]