# Notebook utilised for processing results of CEC22 Manuscript 2476
Title: **A Transfer Learning Hyper-heuristic Approach for Automatic Tailoring of Unfolded Population-based Metaheuristics**
Authors: Jorge M. Cruz-Duarte, Ivan Amaya, José Carlos Ortiz-Bayliss, and Nelishia Pillay

Load some packages and set some parameters for plotting and showing things


In [None]:
# Processing parameters
# `is_saving` guards the `plt.savefig` calls of the plotting cells below: figures are only written when it is True
is_saving = False  # Please, check if you want to save files
# Used both as the file extension and as the `format` argument of `plt.savefig`
saving_format = 'png'

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Load data
import tools as tl
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import rankdata
from mpl_toolkits.mplot3d import Axes3D
import os
import seaborn as sns
import benchmark_func as bf

# Basic collection and results for comparison:
# (the collection is read line by line further below; the results are loaded with `tl.read_json`)
basic_collection_file = './collections/basicmetaheuristics.txt'
basic_results_file = './data_files/basic-metaheuristics-data_v2.json'

# Heuristic collection used in the experiments:
collection_file = './collections/default.txt'

# Datafiles from the weight matrix for comparison purposes
# `process_data` reads each one from 'data_files/<name>.json' and parses the population size from the text
# that follows 'pop' in the name
datafile_names_uMHs = [
    "unfolded_hhs_pop30",
    "unfolded_hhs_pop50",
    "unfolded_hhs_pop100"
]

# Results from the first experiment:
# `get_datatable` splits these names by '_' and uses the last three fields as (key, pop, kind)
datafile_names_exp1 = [
    "transfer_learning_dynamic_pop30_exp1",
    "transfer_learning_dynamic_pop50_exp1",
    "transfer_learning_dynamic_pop100_exp1",
    "transfer_learning_static_pop30_exp1",
    "transfer_learning_static_pop50_exp1",
    "transfer_learning_static_pop100_exp1",
]

# Output folder for the figures and results
# `image_folder_name` is where figures are saved; `datafolder_name` is the folder `get_datatable` reads the
# experiment JSON files from
image_folder_name = './data_files/exp_figures/'
datafolder_name = "./data_files/exp_output/"

# Some lists with useful information
# `chosen_categories` is passed to `bf.list_functions(fts=...)`; `short_cat` holds the labels `bin2cat` uses
# for a '1' at the same position of a category code
chosen_categories = ['Differentiable', 'Unimodal']
short_cat = ['Diff.', 'Unimod.']

# `tac_trohs` (`short_cat` spelled backwards) holds the labels `bin2cat` uses for any character other than '1'
chosen_categories_inv = ['Non-differentiable', 'Multimodal']
tac_trohs = ['Non-Diff.', 'Multimod.']

# NOTE(review): not referenced in the visible part of the notebook; presumably the name of the 'DU' column
case_label = 'DU'

def bin2cat(bin_str):
    """
    Translate a binary category code into its human-readable label
    :param bin_str: Category code; it is cast to `str` and read character by character
    :return: Labels joined by " and ": a '1' picks the entry of `short_cat` at that position,
             any other character picks the entry of `tac_trohs`
    """
    labels = []
    for position, flag in enumerate(str(bin_str)):
        label_source = short_cat if flag == '1' else tac_trohs
        labels.append(label_source[position])
    return " and ".join(labels)

# Use them to get categories
# Labels for the four binary codes, in the order later passed as `hue_order` to the extended ranking plot
cat_order = [bin2cat(x) for x in ['11', '10', '01', '00']]

# Use if you need to disregard certain problems
# NOTE(review): this list is not consumed anywhere in the visible part of the notebook
problems_to_disregard = []

# Special adjustments for the plots
# NOTE(review): 'text.usetex': True presumably requires a working LaTeX installation -- confirm on your machine
sns.set(context="paper", font_scale=1, palette="colorblind", style="ticks",
        rc={'text.usetex': True, 'font.family': 'serif', 'font.size': 24,
            "xtick.major.top": False, "ytick.major.right": False})
plt.rcParams.update({'font.size': 24, 'text.usetex': True, 'font.family': 'serif'})

# Read operators and find their alias
# Every line of the collection file is evaluated as a Python expression.
# NOTE(review): `eval` executes whatever the file contains; only use collection files you trust
# (consider `ast.literal_eval` if every line is a plain literal -- to be confirmed against the collections)
with open(collection_file, 'r') as operators_file:
    encoded_heuristic_space = [eval(line.rstrip('\n')) for line in operators_file]

# Search operator (perturbator and selector) aliases
# Two-letter alias for each perturbator name
perturbator_alias = {
    'random_search': 'RS',
    'central_force_dynamic': 'CF',
    'differential_mutation': 'DM',
    'firefly_dynamic': 'FD',
    'genetic_crossover': 'GC',
    'genetic_mutation': 'GM',
    'gravitational_search': 'GS',
    'random_flight': 'RF',
    'local_random_walk': 'RW',
    'random_sample': 'RX',
    'spiral_dynamic': 'SD',
    'swarm_dynamic': 'PS'}

# One-letter alias for each selector name
selector_alias = {
    'greedy': 'g',
    'all': 'd',
    'metropolis': 'm',
    'probabilistic': 'p'}

# Pre-process operator families
# Maps every perturbator alias to its position in the alphabetically sorted list of aliases
operator_families = {y: i for i, y in enumerate(sorted([x for x in perturbator_alias.values()]))}

# Pre-build the alias list
# Element 0 of each encoded operator is looked up as a perturbator name and element 2 as a selector name
heuristic_space = [perturbator_alias[x[0]] + selector_alias[x[2]] for x in encoded_heuristic_space]

# Find repeated elements
# Every alias found more than once gets a 1-based counter appended to each of its occurrences.
# The list is modified in place while it is being iterated: once renamed, the entries are unique,
# so they are not renamed again when the loop reaches them.
# NOTE(review): `tl.listfind` presumably returns the indices where the value occurs -- confirm in `tools`
for heuristic in heuristic_space:
    concurrences = tl.listfind(heuristic_space, heuristic)
    if len(concurrences) > 1:
        for count, idx in enumerate(concurrences):
            heuristic_space[idx] += f'{count + 1}'

# Read basic metaheuristics
# Every line of the file is evaluated as a Python expression.
# NOTE(review): `eval` executes whatever the file contains; only use collection files you trust
with open(basic_collection_file, 'r') as operators_file:
    basic_mhs_collection = [eval(line.rstrip('\n')) for line in operators_file]

# Read basic metaheuristics cardinality
# An entry that is a single tuple counts as 1; any other entry counts its number of elements
basic_mhs_cadinality = [1 if isinstance(x, tuple) else len(x) for x in basic_mhs_collection]

# Load data from basic metaheuristics
# The keys 'dimensions', 'problem', and 'results' of this object are read by `process_basic_mhs`
basic_mhs_data = tl.read_json(basic_results_file)

# Read (of create if so) a folder for storing images
if not os.path.isdir(image_folder_name):
    os.mkdir(image_folder_name)

# Define the performance function
def get_performance(y):
    """
    Summarise a fitness register into a single performance value
    :param y: Array with fitness values
    :return: Median of `y` plus the interquartile range of `y`
    """
    centre = np.median(y)
    spread = stats.iqr(y)
    return centre + spread

In [None]:
def process_basic_mhs():
    """
    Arrange the already-loaded dataset of basic metaheuristics (`basic_mhs_data`) as a `pandas.DataFrame`
    :return: Table with one row per dataset entry, sorted by 'Pop', 'Cat', 'Problem', and 'Dim'
    """
    dims = basic_mhs_data['dimensions']
    problem_names = basic_mhs_data['problem']
    results = basic_mhs_data['results']
    num_rows = len(dims)

    # Call the problem categories
    problem_features = bf.list_functions(fts=chosen_categories)

    columns = {
        'Method': ['Basic'] * num_rows,
        'Pop': [30] * num_rows,
        'Dim': list(dims),
        'Problem': problem_names,
        'Cat': [problem_features[name]['Code'] for name in problem_names],
        # Operator id stored at the position of the lowest performance value of each entry
        'uMH': [entry['operator_id'][np.argmin(entry['performance'])] for entry in results],
        'Performance': [get_performance(entry['performance']) for entry in results],
    }

    sorting_keys = ['Pop', 'Cat', 'Problem', 'Dim']
    return pd.DataFrame(columns).sort_values(by=sorting_keys, ignore_index=True)

# Use the processing routine
# The resulting table is filtered and concatenated later, in the comparison with basic metaheuristics
data_table_basic = process_basic_mhs()

# Show the data
data_table_basic

## 0. Define some tools to use
### Adjusting function
We employed the expression $\hat{x} = \text{sign}(x) \log_{10}{(|x|+1)}$ to adjust the fitness values from extremely high values (in magnitude) to a short range.

In [None]:
# Define the rescaling function
def rescaling_function(values):
    """
    Rescale fitness values to a signed, magnitude-based representation that eases their comparison.
    It computes sign(values) * log10(|values| + 1), so 0 maps to 0 and the sign is preserved
    :param values: Scalar or array-like with fitness values
    :return: Rescaled value; a scalar for scalar input, or an array with one value per element otherwise
    """
    # `np.sign` works element-wise, so arrays can be passed directly without `np.vectorize`
    return np.sign(values) * np.log10(np.abs(values) + 1)

# Show how this function works
# Plot the rescaled value for 5000 evenly spaced inputs between -1e3 and 1e3
fig = plt.figure(figsize=(5, 4))
x_values = np.linspace(-1e3, 1e3, 5000)
# `np.vectorize` applies `rescaling_function` to one element of `x_values` at a time
plt.plot(x_values, np.vectorize(rescaling_function)(x_values), c='r', linewidth=2)
plt.xlabel(r"Original value, $f$")
plt.ylabel(r"Rescaled value, $\hat{f}$")
plt.show()

### Reading the base dataset for comparisons
In this part, we only read and process the base dataset for comparison purposes

In [None]:
# Define the method for processing data from unfolding metaheuristics
def process_data(dataframe_filename):
    """
    Read 'data_files/<dataframe_filename>.json' and arrange its content as a `pandas.DataFrame`
    :param dataframe_filename: str, file name without folder nor extension; the text after its last 'pop'
                               is parsed as the population size
    :return: Table with one row per entry of the file, labelled with the method 'Base'
    """
    raw_data = tl.read_json(f'data_files/{dataframe_filename}.json')

    dims = raw_data['dimensions']
    problem_names = raw_data['problem']
    results = raw_data['results']
    num_rows = len(dims)

    # Call the problem categories
    problem_features = bf.list_functions(fts=chosen_categories)

    pop_size = int(dataframe_filename.split('pop')[-1])

    def rescale_all(fitness_values):
        # Apply the rescaling function to every fitness value of one row
        return [rescaling_function(value) for value in fitness_values]

    data_table = pd.DataFrame({
        'Method': ['Base'] * num_rows,
        'Pop': [pop_size] * num_rows,
        'Dim': list(dims),
        'Problem': problem_names,
        'Cat': [problem_features[name]['Code'] for name in problem_names],
        # Last element of the encoded solutions and of the performance register of each entry
        'uMH': [entry['encoded_solution'][-1] for entry in results],
        'Performance': [entry['performance'][-1] for entry in results],
        # Last value of every fitness history stored in the entry
        'BestFitness': [[history[-1] for history in entry['hist_fitness']] for entry in results],
        'PerformanceEvolution': [entry['performance'] for entry in results],
    })

    # Compute the metric using the rescaling function
    data_table['Metric'] = data_table['BestFitness'].apply(rescale_all)

    return data_table

# Create an empty list and process all the base dataset files
# One table per file name listed in `datafile_names_uMHs`
data_table_exp0_list = list()
for datafile_name in datafile_names_uMHs:
    data_table_exp0_list.append(process_data(datafile_name))

# Store this information in a dataframe and show it
# The tables are stacked row-wise and the index is renumbered
data_table_exp0 = pd.concat(data_table_exp0_list, ignore_index=True)
data_table_exp0

### Prepare methods to read and process the resulting datasets
In this case, we proceed to prepare methods to read and process the datasets from the resulting experiments

In [None]:
# Prepare the method for reading dataframes and building the datatable with pandas
def get_datatable(dataframe_filename):
    """
    Read one experiment result file from `datafolder_name` and arrange it as `pandas` tables
    :param dataframe_filename: str, file name without folder nor extension. Split by '_', its fields from the
                               third onwards must be exactly `<key>`, `pop<size>`, and `<kind>`,
                               e.g. 'transfer_learning_dynamic_pop30_exp1'
    :return: tuple `(info, data_table, weight_table)` where `info` is the string `<key><size><kind>`,
             `data_table` is sorted by 'Pop', 'Cat', 'Problem', and 'Dim', and `weight_table` is a table with
             the weight matrices when `<key>` is 'dynamic', or None otherwise
    """
    # Generate key for the output dictionary
    key, pop, kind = dataframe_filename.split('_')[2:]
    pop_size = int(pop[3:])  # `pop` looks like 'pop30': drop the 'pop' prefix

    data_frame = tl.read_json(datafolder_name + dataframe_filename + '.json')
    results = data_frame['results']
    num_rows = len(data_frame['dimensions'])

    # Call the problem categories
    problem_features = bf.list_functions(fts=chosen_categories)

    def shared_columns():
        # Columns that every table built here has in common (fresh lists on every call)
        return {
            'Pop': [pop_size] * num_rows,
            'Dim': list(data_frame['dimensions']),
            'Problem': data_frame['problem'],
            'Cat': [problem_features[x]['Code'] for x in data_frame['problem']],
        }

    # Check if it is `dynamic` or `static` (any key other than 'dynamic' is processed as static)
    if key == 'dynamic':
        columns = {
            'Method': ['Dynamic'] * num_rows,
            **shared_columns(),
            # Last fitness value of every repetition
            'BestFitness': [[y[-1] for y in x['details']['fitness_per_rep']] for x in results],
            'Sequences': [x['details']['sequence_per_rep'] for x in results],
            'FitnessEvolution': [x['details']['fitness_per_rep'] for x in results],
        }

        # The weight table keeps the file order (it is not sorted) and stores 'Dim' as text
        weight_columns = shared_columns()
        weight_columns['Dim'] = [str(x) for x in data_frame['dimensions']]
        weight_columns['Weights'] = [x['details']['weight_matrix'] for x in results]
        weight_table = pd.DataFrame(weight_columns)
    else:  # 'static'
        columns = {
            'Method': ['Static'] * num_rows,
            **shared_columns(),
            'Performance': [x['performance'][-1] for x in results],
            # Last value of every fitness history stored in the entry
            'BestFitness': [[y[-1] for y in x['hist_fitness']] for x in results],
            'Sequences': [x['encoded_solution'][-1] for x in results],
            # Only every second performance value is kept
            'PerformanceEvolution': [x['performance'][::2] for x in results],
        }
        weight_table = None

    data_table = pd.DataFrame(columns).sort_values(
        by=['Pop', 'Cat', 'Problem', 'Dim'], ignore_index=True)

    # Compute the metric using the rescaling function
    data_table['Metric'] = data_table['BestFitness'].apply(
        lambda x: [rescaling_function(y) for y in x])

    return key + pop[3:] + kind, data_table, weight_table


## 1. Adjusted best-fitness values
This is a glance at the selected problems and their adjusted best-fitness values. In this case, we plot the adjusted fitness values using the expression $\hat{x} = \text{sign}(x) \log_{10}{(|x|+1)}$, and consider both groups of datasets from the experimental results

In [None]:
# Prepare methods for getting data from the datasets
def get_experiment_data(datafile_names_exp):
    """
    Process every datafile of an experiment with `get_datatable` and collect the outcomes.
    :param datafile_names_exp: list, list of datafile names related to experiment datasets
    :returns: three lists with one element per datafile, in the given order:
              `data_info_exp_list` with the information related to experiments
              `data_table_exp_list` with the datasets
              `weight_table_exp_list` with the weight matrices

    """
    processed = [get_datatable(datafile_name) for datafile_name in datafile_names_exp]

    data_info_exp_list = [outcome[0] for outcome in processed]
    data_table_exp_list = [outcome[1] for outcome in processed]
    weight_table_exp_list = [outcome[2] for outcome in processed]

    return data_info_exp_list, data_table_exp_list, weight_table_exp_list


def plot_metrics(data_info_list, data_table_list):
    """
    Draw, for every dataset, one box plot of 'Metric' against 'Dim' per problem (early-stage overview)
    :param data_info_list: list, information related to experiments (used in the name of the saved figures)
    :param data_table_list: list, datasets
    """
    plotted_columns = ['Cat', 'Problem', 'Dim', 'Metric']

    for info, table in zip(data_info_list, data_table_list):
        # One row per metric value instead of one list of values per row
        long_table = table[plotted_columns].explode('Metric', ignore_index=True)

        grid = sns.catplot(
            data=long_table, x='Dim', y='Metric', col='Problem', col_wrap=8,
            kind='box', orient='v', sharey=False, height=2,
            facet_kws=dict(despine=False, palette='dark'))
        grid.set_titles(col_template="{col_name}")

        if is_saving:
            target_file = image_folder_name + 'boxplots-{}.'.format(info) + saving_format
            plt.savefig(target_file, format=saving_format, dpi=333, transparent=True)

        plt.show()

### Process and plot metrics for the first experiment
In this part, we use the above-defined methods to show an overview of the results

In [None]:
# Read every datafile of the first experiment and plot its metric overview
# (`weight_table1_exp_list` holds None for the files that are not 'dynamic')
data_info_exp1_list, data_table_exp1_list, weight_table1_exp_list = get_experiment_data(datafile_names_exp1)
plot_metrics(data_info_exp1_list, data_table_exp1_list)

## 2. Data reported in the manuscript
In this section, we organised the plots that were included in the manuscript.
The comparison is made between experiment 1 and both experiment 0 and the basic metaheuristics; these sets comprise the same problems, populations and dimensions.

In [None]:
# Merge data_tables from experiment 1
data_table_exp1 = pd.concat(data_table_exp1_list, ignore_index=True)
# (bare expression kept for inspection; it does not modify anything)
data_table_exp1['Problem'].unique()

# Read the problems considered for all the experiments (1 & 2)
problems_to_consider = data_table_exp1['Problem'].unique().tolist()

# Filter the data table for the experiment 0 using the problems from data_table for experiment 1
# (bare expression kept for inspection; it does not modify anything)
data_table_exp0['Problem'].unique()

# Filter to only use the problems
# Keeps only the rows of experiment 0 whose problem also appears in experiment 1
data_table_exp0 = data_table_exp0[data_table_exp0['Problem'].isin(problems_to_consider)]

# Show table from experiments 0
data_table_exp0

In [None]:
# Columns to consider in the concatenation
columns_to_concatenate = ['Method', 'Pop', 'Dim', 'Problem', 'Cat', 'BestFitness']

# Concatenate experiments 0 & 1
# Only the shared columns are kept from the base table and from every table of experiment 1
dt_comp_exp1 = pd.concat(
    [data_table_exp0[columns_to_concatenate]] +
    [dt[columns_to_concatenate] for dt in data_table_exp1_list],
    ignore_index=True)

# Adjust the performance value to have the same scale
# 'Metric' holds one rescaled value per entry of 'BestFitness'
dt_comp_exp1['Metric'] = dt_comp_exp1['BestFitness'].apply(
    lambda x: [rescaling_function(y) for y in x])

# Perform some adjustments to facilitate comparison
# (bare expression kept for inspection; it does not modify anything)
dt_comp_exp1['Dim'].unique()
# 'DU' is the human-readable label of the binary category code
dt_comp_exp1['DU'] = dt_comp_exp1['Cat'].map(bin2cat)
# All keys are sorted in ascending order except 'Cat', which is sorted in descending order
dt_comp_exp1 = dt_comp_exp1.sort_values(by=['Pop', 'Method', 'Dim', 'Cat', 'Problem'], ascending=[1, 1, 1, 0, 1], ignore_index=True)

# Show the resulting table
dt_comp_exp1

### Figure 2: Metric strips for all methods using DU, Pop, and Dim
This is a compact representation for analysing metric values via grouping by categories (DU), population (Pop), and dimensionalities (Dim)


In [None]:
# Marginal plot related to Categories
# Strip plot of 'Metric' per method: one panel per population size, coloured by dimension
# NOTE(review): the category column 'DU' is not used by this figure
with sns.color_palette("colorblind"):
    facet_out2 = sns.catplot(
        data=dt_comp_exp1.explode('Metric', ignore_index=True),
        col='Pop', y='Metric', x='Method', hue='Dim', kind='strip', orient='v',
        sharey=True, height=3, aspect=1, facet_kws=dict(despine=False, legend_out=False),
        margin_titles=True, legend=False, dodge=True)
    plt.subplots_adjust(hspace = 0.08, wspace=0.1)
    # The automatic legend is disabled above and added here with a custom title and anchor
    facet_out2.add_legend(title='Dimensions', bbox_to_anchor=(0.98, 0.5))
    facet_out2.set_titles(col_template="Population of {col_name}")

# The figure is written to `image_folder_name` only when saving is enabled
if is_saving:
    plt.savefig(image_folder_name + 'plot1-Metric-vs-DimPop_Exp1.' + saving_format,
               format=saving_format, dpi=333, transparent=True)

plt.show()

### Figure 3: Metric distribution for all methods using DU, Pop, and Dim.
This is a grid-like representation for analysing metric values via grouping by categories (DU), population (Pop), and dimensionalities (Dim)

In [None]:
# Marginal plot related to Population
# Grid of strip plots of 'Metric' per method: one column per population size, one row per dimension,
# coloured by category ('DU'); the panels do not share the y axis
with sns.color_palette("colorblind"):
    facet_out2 = sns.catplot(
        data=dt_comp_exp1.explode('Metric', ignore_index=True),
        col='Pop', row='Dim', y='Metric', x='Method', hue='DU', kind='strip', orient='v',
        sharey=False, height=2, aspect=1.3, margin_titles=True, dodge=True, legend=False,
        facet_kws=dict(despine=False, legend_out=True))

    facet_out2.set_titles(col_template="Population of {col_name}",
                          row_template="{row_name}D")
    plt.subplots_adjust(hspace = 0.08, wspace=0.3)
    # The automatic legend is disabled above and added here as a single row at the top left
    facet_out2.add_legend(loc='upper left', ncol=4, columnspacing=0.1,
                          bbox_to_anchor=(0.075, 1))

# The figure is written to `image_folder_name` only when saving is enabled
if is_saving:
    plt.savefig(image_folder_name + 'plot2-Metric-vs-All_Exp1.' + saving_format,
               format=saving_format, dpi=333, transparent=True)

plt.show()

### Table III: Wilcoxon's test for the experiments
In these plots, we show the $p$-value results for the Wilcoxon's test described in the manuscript

In [None]:
# Pick data for carrying out the statistical test
# The test uses `perf_alt` = get_performance(BestFitness). It is computed here on a copy of the table, so this
# cell also works when the notebook is run from top to bottom (the column is only added to `dt_comp_exp1`
# by a later cell)
pivoted_table = dt_comp_exp1.assign(
    perf_alt=dt_comp_exp1['BestFitness'].apply(get_performance)
).pivot_table(
    index=['Pop', 'Dim', 'DU', 'Problem'], columns='Method', values='perf_alt').reset_index()
# Paired differences with respect to the base method
pivoted_table['BaseDynamic'] = pivoted_table['Dynamic'] - pivoted_table['Base']
pivoted_table['BaseStatic'] = pivoted_table['Static'] - pivoted_table['Base']

# Perform the test
# Several columns of a groupby must be selected with a list: selecting them with a tuple is rejected by
# recent pandas versions. The last field of the Wilcoxon result is the p-value.
pValue_table1 = pivoted_table.groupby(['Pop', 'Dim'])[['BaseDynamic', 'BaseStatic']].agg(
    lambda z: stats.wilcoxon(x=z.values, alternative='less', zero_method='pratt')[-1]
).reset_index()

pValue_table1

# Showing the same info but in LaTeX
# print(
# pValue_table1.to_latex(
#     index=False, bold_rows=True, multirow=True, float_format="{:0.2e}".format)
# )

Analysing methods from a general point of view

### Figure 4: Frequency of methods that rank 1st
In this plot, we show the frequency of those methods that rank first when comparison is made using a given metric

In [None]:
# Add some statistics for ranking
# Every entry maps a new column name to a function of the table that computes it; further statistics of
# 'BestFitness' (e.g. max, min, mean, or median) can be added in the same way
new_columns = {
    'perf_alt': lambda table: table['BestFitness'].apply(get_performance),
}
dt_comp_exp1 = dt_comp_exp1.assign(**new_columns)

# Now do the ranking
# Methods are ranked against each other within every group defined by these columns
columns_to_rank = ['Pop', 'Dim', 'Problem', 'Cat']

In [None]:
# Plot the corresponding figure for the frequencies of first places and grouping by dimensions and methods
def plot_comparison3_by(metric_column):
    """
    Plot how often each method ranks first according to a metric, per population size and dimension.
    It (re)writes the 'Rank' column of the global `dt_comp_exp1`
    :param metric_column: str, name of the column of `dt_comp_exp1` used for ranking (lower values rank first)
    """
    ranks = dt_comp_exp1.groupby(columns_to_rank)[metric_column].rank('dense')
    dt_comp_exp1['Rank'] = ranks

    # Keep the first places and turn them into relative frequencies of each method
    first_places = dt_comp_exp1.query("Rank == 1.0")
    shares = first_places.groupby(['Pop', 'Dim'])['Method'].value_counts(normalize=True)
    dtce1_short = shares.rename('Frequency').reset_index()

    with sns.color_palette("colorblind"):
        facet_out1 = sns.catplot(
            data=dtce1_short, x='Dim', y='Frequency', hue='Method', col='Pop', kind='bar',
            height=3, aspect=1, ci=True,
            facet_kws=dict(despine=False))
    plt.subplots_adjust(hspace = 0.08, wspace=0.1)
    facet_out1.set_titles(col_template="Population of {col_name}")

    if is_saving:
        target_file = image_folder_name + 'plot4-Rank-vs-DimPop_Exp1.' + saving_format
        plt.savefig(target_file, format=saving_format, dpi=333, transparent=True)
    plt.show()

# One figure per metric defined in `new_columns`, preceded by the name of the metric
for col in new_columns:
    print(col)
    plot_comparison3_by(col)

## 4. Additional figures
The figures and tables reported here just serve to analyse the resulting data. This information was not included due to space limitations.

### Graphical representation of Table III: Wilcoxon's test
In this figure, we show the graphical visualization of $p$-values

In [None]:
# Show the resulting p-values
# The table is reshaped to long format (one row per population, dimension, and test) and drawn as bars,
# with one panel per population size; `log=True` is forwarded to the bar plot
sns.catplot(
    data=pValue_table1.melt(
        id_vars=['Pop', 'Dim'], value_vars=['BaseDynamic', 'BaseStatic'],
        var_name='Test', value_name='p-Value'
    ), x='Dim', col='Pop', y='p-Value', hue='Test', kind='bar', log=True
)
plt.show()

### Extended Figure 4: Frequency of methods that rank 1st
In this case, we considered all the grouping ways to represent these frequencies: Population, Methods, Dimensions, and Categories

In [None]:
def plot_comparison11_by(metric_column):
    """
    Plot how often each method ranks first according to a metric, per population size, dimension, and category.
    It (re)writes the 'Rank' column of the global `dt_comp_exp1`
    :param metric_column: str, name of the column of `dt_comp_exp1` used for ranking (lower values rank first)
    """
    ranks = dt_comp_exp1.groupby(columns_to_rank)[metric_column].rank('dense')
    dt_comp_exp1['Rank'] = ranks

    # Keep the first places and turn them into relative frequencies of each method
    first_places = dt_comp_exp1.query("Rank == 1.0")
    shares = first_places.groupby(['Pop', 'Dim', 'DU'])['Method'].value_counts(normalize=True)
    dtce1_short = shares.rename('Frequency').reset_index()
    dtce1_short = dtce1_short.sort_values(
        by=['Pop', 'Method', 'Dim', 'DU'], ascending=[1, 1, 1, 1], ignore_index=True)

    with sns.color_palette("colorblind"):
        facet_out1 = sns.catplot(
            data=dtce1_short, x='Method', y='Frequency', hue='DU', row='Dim', col='Pop',
            kind='bar', height=2, aspect=1.3, ci=True, hue_order=cat_order,
            margin_titles=True, legend=False,
            facet_kws=dict(despine=False, legend_out=True))

    facet_out1.set_titles(col_template="Population of {col_name}",
                          row_template="{row_name}D")
    plt.subplots_adjust(hspace = 0.08, wspace=0.1)
    # The automatic legend is disabled above and added here as a single row at the top left
    facet_out1.add_legend(loc='upper left', ncol=4, columnspacing=0.1,
                          bbox_to_anchor=(0.075, 1))

    if is_saving:
        target_file = image_folder_name + 'plot3-Rank-vs-All_Exp1.' + saving_format
        plt.savefig(target_file, format=saving_format, dpi=333, transparent=True)

    plt.show()

# Show the plot for every metric of the `new_columns` dictionary, preceded by the name of the metric
for col in new_columns:
    print(col)
    plot_comparison11_by(col)

### Comparison with basic metaheuristics
These results were not included in the manuscript but show quite interesting results about the proposed method when compared against basic metaheuristics (most of them are state-of-the-art methods).

In [None]:
# Load and concatenate dataframes to perform comparisons

# Columns to be concatenated
# (this rebinds the list used earlier: 'Performance' takes the place of 'BestFitness')
columns_to_concatenate = ['Method', 'Pop', 'Dim', 'Problem', 'Cat', 'Performance']

# We assign the comparison column
# `perf_alt` (median + interquartile range of 'BestFitness') is used as the common 'Performance' value
dt_comp_exp1['Performance'] = dt_comp_exp1['perf_alt']

# Keep only the basic-metaheuristic rows whose problem and dimension also appear in the experiments
data_table_basic = data_table_basic[data_table_basic['Problem'].isin(problems_to_consider)]
data_table_basic = data_table_basic[data_table_basic['Dim'].isin(
    dt_comp_exp1['Dim'].unique())]
# Stack both tables; the original row indices are kept (no `ignore_index`)
dt_comp_exp1_basic = pd.concat([
    dt_comp_exp1[columns_to_concatenate], data_table_basic[columns_to_concatenate]
])

We only carried out the comparison with populations of 30 agents because of the available dataset of basic metaheuristics

In [None]:
# Rank the methods (dense ranking, lowest 'Performance' first) within every group of `columns_to_rank`
dt_comp_exp1_basic['Rank'] = dt_comp_exp1_basic.groupby(columns_to_rank)['Performance'].rank('dense')
dt_comp_exp1_basic['DU'] = dt_comp_exp1_basic['Cat'].map(bin2cat)
# Keep the first places for populations of 30 agents and the listed dimensions only
dtce1_short = dt_comp_exp1_basic.query("Pop == 30 & Dim == [2, 10, 30, 50] & Rank == 1.0")
# Relative frequency of each method among the first places, per dimension and category
dtce1_short = dtce1_short.groupby(['Dim', 'DU'])['Method'].value_counts(normalize=True).rename('Frequency').reset_index()

with sns.color_palette("colorblind"):
    # g = sns.FacetGrid(dt_comp_exp1.query("Rank == 1.0"), row='Pop', col='Cat')
    # g.map_dataframe(lambda data, color: sns.barplot(x=data['Dim'], y=data['Method']))

    # One panel per category, bars grouped by dimension and coloured by method
    facet_out1 = sns.catplot(data=dtce1_short,
                hue='Method', x='Dim', col='DU', col_wrap=4, y='Frequency', kind='bar',
                height=3, aspect=1, ci=True, #fill=False, alpha=0.5, levels=5, linewidths=2,  #cut=1,
                # warn_singular=False,
                facet_kws=dict(despine=False)
                )

    facet_out1.set_titles(col_template="{col_name}")
# The figure is written to `image_folder_name` only when saving is enabled
if is_saving:
    plt.savefig(image_folder_name + 'plot5-rank_Pop-Dim-Method.' + saving_format,
               format=saving_format, dpi=333, transparent=True)
plt.show()

Wilcoxon's test

In [None]:
# One row per (Pop, Dim, DU, Problem) and one column per method with its 'Performance' value
pivoted_table2 = dt_comp_exp1_basic.pivot_table(
    index=['Pop', 'Dim', 'DU', 'Problem'], columns='Method', values='Performance'
).reset_index()
# Paired differences with respect to the basic metaheuristics
pivoted_table2['BasicBase'] = pivoted_table2['Base'] - pivoted_table2['Basic']
pivoted_table2['BasicDynamic'] = pivoted_table2['Dynamic'] - pivoted_table2['Basic']
pivoted_table2['BasicStatic'] = pivoted_table2['Static'] - pivoted_table2['Basic']
# Several columns of a groupby must be selected with a list: selecting them with a tuple is rejected by
# recent pandas versions. The last field of the Wilcoxon result is the p-value.
pValue_table2 = pivoted_table2.groupby(['Pop', 'Dim'])[
    ['BasicBase', 'BasicDynamic', 'BasicStatic']].agg(
    lambda z: stats.wilcoxon(x=z.values, alternative='less', zero_method='pratt')[-1]
).reset_index()
# Show the p-values of the populations of 30 agents as bars, grouped by dimension
sns.catplot(
    data=pValue_table2.melt(
        id_vars=['Pop', 'Dim'], value_vars=['BasicBase', 'BasicDynamic', 'BasicStatic'],
        var_name='Test', value_name='p-Value'
    ).query("Pop == 30"), y='p-Value', x='Dim', hue='Test', kind='bar'
)
# pValue_table1
pValue_table2.query("Pop == 30")