In [None]:
from niapy.algorithms.basic import (
    BatAlgorithm,
    ParticleSwarmAlgorithm,
    ParticleSwarmOptimization,
)
from msa.algorithms.fa import FireflyAlgorithm
from niapy.problems.schwefel import Schwefel
from sklearn.preprocessing import StandardScaler
import sklearn
from scipy import spatial, stats
import torch
from matplotlib import pyplot as plt
import os
import numpy as np
import pandas as pd
from msa.util.optimization_data import SingleRunData
from msa.util.pop_diversity_metrics import PopDiversityMetric
from msa.tools.optimization_tools import optimization_runner

from msa.util.constants import (
    DATASET_PATH,
    POP_SIZE,
    MAX_EVALS,
    OPTIMIZATION_PROBLEM,
    POP_DIVERSITY_METRICS,
    INDIV_DIVERSITY_METRICS,
)

# Root directory of the archived experiment inspected by this notebook.
BASE_PATH = "./archive/target_performance_similarity/01-29_15.27.22_WVCPSO_Schwefel"
# Directory holding every `<n>_subset` folder of the experiment.
_DATASET_PATH = BASE_PATH + "/dataset"
# NOTE: this rebinds the DATASET_PATH name imported from msa.util.constants;
# the single-subset cells below read subset 0 through it.
DATASET_PATH = _DATASET_PATH + "/0_subset"
MSA_PATH = BASE_PATH + "/msa_obj"

# Allow-list used by the comparison cells to pick which algorithm
# directories of the dataset are plotted.
algorithms_to_plot = ["FA", "WVCPSO", "PSO", "BA"]

execute_training = True
# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print("CPUs: ", os.cpu_count())

### Optimization

In [None]:
# When True, the project-wide problem is replaced by a 20-dimensional Schwefel
# problem and the hand-picked algorithm configuration below is executed.
use_test_setting = True

problem = OPTIMIZATION_PROBLEM
# Bind the name up front: without this, switching `use_test_setting` off made
# the loop below fail with a NameError (or reuse a list left in the kernel).
algorithms = []

if use_test_setting:
    problem = Schwefel(dimension=20)
    algorithms = [
        #FireflyAlgorithm(population_size=POP_SIZE, alpha=0.15, beta0=0.4, gamma=0.04, theta=0.98),
        FireflyAlgorithm(population_size=POP_SIZE, alpha=0.84464886, beta0=0.74171366, gamma=0.60686203, theta=0.97758844),
        #FireflyAlgorithm(population_size=POP_SIZE, alpha=0.02, beta0=0.43, gamma=0.693, theta=0.962),
        #ParticleSwarmAlgorithm(population_size=POP_SIZE, c1=1.14, c2=0.05, w=0.54),
        #ParticleSwarmAlgorithm(population_size=POP_SIZE, c1=2.00417841, c2=0.70674774, w=0.82266951),
        #ParticleSwarmAlgorithm(population_size=POP_SIZE, c1=1.95, c2=0.82, w=0.82),
        #BatAlgorithm(population_size=POP_SIZE, loudness=1.0, pulse_rate=1.0, alpha=0.99, gamma=0.1)
        #tools.algorithms.pso.ParticleSwarmAlgorithm(population_size=POP_SIZE, c1=1.95, c2=0.82, w=0.82),
    ]

if not algorithms:
    print("No algorithms configured - nothing to run.")

# One optimization run per configured algorithm.
# NOTE(review): "./dataset" is presumably the output location and differs from
# the archived DATASET_PATH read by the plotting cells - confirm this is intended.
for algorithm in algorithms:
    optimization_runner(
        algorithm=algorithm,
        problem=problem,
        runs=1,
        dataset_path="./dataset",
        pop_diversity_metrics=POP_DIVERSITY_METRICS,
        indiv_diversity_metrics=INDIV_DIVERSITY_METRICS,
        max_evals=MAX_EVALS,
        run_index_seed=True,
        keep_pop_data=False,
        parallel_processing=True,
    )

### Population diversity metrics comparison

In [None]:
# Population diversity metrics drawn for every algorithm/problem pair.
pop_metrics_list = [
    PopDiversityMetric.PDC,
    PopDiversityMetric.FDC,
    PopDiversityMetric.PFSD,
    PopDiversityMetric.PFM,
]

# One figure per (algorithm, problem), using the first run in sorted file order.
for algorithm in os.listdir(DATASET_PATH):
    for problem in os.listdir(os.path.join(DATASET_PATH, algorithm)):
        problem_dir = os.path.join(DATASET_PATH, algorithm, problem)
        runs = sorted(os.listdir(problem_dir))
        if not runs:
            # Nothing recorded for this pair; indexing runs[0] would fail.
            continue
        run_path = os.path.join(problem_dir, runs[0])
        # Parse the run file once and reuse the object (it used to be imported twice).
        srd = SingleRunData.import_from_json(run_path)
        pop_metrics = srd.get_pop_diversity_metrics_values(
            metrics=pop_metrics_list, minmax_scale=False, standard_scale=True
        )
        ax = pop_metrics.plot(figsize=(15,7), fontsize=19, logy=False)
        ax.set_title(label=" ".join([f"Populacijske metrike raznolikosti - {algorithm}", problem]), fontdict={'fontsize':22}, pad=15)
        ax.set_xlabel(xlabel="Iteracija", fontdict={'fontsize':19}, labelpad=10)
        ax.set_ylabel(ylabel="Vrednost", fontdict={'fontsize':19}, labelpad=10)
        ax.legend(fontsize=15)

In [None]:
# Line style scheme: one colour per plotted algorithm, one line type per metric.
# The first two algorithms get exactly the styles of the former fixed 8-entry
# table ('-g', ':g', '--g', '-.g', '-b', ...); further algorithms continue with
# red and black instead of running past the end of that table (IndexError).
line_types = ['-', ':', '--', '-.']
colors = ['g', 'b', 'r', 'k']
pop_metrics_list = [
    PopDiversityMetric.PDC,
    PopDiversityMetric.FDC,
    PopDiversityMetric.PFSD,
    PopDiversityMetric.PFM,
]

# Only algorithm directories on the allow-list take part in the comparison.
plotted_algorithms = [
    algorithm for algorithm in os.listdir(DATASET_PATH) if algorithm in algorithms_to_plot
]

# Map every "<algorithm>_<metric>" column name to its matplotlib format string.
style = {}
for algorithm_idx, algorithm in enumerate(plotted_algorithms):
    color = colors[algorithm_idx % len(colors)]
    for metric_idx, metric in enumerate(pop_metrics_list):
        line_type = line_types[metric_idx % len(line_types)]
        style['_'.join([algorithm, metric.value])] = line_type + color

# problem name -> {"<algorithm>_<metric>": values of the first run}
metrics_by_problem = {}
for algorithm in plotted_algorithms:
    for problem in os.listdir(os.path.join(DATASET_PATH, algorithm)):
        problem_dir = os.path.join(DATASET_PATH, algorithm, problem)
        runs = sorted(os.listdir(problem_dir))
        if not runs:
            # No run files for this pair; skip instead of failing on runs[0].
            continue
        run_path = os.path.join(problem_dir, runs[0])
        run = SingleRunData.import_from_json(run_path)
        pop_metrics = run.get_pop_diversity_metrics_values(metrics=pop_metrics_list, minmax_scale=False)

        for metric in pop_metrics_list:
            if metric.value not in pop_metrics:
                continue
            key = '_'.join([algorithm, metric.value])
            values = pop_metrics.get(metric.value).to_list()

            # scale fdc to [0, 1] for easier comparison on logy scale
            if metric == PopDiversityMetric.FDC:
                values = sklearn.preprocessing.minmax_scale(values, feature_range=(0, 1))

            metrics_by_problem.setdefault(problem, {})[key] = values

# One log-scaled figure per problem with all algorithm/metric curves.
for problem in metrics_by_problem:
    metrics = metrics_by_problem[problem]
    df_metrics = pd.DataFrame.from_dict(metrics)
    ax = df_metrics.plot(style=style, figsize=(25, 7), logy=True, fontsize=15)
    ax.legend(fontsize=15)
    ax.set_title(label=problem, fontdict={'fontsize':24})
    ax.set_xlabel(xlabel="Iterations", fontdict={'fontsize':20})

#### All subsets

In [None]:
dataset_path = _DATASET_PATH

subsets = os.listdir(dataset_path)

# Line style scheme: one colour per plotted algorithm, one line type per metric.
# The first two algorithms get exactly the styles of the former fixed 8-entry
# table; further algorithms continue with red and black instead of raising
# an IndexError.
line_types = ['-', ':', '--', '-.']
colors = ['g', 'b', 'r', 'k']
pop_metrics_list = [
    PopDiversityMetric.PDC,
    PopDiversityMetric.FDC,
    PopDiversityMetric.PFSD,
    PopDiversityMetric.PFM,
]

# Subset directories are addressed as "0_subset", "1_subset", ...; the number of
# entries in `dataset_path` decides how many of them are visited.
for subset_idx in range(len(subsets)):
    subset = f"{subset_idx}_subset"
    subset_dir = os.path.join(dataset_path, subset)

    # Only algorithm directories on the allow-list take part in the comparison.
    plotted_algorithms = [
        algorithm for algorithm in os.listdir(subset_dir) if algorithm in algorithms_to_plot
    ]

    # Map every "<algorithm>_<metric>" column name to its matplotlib format string.
    style = {}
    for algorithm_idx, algorithm in enumerate(plotted_algorithms):
        color = colors[algorithm_idx % len(colors)]
        for metric_idx, metric in enumerate(pop_metrics_list):
            line_type = line_types[metric_idx % len(line_types)]
            style['_'.join([algorithm, metric.value])] = line_type + color

    # problem name -> {"<algorithm>_<metric>": values of the first run}
    metrics_by_problem = {}
    for algorithm in plotted_algorithms:
        for problem in os.listdir(os.path.join(subset_dir, algorithm)):
            problem_dir = os.path.join(subset_dir, algorithm, problem)
            runs = sorted(os.listdir(problem_dir))
            if not runs:
                # No run files for this pair; skip instead of failing on runs[0].
                continue
            run_path = os.path.join(problem_dir, runs[0])
            run = SingleRunData.import_from_json(run_path)
            pop_metrics = run.get_pop_diversity_metrics_values(metrics=pop_metrics_list, minmax_scale=False)

            for metric in pop_metrics_list:
                if metric.value not in pop_metrics:
                    continue
                key = '_'.join([algorithm, metric.value])
                values = pop_metrics.get(metric.value).to_list()

                # scale fdc to [0, 1] for easier comparison on logy scale
                if metric == PopDiversityMetric.FDC:
                    values = sklearn.preprocessing.minmax_scale(values, feature_range=(0, 1))

                metrics_by_problem.setdefault(problem, {})[key] = values

    # One log-scaled figure per problem, titled with the subset name.
    for problem in metrics_by_problem:
        metrics = metrics_by_problem[problem]
        df_metrics = pd.DataFrame.from_dict(metrics)
        ax = df_metrics.plot(style=style, figsize=(25, 7), logy=True, fontsize=15)
        ax.legend(fontsize=15)
        ax.set_title(label=subset, fontdict={'fontsize':24})
        ax.set_xlabel(xlabel="Iterations", fontdict={'fontsize':20})

### Average population diversity metrics for all runs by subset

In [None]:
dataset_path = _DATASET_PATH

subsets = os.listdir(dataset_path)

# Line style scheme: one colour per plotted algorithm, one line type per metric.
# The first two algorithms get exactly the styles of the former fixed 8-entry
# table; further algorithms continue with red and black instead of raising
# an IndexError.
line_types = ['-', ':', '--', '-.']
colors = ['g', 'b', 'r', 'k']
pop_metrics_list = [
    PopDiversityMetric.PDC,
    PopDiversityMetric.FDC,
    PopDiversityMetric.PFSD,
    PopDiversityMetric.PFM,
]

# Subset directories are addressed as "0_subset", "1_subset", ...; the number of
# entries in `dataset_path` decides how many of them are visited.
for subset_idx in range(len(subsets)):
    subset = f"{subset_idx}_subset"
    subset_dir = os.path.join(dataset_path, subset)

    # Only algorithm directories on the allow-list take part in the comparison.
    plotted_algorithms = [
        algorithm for algorithm in os.listdir(subset_dir) if algorithm in algorithms_to_plot
    ]

    # Map every "<algorithm>_<metric>" column name to its matplotlib format string.
    style = {}
    for algorithm_idx, algorithm in enumerate(plotted_algorithms):
        color = colors[algorithm_idx % len(colors)]
        for metric_idx, metric in enumerate(pop_metrics_list):
            line_type = line_types[metric_idx % len(line_types)]
            style['_'.join([algorithm, metric.value])] = line_type + color

    # problem name -> {"<algorithm>_<metric>": element-wise mean over all runs}
    metrics_by_problem = {}
    for algorithm in plotted_algorithms:
        for problem in os.listdir(os.path.join(subset_dir, algorithm)):
            problem_dir = os.path.join(subset_dir, algorithm, problem)
            runs = sorted(os.listdir(problem_dir))
            for run_file_name in runs:
                run_path = os.path.join(problem_dir, run_file_name)
                run = SingleRunData.import_from_json(run_path)
                pop_metrics = run.get_pop_diversity_metrics_values(metrics=pop_metrics_list, minmax_scale=False)

                for metric in pop_metrics_list:
                    if metric.value not in pop_metrics:
                        continue
                    key = '_'.join([algorithm, metric.value])

                    # Each run contributes values / len(runs); summing the
                    # contributions yields the mean when every run has the metric.
                    contribution = pop_metrics.get(metric.value).to_numpy() / len(runs)
                    problem_metrics = metrics_by_problem.setdefault(problem, {})
                    if key in problem_metrics:
                        problem_metrics[key] = np.add(problem_metrics[key], contribution)
                    else:
                        problem_metrics[key] = contribution

                    # FDC is not min-max scaled in this cell; the averaged raw
                    # values are plotted as they are.

    # One log-scaled figure per problem, titled with the subset name.
    for problem in metrics_by_problem:
        metrics = metrics_by_problem[problem]
        df_metrics = pd.DataFrame.from_dict(metrics)
        ax = df_metrics.plot(style=style, figsize=(25, 7), logy=True, fontsize=15)
        ax.legend(fontsize=15)
        ax.set_title(label=subset, fontdict={'fontsize':24})
        ax.set_xlabel(xlabel="Iterations", fontdict={'fontsize':20})

### Individual diversity metrics comparison

In [None]:
# problem name -> {"<algorithm>_<metric>": per-individual values of the first run}
metrics_by_problem = {}
for algorithm in os.listdir(DATASET_PATH):
    # This cell is restricted to the WVCPSO runs (not the full allow-list).
    if algorithm not in ["WVCPSO"]:#algorithms_to_plot:
        continue
    for problem in os.listdir(os.path.join(DATASET_PATH, algorithm)):
        problem_dir = os.path.join(DATASET_PATH, algorithm, problem)
        runs = sorted(os.listdir(problem_dir))
        if not runs:
            # No run files for this pair; skip instead of failing on runs[0].
            continue
        run_path = os.path.join(problem_dir, runs[0])
        run = SingleRunData.import_from_json(run_path)
        indiv_metrics = run.get_indiv_diversity_metrics_values(minmax_scale=False, standard_scale=True)
        for metric in INDIV_DIVERSITY_METRICS:
            key = '_'.join([algorithm, metric.value])
            metrics_by_problem.setdefault(problem, {})[key] = indiv_metrics.get(metric.value).to_list()


# One figure per problem with one box plot panel per individual metric.
for problem in metrics_by_problem:
    metrics = metrics_by_problem[problem]
    df_metrics = pd.DataFrame.from_dict(metrics)

    # squeeze=False keeps `axes` two-dimensional, so indexing also works when
    # only a single individual metric is configured.
    fig, axes = plt.subplots(1, len(INDIV_DIVERSITY_METRICS), squeeze=False)
    fig.subplots_adjust(wspace=0.7, top=0.825, bottom=0)
    #fig.suptitle(problem, fontsize=20)
    fig.suptitle("Individualne metrike raznolikosti - Schwefel", fontsize=22)

    for metric_idx, metric in enumerate(INDIV_DIVERSITY_METRICS):
        df_metric = df_metrics.filter(regex=metric.value)
        # Literal replacement; explicit regex=False avoids depending on the
        # pandas-version-specific default of str.replace.
        df_metric.columns = df_metric.columns.str.replace("WVCPSO" + '_'+metric.value, 'PSO', regex=False)
        ax = df_metric.plot(ax=axes[0, metric_idx], kind="box", figsize=(15, 5), logy=False, fontsize=19)
        ax.margins(x=0)
        ax.set_title(label=metric.name, fontdict={'fontsize':19}, pad=10)

In [None]:
dataset_path = _DATASET_PATH

subsets = os.listdir(dataset_path)

# Subset directories are addressed as "0_subset", "1_subset", ...; the number of
# entries in `dataset_path` decides how many of them are visited.
for subset_idx in range(len(subsets)):
    subset = f"{subset_idx}_subset"
    subset_dir = os.path.join(dataset_path, subset)

    # problem name -> {"<algorithm>_<metric>": per-individual values of the first run}
    metrics_by_problem = {}
    for algorithm in os.listdir(subset_dir):
        if algorithm not in algorithms_to_plot:
            continue
        for problem in os.listdir(os.path.join(subset_dir, algorithm)):
            problem_dir = os.path.join(subset_dir, algorithm, problem)
            runs = sorted(os.listdir(problem_dir))
            if not runs:
                # No run files for this pair; skip instead of failing on runs[0].
                continue
            run_path = os.path.join(problem_dir, runs[0])
            run = SingleRunData.import_from_json(run_path)
            indiv_metrics = run.get_indiv_diversity_metrics_values(minmax_scale=False, standard_scale=True)
            for metric in INDIV_DIVERSITY_METRICS:
                key = '_'.join([algorithm, metric.value])
                metrics_by_problem.setdefault(problem, {})[key] = indiv_metrics.get(metric.value).to_list()

    # One figure per problem with one box plot panel per individual metric.
    for problem in metrics_by_problem:
        metrics = metrics_by_problem[problem]
        df_metrics = pd.DataFrame.from_dict(metrics)

        # squeeze=False keeps `axes` two-dimensional, so indexing also works
        # when only a single individual metric is configured.
        fig, axes = plt.subplots(1, len(INDIV_DIVERSITY_METRICS), squeeze=False)
        fig.suptitle(f"{problem} {subset}", fontsize=23)
        fig.subplots_adjust(wspace=0.4)

        # A dedicated index name keeps the subset loop index untouched.
        for metric_idx, metric in enumerate(INDIV_DIVERSITY_METRICS):
            df_metric = df_metrics.filter(regex=metric.value)
            # Strip the metric suffix so each box is labelled by algorithm only;
            # regex=False makes the literal replacement explicit.
            df_metric.columns = df_metric.columns.str.replace('_'+metric.value, '', regex=False)
            ax = df_metric.plot(ax=axes[0, metric_idx], kind="box", figsize=(25, 6), logy=False, fontsize=15)
            ax.set_title(label=metric.name, fontdict={'fontsize':20})

### Average of individual diversity metrics for all runs by subset

In [None]:
dataset_path = _DATASET_PATH

subsets = os.listdir(dataset_path)

# Subset directories are addressed as "0_subset", "1_subset", ...; the number of
# entries in `dataset_path` decides how many of them are visited.
for subset_idx in range(len(subsets)):
    subset = f"{subset_idx}_subset"
    subset_dir = os.path.join(dataset_path, subset)

    # problem name -> {"<algorithm>_<metric>": element-wise mean over all runs}
    metrics_by_problem = {}
    for algorithm in os.listdir(subset_dir):
        if algorithm not in algorithms_to_plot:
            continue
        for problem in os.listdir(os.path.join(subset_dir, algorithm)):
            problem_dir = os.path.join(subset_dir, algorithm, problem)
            runs = sorted(os.listdir(problem_dir))
            for run_file_name in runs:
                # Load the run of the current iteration. The path used to be
                # built from runs[0], so the "average" was just the first run
                # added len(runs) times.
                run_path = os.path.join(problem_dir, run_file_name)
                run = SingleRunData.import_from_json(run_path)
                indiv_metrics = run.get_indiv_diversity_metrics_values(minmax_scale=False)
                for metric in INDIV_DIVERSITY_METRICS:
                    key = '_'.join([algorithm, metric.value])
                    # Each run contributes values / len(runs); the sum of all
                    # contributions is the mean over the runs.
                    contribution = indiv_metrics.get(metric.value).to_numpy() / len(runs)
                    problem_metrics = metrics_by_problem.setdefault(problem, {})
                    if key in problem_metrics:
                        problem_metrics[key] = np.add(problem_metrics[key], contribution)
                    else:
                        problem_metrics[key] = contribution

    # One figure per problem with one log-scaled box plot panel per metric.
    for problem in metrics_by_problem:
        metrics = metrics_by_problem[problem]
        df_metrics = pd.DataFrame.from_dict(metrics)

        # squeeze=False keeps `axes` two-dimensional, so indexing also works
        # when only a single individual metric is configured.
        fig, axes = plt.subplots(1, len(INDIV_DIVERSITY_METRICS), squeeze=False)
        fig.suptitle(f"{problem} {subset}", fontsize=23)
        fig.subplots_adjust(wspace=0.4)

        # A dedicated index name keeps the subset loop index untouched.
        for metric_idx, metric in enumerate(INDIV_DIVERSITY_METRICS):
            df_metric = df_metrics.filter(regex=metric.value)
            # Strip the metric suffix so each box is labelled by algorithm only;
            # regex=False makes the literal replacement explicit.
            df_metric.columns = df_metric.columns.str.replace('_'+metric.value, '', regex=False)
            ax = df_metric.plot(ax=axes[0, metric_idx], kind="box", figsize=(25, 6), logy=True, fontsize=15)
            ax.set_title(label=metric.name, fontdict={'fontsize':20})