In [2]:
from niapy.algorithms.basic import (
    BatAlgorithm,
    ParticleSwarmAlgorithm,
    ParticleSwarmOptimization,
)
from tools.algorithms.fa import FireflyAlgorithm
from niapy.problems.ackley import Ackley
from niapy.problems.bent_cigar import BentCigar
from niapy.problems.sphere import Sphere
from niapy.problems.schwefel import Schwefel, Schwefel222
from niapy.problems.rastrigin import Rastrigin
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import svm
import sklearn
from scipy import spatial
import torch
from torch import nn
from PIL import Image
from matplotlib import pyplot as plt
import os
import json
import pygad
import numpy as np
import pandas as pd
import random
from tools.ml_tools import get_data_loaders, nn_test, nn_train, NNType, LSTMClassifier, LinearClassifier
from util.optimization_data import SingleRunData
from util.pop_diversity_metrics import PopDiversityMetric
from tools.optimization_tools import optimization_runner
from tools.meta_ga import MetaGA, MetaGAFitnessFunction
from tools.metaheuristic_similarity_analyzer import MetaheuristicSimilarityAnalyzer

from util.constants import (
    RNG_SEED,
    BATCH_SIZE,
    DATASET_PATH,
    EPOCHS,
    POP_SIZE,
    MAX_ITERS,
    MAX_EVALS,
    NUM_RUNS,
    OPTIMIZATION_PROBLEM,
    META_GA_CROSSOVER_PROBABILITY,
    META_GA_CROSSOVER_TYPE,
    META_GA_GENERATIONS,
    META_GA_K_TOURNAMENT,
    META_GA_KEEP_ELITISM,
    META_GA_MUTATION_NUM_GENES,
    META_GA_MUTATION_TYPE,
    META_GA_PARENT_SELECTION_TYPE,
    META_GA_PERCENT_PARENTS_MATING,
    META_GA_SOLUTIONS_PER_POP,
    GENE_SPACES,
    TARGET_GENE_SPACES,
    POP_DIVERSITY_METRICS,
    INDIV_DIVERSITY_METRICS,
    N_PCA_COMPONENTS,
    LSTM_NUM_LAYERS,
    LSTM_HIDDEN_DIM,
    LSTM_DROPOUT,
    VAL_SIZE,
    TEST_SIZE,
)

# Dataset subset analysed by the plotting / training cells below. This
# assignment replaces the DATASET_PATH imported from util.constants.
DATASET_PATH = "./archive/target_performance_similarity/09-17_15.37.29_WVCPSO_Schwefel/dataset/0_subset"

# Only these algorithm folders are included in the comparison plots.
algorithms_to_plot = ['FA', 'WVCPSO', 'PSO']

# When False, the NN cells load previously saved models instead of training.
execute_training = True

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print("CPUs: ", os.cpu_count())

cpu
CPUs:  12


In [3]:
# Load a stored analyzer and print its similarity values together with the
# optimized and target solutions.
analyzer = MetaheuristicSimilarityAnalyzer.import_from_pkl("./archive/target_performance_similarity/09-17_15.37.29_WVCPSO_Schwefel/msa_obj")
print(analyzer.similarity, "\n")
for solution in analyzer.optimized_solutions:
    print(solution)
print("")
for solution in analyzer.target_solutions:
    print(solution)

# Column-wise statistics, joined with " & " (target columns first, then the
# optimized columns), one row per statistic.
target_solutions_arr = np.array(analyzer.target_solutions)
optimized_solutions_arr = np.array(analyzer.optimized_solutions)
for row_label, reducer in (("min: ", np.min), ("avg: ", np.mean), ("max: ", np.max), ("std: ", np.std)):
    target_cells = " & ".join(map(str, np.round(reducer(target_solutions_arr, axis=0), 2)))
    optimized_cells = " & ".join(map(str, np.round(reducer(optimized_solutions_arr, axis=0), 2)))
    print(row_label, target_cells, "&", optimized_cells)

[0.9758951945778113, 0.9444098228289451, 0.9685555405179425, 0.9961127817660602, 0.9841942448904325, 0.9958329904899021, 0.9925717943835706, 0.9795050504085272, 0.9955789972956697, 0.9900625424281313, 0.9671113247363249, 0.9543454694089516, 0.7814385119635219, 0.9827782654805161, 0.9979705758074211, 0.9859355053312998, 0.9880394430958183, 0.997039442898456, 0.9902485707996629, 0.9977444798815096, 0.998165499883075, 0.9508605029517587, 0.995964298403001, 0.8822917498696059, 0.6794349790139599, 0.9802577239382497, 0.9780038193251145, 0.9953200264371361, 0.9956229311274402, 0.9990439251929232] 

[1.56 1.59 0.06]
[2.12 0.18 0.55]
[0.18 1.4  0.36]
[1.23 1.49 0.83]
[1.69 2.4  0.92]
[1.42 0.89 0.97]
[2.37 1.84 0.63]
[1.79 0.42 0.19]
[0.83 1.44 0.72]
[0.88 0.37 0.41]
[0.2  1.6  0.47]
[1.41 0.01 0.97]
[1.95 1.45 0.5 ]
[0.35 0.07 0.72]
[0.66 0.17 0.01]
[0.1  1.16 0.58]
[2.24 0.08 0.4 ]
[2.   0.22 0.41]
[2.04 1.67 0.94]
[0.75 2.28 0.53]
[0.19 0.86 0.9 ]
[0.29 1.15 0.57]
[0.48 0.24 0.65]
[0.41 1.6

### Optimization

In [None]:
# NOTE(review): both GA instances are loaded from the very same pickle
# ("0_FA_Schwefel/meta_ga_obj"); if ga_instance_1 is meant to be a different
# meta-GA run, its path needs to be corrected - confirm.
meta_ga_obj_path = "./archive/target_performance_similarity/08-01_14.42.43_WVCPSO_Schwefel/0_FA_Schwefel/meta_ga_obj"
ga_instance_0 = pygad.load(meta_ga_obj_path)
ga_instance_1 = pygad.load(meta_ga_obj_path)

In [None]:
# Gene plots for the best and for all solutions, then the new-solution rate.
for solutions_kind in ("best", "all"):
    ga_instance_0.plot_genes(solutions=solutions_kind)
ga_instance_0.plot_new_solution_rate()

# Last recorded best solution and its fitness.
print(ga_instance_0.best_solutions[-1])
print(ga_instance_0.best_solutions_fitness[-1])

In [None]:
# Gene plots for the best and for all solutions, then the new-solution rate.
for solutions_kind in ("best", "all"):
    ga_instance_1.plot_genes(solutions=solutions_kind)
ga_instance_1.plot_new_solution_rate()

# Last recorded best solution and its fitness.
print(ga_instance_1.best_solutions[-1])
print(ga_instance_1.best_solutions_fitness[-1])

In [None]:
# Concatenate the last best solution of both GA instances and decode the
# combined gene vector into algorithm objects.
combined_best_solution = np.concatenate(
    [ga_instance_0.best_solutions[-1], ga_instance_1.best_solutions[-1]]
)
algorithms = MetaGA.solution_to_algorithm_attributes(combined_best_solution, GENE_SPACES, POP_SIZE)

In [None]:
# True: ignore the decoded `algorithms` list and run the hand-picked
# configuration below on a 20-dimensional Schwefel problem.
use_test_setting = True

problem = OPTIMIZATION_PROBLEM

if use_test_setting:
    problem = Schwefel(dimension=20)
    algorithms = [
        #FireflyAlgorithm(population_size=POP_SIZE, alpha=0.239739118, beta0=0.685364775, gamma=0.000244544267, theta=0.996317628),
        #FireflyAlgorithm(population_size=POP_SIZE, alpha=0.84464886, beta0=0.74171366, gamma=0.60686203, theta=0.97758844),
        FireflyAlgorithm(population_size=POP_SIZE, alpha=0.01, beta0=0.43, gamma=0.693, theta=0.962),
        #ParticleSwarmAlgorithm(population_size=POP_SIZE, c1=1.14, c2=0.05, w=0.54),
        #ParticleSwarmAlgorithm(population_size=POP_SIZE, c1=2.00417841, c2=0.70674774, w=0.82266951),
        #ParticleSwarmAlgorithm(population_size=POP_SIZE, c1=1.35776463, c2=1.7283054, w=0.58735118, min_velocity=-93.37297714, max_velocity=55.94034227),
    ]

# Settings shared by every optimization_runner call; only the algorithm varies.
runner_settings = dict(
    problem=problem,
    runs=NUM_RUNS,
    dataset_path="./dataset",
    pop_diversity_metrics=POP_DIVERSITY_METRICS,
    indiv_diversity_metrics=INDIV_DIVERSITY_METRICS,
    max_evals=MAX_EVALS,
    run_index_seed=True,
    keep_pop_data=False,
    parallel_processing=True,
)

for algorithm in algorithms:
    optimization_runner(algorithm=algorithm, **runner_settings)

### Population diversity metrics comparison

In [None]:
# Population diversity metrics plotted for the first (alphabetically sorted)
# run of every algorithm/problem pair found under DATASET_PATH.
pop_metrics_list = [
    PopDiversityMetric.FDC,
    PopDiversityMetric.PDC,
    PopDiversityMetric.PFSD,
    PopDiversityMetric.PFMea,
]

for algorithm in os.listdir(DATASET_PATH):
    for problem in os.listdir(os.path.join(DATASET_PATH, algorithm)):
        runs = sorted(os.listdir(os.path.join(DATASET_PATH, algorithm, problem)))
        if not runs:
            # Nothing recorded for this pair; runs[0] would raise IndexError.
            continue
        run_path = os.path.join(DATASET_PATH, algorithm, problem, runs[0])
        # Parse the run file once and reuse the object for both the fitness
        # print-out and the metric values (the JSON was previously read twice).
        srd = SingleRunData.import_from_json(run_path)
        print(f"best fitness {algorithm} - {problem}: {srd.best_fitness}")
        pop_metrics = srd.get_pop_diversity_metrics_values(metrics=pop_metrics_list, normalize=True)
        ax = pop_metrics.plot(figsize=(25,7), fontsize=15, logy=False)
        ax.set_title(label=" ".join([algorithm, problem]), fontdict={'fontsize':24})
        ax.set_xlabel(xlabel="Iterations", fontdict={'fontsize':20})
        ax.set_ylabel(ylabel="Value", fontdict={'fontsize':20})

### Feature vectors comparison

In [None]:


pop_metrics_list = [
    PopDiversityMetric.AAD,
    PopDiversityMetric.FDC,
    PopDiversityMetric.PDC,
    PopDiversityMetric.PFSD,
    PopDiversityMetric.PFMea,
]

# Walk the 50 temporary meta-GA datasets and load the combined feature vector
# of every run. `feature_vectors` is rebuilt for each dataset, so after the
# loop it only holds the vectors of the last one.
for i in range(50):
    _dataset_path = f"./archive/target_performance_similarity/07-29_22.48.39_WVCPSO_Schwefel/0_FA_Schwefel/meta_ga_tmp_data/{i}_meta_dataset"
    feature_vectors = []
    for idx, algorithm in enumerate(os.listdir(_dataset_path)):
        algorithm_dir = os.path.join(_dataset_path, algorithm)
        for problem in os.listdir(algorithm_dir):
            problem_dir = os.path.join(algorithm_dir, problem)
            runs = sorted(os.listdir(problem_dir))
            for j, run in enumerate(runs):
                # Debug output: run indices of dataset number 20 only.
                if i == 20:
                    print(j)
                srd = SingleRunData.import_from_json(os.path.join(problem_dir, run))
                feature_vectors.append(srd.get_combined_feature_vector())

In [None]:
pop_metrics_list = [
    PopDiversityMetric.AAD,
    PopDiversityMetric.FDC,
    PopDiversityMetric.PDC,
    PopDiversityMetric.PFSD,
    PopDiversityMetric.PFMea,
]


def _cosine_similarity(vector_a, vector_b):
    """Return the cosine similarity (1 - cosine distance) of two 1-D vectors."""
    return 1 - spatial.distance.cosine(vector_a, vector_b)


def _mean_off_diagonal(similarity_matrix):
    """Return the mean of all entries whose row and column index differ.

    For a square n x n matrix this is the average over the n rows of each
    row's mean over its n - 1 off-diagonal entries. Returns NaN when the
    matrix has no off-diagonal entry.
    """
    n_rows, n_cols = similarity_matrix.shape
    n_pairs = n_rows * n_cols - min(n_rows, n_cols)
    if n_pairs <= 0:
        return float("nan")
    return (similarity_matrix.sum() - np.trace(similarity_matrix)) / n_pairs


# Runs of the first algorithm folder go to group 1, all other folders to
# group 2. The folder listing is sorted so the grouping does not depend on
# the file system's arbitrary listing order.
feature_vectors_1 = []
feature_vectors_2 = []
for idx, algorithm in enumerate(sorted(os.listdir(DATASET_PATH))):
    for problem in os.listdir(os.path.join(DATASET_PATH, algorithm)):
        runs = sorted(os.listdir(os.path.join(DATASET_PATH, algorithm, problem)))
        for run in runs:
            run_path = os.path.join(DATASET_PATH, algorithm, problem, run)
            srd = SingleRunData.import_from_json(run_path)
            feature_vector = srd.get_combined_feature_vector()

            if idx == 0:
                feature_vectors_1.append(feature_vector)
            else:
                feature_vectors_2.append(feature_vector)

feature_vectors_list = [feature_vectors_1, feature_vectors_2]

if not feature_vectors_1 or not feature_vectors_2:
    raise ValueError(f"Expected runs of at least two algorithms under {DATASET_PATH}")

# All cross-group cosine similarities, computed once:
# similarity_matrix[a, b] compares run a of group 1 with run b of group 2.
similarity_matrix = 1 - spatial.distance.cdist(
    np.asarray(feature_vectors_1), np.asarray(feature_vectors_2), metric="cosine"
)
# Same-index pairs (run k of group 1 vs. run k of group 2).
paired_similarities = np.diag(similarity_matrix)

# Similarity as a function of the number of runs taken into account.
mean_similarities = []
mean_pairwise_similarities = []
mean_pairwise_total_similarities = []
for i in range(2, len(feature_vectors_1)):
    mean_vector_1 = np.mean(feature_vectors_1[:i], axis=0)
    mean_vector_2 = np.mean(feature_vectors_2[:i], axis=0)
    mean_similarities.append(_cosine_similarity(mean_vector_1, mean_vector_2))
    mean_pairwise_similarities.append(np.mean(paired_similarities[:i]))
    # Mean over all pairs with different run indices. The previous version
    # divided the sum of the per-row means by (n - 1) although it has n terms.
    mean_pairwise_total_similarities.append(_mean_off_diagonal(similarity_matrix[:i, :i]))

plt.plot(np.arange(0, len(mean_pairwise_total_similarities)), mean_pairwise_total_similarities, label="mean cross-pair similarity")
plt.plot(np.arange(0, len(mean_pairwise_similarities)), mean_pairwise_similarities, label="mean same-index pair similarity")
plt.plot(np.arange(0, len(mean_similarities)), mean_similarities, label="similarity of mean vectors")
plt.legend()
plt.show()

# plot mean vectors for visual comparison
mean_vector_1 = np.mean(feature_vectors_1, axis=0)
mean_vector_2 = np.mean(feature_vectors_2, axis=0)
plt.plot(np.arange(0, len(mean_vector_1)), mean_vector_1)
plt.plot(np.arange(0, len(mean_vector_2)), mean_vector_2)
plt.show()
print(_cosine_similarity(mean_vector_1, mean_vector_2), "\n")

print("total similarity: ", _mean_off_diagonal(similarity_matrix))
print("pairwise similarity", np.mean(paired_similarities))

# Overlay the first few individual feature vectors of both groups; the count
# is capped so datasets with fewer than four runs do not raise IndexError.
for vector_idx in range(min(4, len(feature_vectors_1), len(feature_vectors_2))):
    plt.plot(np.arange(0, len(feature_vectors_1[vector_idx])), feature_vectors_1[vector_idx])
    plt.plot(np.arange(0, len(feature_vectors_2[vector_idx])), feature_vectors_2[vector_idx])
    plt.show()

### FDC comparison

In [None]:
# FDC series of the first (alphabetically sorted) run, grouped as
# {problem: {algorithm: series}} for the algorithms selected for plotting.
fdc_values = {}
for algorithm in os.listdir(DATASET_PATH):
    if algorithm in algorithms_to_plot:
        algorithm_dir = os.path.join(DATASET_PATH, algorithm)
        for problem in os.listdir(algorithm_dir):
            problem_dir = os.path.join(algorithm_dir, problem)
            first_run = sorted(os.listdir(problem_dir))[0]
            run = SingleRunData.import_from_json(os.path.join(problem_dir, first_run))
            metrics = run.get_pop_diversity_metrics_values(metrics=[PopDiversityMetric.FDC], normalize=False)
            # Runs without any metric values are left out of the plot.
            if len(metrics) > 0:
                fdc_values.setdefault(problem, {})[algorithm] = metrics.get("fdc")

# One figure per problem, one line per algorithm.
for problem, fdc_by_algorithm in fdc_values.items():
    fdc_frame = pd.DataFrame.from_dict(fdc_by_algorithm)
    ax = fdc_frame.plot(title=problem, figsize=(25, 7), logy=False, fontsize=15)
    ax.legend(fontsize=15)
    ax.set_title(label=problem, fontdict={'fontsize':24})
    ax.set_xlabel(xlabel="Iterations", fontdict={'fontsize':20})

In [None]:
# Line style = one pattern per metric + one colour per algorithm. The first
# two algorithms get exactly the styles of the former fixed 8-entry list
# ('-g', ':g', '--g', '-.g', '-b', ...); that list was indexed with a running
# counter and raised IndexError once a third algorithm folder was present.
metric_line_patterns = ['-', ':', '--', '-.']
algorithm_colors = ['g', 'b', 'r', 'k', 'c', 'm', 'y']
pop_metrics_list = [
    PopDiversityMetric.FDC,
    PopDiversityMetric.PDC,
    PopDiversityMetric.PFSD,
    PopDiversityMetric.PFMea,
]

plotted_algorithms = [
    algorithm for algorithm in os.listdir(DATASET_PATH) if algorithm in algorithms_to_plot
]

# Maps the "<algorithm>_<metric value>" column name to its matplotlib style.
style = {}
for algorithm_idx, algorithm in enumerate(plotted_algorithms):
    color = algorithm_colors[algorithm_idx % len(algorithm_colors)]
    for metric_idx, metric in enumerate(pop_metrics_list):
        pattern = metric_line_patterns[metric_idx % len(metric_line_patterns)]
        style['_'.join([algorithm, metric.value])] = pattern + color

# Metric series of the first (alphabetically sorted) run, grouped as
# {problem: {"<algorithm>_<metric value>": values}}.
metrics_by_problem = {}
for algorithm in plotted_algorithms:
    for problem in os.listdir(os.path.join(DATASET_PATH, algorithm)):
        runs = sorted(os.listdir(os.path.join(DATASET_PATH, algorithm, problem)))
        if not runs:
            continue
        run_path = os.path.join(DATASET_PATH, algorithm, problem, runs[0])
        run = SingleRunData.import_from_json(run_path)
        pop_metrics = run.get_pop_diversity_metrics_values(metrics=pop_metrics_list, normalize=False)
        for metric in pop_metrics_list:
            # Metrics missing from the returned values are skipped.
            if metric.value not in pop_metrics:
                continue
            key = '_'.join([algorithm, metric.value])
            metrics_by_problem.setdefault(problem, {})[key] = pop_metrics.get(metric.value).to_list()


for problem, metrics in metrics_by_problem.items():
    df_metrics = pd.DataFrame.from_dict(metrics)
    ax = df_metrics.plot(style=style, figsize=(25, 7), logy=True, fontsize=15)
    ax.legend(fontsize=15)
    ax.set_title(label=problem, fontdict={'fontsize':24})
    ax.set_xlabel(xlabel="Iterations", fontdict={'fontsize':20})

### Best fitness value convergence comparison

In [None]:
# Best-fitness convergence of the first (alphabetically sorted) run, grouped
# as {problem: {algorithm: values}} for the algorithms selected for plotting.
convergences = {}
for algorithm in os.listdir(DATASET_PATH):
    if algorithm in algorithms_to_plot:
        algorithm_dir = os.path.join(DATASET_PATH, algorithm)
        for problem in os.listdir(algorithm_dir):
            problem_dir = os.path.join(algorithm_dir, problem)
            first_run = sorted(os.listdir(problem_dir))[0]
            run = SingleRunData.import_from_json(os.path.join(problem_dir, first_run))
            print(f"best fitness {algorithm} - {problem}: {run.best_fitness}")
            convergence = run.get_best_fitness_values(normalize=False)
            convergences.setdefault(problem, {})[algorithm] = convergence

# One figure per problem, one line per algorithm.
for problem, convergence_by_algorithm in convergences.items():
    convergence_frame = pd.DataFrame.from_dict(convergence_by_algorithm)
    ax = convergence_frame.plot(title=problem, figsize=(25, 7), logy=False, fontsize=15)
    ax.legend(fontsize=15)
    ax.set_title(label=problem, fontdict={'fontsize':24})
    ax.set_xlabel(xlabel="Iterations", fontdict={'fontsize':20})

### Individual diversity metrics comparison

In [None]:
# Individual diversity metric values of the first (alphabetically sorted)
# run, grouped as {problem: {"<algorithm>_<metric value>": values}}.
metrics_by_problem = {}
for algorithm in os.listdir(DATASET_PATH):
    if algorithm not in algorithms_to_plot:
        continue
    for problem in os.listdir(os.path.join(DATASET_PATH, algorithm)):
        runs = sorted(os.listdir(os.path.join(DATASET_PATH, algorithm, problem)))
        if not runs:
            continue
        run_path = os.path.join(DATASET_PATH, algorithm, problem, runs[0])
        run = SingleRunData.import_from_json(run_path)
        indiv_metrics = run.get_indiv_diversity_metrics_values(normalize=False)
        for metric in INDIV_DIVERSITY_METRICS:
            # Skip metrics the run does not provide; .get() would return None
            # and the .to_list() call would fail.
            if metric.value not in indiv_metrics:
                continue
            key = '_'.join([algorithm, metric.value])
            metrics_by_problem.setdefault(problem, {})[key] = indiv_metrics.get(metric.value).to_list()


for problem, metrics in metrics_by_problem.items():
    df_metrics = pd.DataFrame.from_dict(metrics)

    # squeeze=False always yields a 2-D axes array, so indexing also works
    # when only a single metric is configured.
    fig, axes = plt.subplots(1, len(INDIV_DIVERSITY_METRICS), figsize=(25, 6), squeeze=False)
    fig.suptitle(problem, fontsize=23)

    for idx, metric in enumerate(INDIV_DIVERSITY_METRICS):
        ax = axes[0, idx]
        ax.set_title(label=metric.name, fontdict={'fontsize':20})
        # Select this metric's columns by their exact "_<metric value>" suffix
        # rather than by regex/substring match, then strip the suffix so the
        # box labels show the algorithm name only.
        suffix = '_' + metric.value
        metric_columns = [column for column in df_metrics.columns if column.endswith(suffix)]
        if not metric_columns:
            continue
        df_metric = df_metrics[metric_columns].rename(columns=lambda column: column[:-len(suffix)])
        df_metric.plot(ax=ax, kind="box", logy=False, fontsize=15)
        ax.set_title(label=metric.name, fontdict={'fontsize':20})

In [None]:
# Bar plot of the normalized individual diversity metrics of the first
# (alphabetically sorted) run of every algorithm/problem pair, followed by a
# PCA of those metric values.
for algorithm in os.listdir(DATASET_PATH):
    for problem in os.listdir(os.path.join(DATASET_PATH, algorithm)):
        runs = sorted(os.listdir(os.path.join(DATASET_PATH, algorithm, problem)))
        if not runs:
            # Nothing recorded for this pair; runs[0] would raise IndexError.
            continue
        run_path = os.path.join(DATASET_PATH, algorithm, problem, runs[0])
        # Parse the run file once (it was previously loaded twice).
        srd = SingleRunData.import_from_json(run_path)
        indiv_metrics = srd.get_indiv_diversity_metrics_values(normalize=True)
        ax = indiv_metrics.plot(figsize=(25, 7), kind="bar", logy=True, fontsize=20, rot=0)
        ax.legend(fontsize=15)
        ax.set_title(label=" ".join([algorithm, problem]), fontdict={'fontsize':24})
        ax.set_xlabel(xlabel="Individuals", fontdict={'fontsize':20})

        # NOTE(review): the PCA results below are computed but not used or
        # shown anywhere in this cell.
        indiv_metrics = indiv_metrics.to_numpy()
        pca = PCA(n_components=N_PCA_COMPONENTS)
        principal_components = pca.fit_transform(indiv_metrics)
        variance = pca.explained_variance_ratio_

### NN training and test

### LSTM model

In [None]:
# Data loaders for the LSTM classifier, restricted to the configured
# optimization problem.
lstm_loader_settings = dict(
    dataset_path=DATASET_PATH,
    batch_size=BATCH_SIZE,
    val_size=VAL_SIZE,
    test_size=TEST_SIZE,
    n_pca_components=N_PCA_COMPONENTS,
    problems=[OPTIMIZATION_PROBLEM.name()],
    dataset_subsets=True,
    random_state=RNG_SEED,
)
train_data_loader, val_data_loader, test_data_loader, actual_labels = get_data_loaders(**lstm_loader_settings)

# One batch is fetched only to read the input sizes for the network.
pop_features, indiv_features, target = next(iter(train_data_loader))
lstm_model = LSTMClassifier(
    input_dim=np.shape(pop_features)[2],
    aux_input_dim=np.shape(indiv_features)[1],
    num_labels=len(actual_labels),
    hidden_dim=LSTM_HIDDEN_DIM,
    num_layers=LSTM_NUM_LAYERS,
    dropout=LSTM_DROPOUT,
)

optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-5)
loss_fn = nn.CrossEntropyLoss()
lstm_model_filename = "./lstm_model.pt"

if not execute_training:
    # Reuse the previously saved model and, if present, show the stored loss plot.
    lstm_model = torch.load(lstm_model_filename, map_location=torch.device(device))
    lstm_model.to(device)
    if os.path.exists('loss_plot.png'):
        loss_plot = np.asarray(Image.open('loss_plot.png'))
        plt.axis("off")
        plt.imshow(loss_plot)
else:
    lstm_model.to(device)
    nn_train(
        model=lstm_model,
        train_data_loader=train_data_loader,
        val_data_loader=val_data_loader,
        epochs=EPOCHS,
        loss_fn=loss_fn,
        optimizer=optimizer,
        device=device,
        model_filename=lstm_model_filename,
        verbal=True,
    )

In [None]:
# Evaluate the LSTM classifier on the test loader, requesting the
# classification report.
lstm_test_settings = dict(
    model=lstm_model,
    test_data_loader=test_data_loader,
    device=device,
    labels=actual_labels,
    show_classification_report=True,
)
nn_test(**lstm_test_settings)

### Linear model

In [None]:
# Data loaders for the linear classifier (NNType.LINEAR), restricted to the
# configured optimization problem.
linear_loader_settings = dict(
    dataset_path=DATASET_PATH,
    batch_size=100,
    val_size=VAL_SIZE,
    test_size=TEST_SIZE,
    n_pca_components=N_PCA_COMPONENTS,
    problems=[OPTIMIZATION_PROBLEM.name()],
    dataset_subsets=True,
    nn_type=NNType.LINEAR,
    random_state=RNG_SEED,
)
train_data_loader, val_data_loader, test_data_loader, actual_labels = get_data_loaders(**linear_loader_settings)

# One batch is fetched only to read the input size for the network.
feature_vector, target = next(iter(train_data_loader))

linear_model = LinearClassifier(
    input_dim=np.shape(feature_vector)[1],
    num_labels=len(actual_labels),
)

optimizer = torch.optim.Adam(linear_model.parameters(), lr=1e-5)
loss_fn = nn.CrossEntropyLoss()
linear_model_filename = "./linear_model.pt"

if not execute_training:
    # Reuse the previously saved model and, if present, show the stored loss plot.
    linear_model = torch.load(linear_model_filename, map_location=torch.device(device))
    linear_model.to(device)
    if os.path.exists('loss_plot.png'):
        loss_plot = np.asarray(Image.open('loss_plot.png'))
        plt.axis("off")
        plt.imshow(loss_plot)
else:
    linear_model.to(device)
    nn_train(
        model=linear_model,
        train_data_loader=train_data_loader,
        val_data_loader=val_data_loader,
        epochs=200,
        loss_fn=loss_fn,
        optimizer=optimizer,
        device=device,
        model_filename=linear_model_filename,
        nn_type=NNType.LINEAR,
        verbal=True,
    )


In [None]:
# Evaluate the linear classifier on the test loader, requesting the
# classification report.
linear_test_settings = dict(
    model=linear_model,
    test_data_loader=test_data_loader,
    device=device,
    nn_type=NNType.LINEAR,
    labels=actual_labels,
    show_classification_report=True,
)
nn_test(**linear_test_settings)

### Clustering

#### By subset

In [None]:
# Cluster the feature vectors of one subset: target vectors (label 0,
# marker "o") against optimized-solution vectors (label 1, marker "x").
analyzer = MetaheuristicSimilarityAnalyzer.import_from_pkl("./archive/target_performance_similarity/06-28_10.33.39_FA_Schwefel/msa_obj")
subset_idx = 1

target_vectors = list(analyzer.target_feature_vectors[subset_idx])
optimized_vectors = list(analyzer.optimized_solutions_feature_vectors[subset_idx])
combined_feature_vectors = target_vectors + optimized_vectors
markers = ["o"] * len(target_vectors) + ["x"] * len(optimized_vectors)
actual_labels = [0] * len(target_vectors) + [1] * len(optimized_vectors)


pca = PCA(n_components=3)
pca_feature_vectors = pca.fit_transform(combined_feature_vectors)
print(pca.explained_variance_ratio_)

kmeans = KMeans(n_clusters=2, random_state=RNG_SEED)
predicted_labels = kmeans.fit_predict(combined_feature_vectors)
# KMeans cluster ids are arbitrary. Flip them when the swapped assignment
# agrees better with the actual labels; otherwise the report and confusion
# matrix below can be inverted for an otherwise good clustering.
if np.mean(predicted_labels == np.asarray(actual_labels)) < 0.5:
    predicted_labels = 1 - predicted_labels

# classification report
print(sklearn.metrics.classification_report(actual_labels, predicted_labels))

# confusion matrix
confusion_matrix = sklearn.metrics.confusion_matrix(actual_labels, predicted_labels)
cm_display = sklearn.metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = [0, 1])
cm_display.plot()
plt.show()
print(predicted_labels)

# scatter plot: colour = predicted cluster, marker = actual group
colors = ["orange", "blue"]
fig = plt.figure(figsize = (20, 10))
ax = plt.axes(projection ="3d")
for location, label, marker in zip(pca_feature_vectors, predicted_labels, markers):
    ax.scatter3D(location[0], location[1], location[2], color = colors[label], marker=marker, s=30)

ax.set_xlabel("PCA_1")
ax.set_ylabel("PCA_2")
ax.set_zlabel("PCA_3")
plt.title("Features")
plt.show()

#### All subsets

In [None]:
# Cluster the feature vectors of all subsets: target vectors (label 0,
# marker "o") against optimized-solution vectors (label 1, marker "x").
# With average_feature_vectors=True each subset contributes its mean vector.
average_feature_vectors = False
analyzer = MetaheuristicSimilarityAnalyzer.import_from_pkl("./archive/target_performance_similarity/07-30_08.14.05_WVCPSO_Schwefel/msa_obj")

if average_feature_vectors:
    target_vectors = [np.mean(array, axis=0) for array in analyzer.target_feature_vectors]
    optimized_vectors = [np.mean(array, axis=0) for array in analyzer.optimized_solutions_feature_vectors]
else:
    target_vectors = [vector for array in analyzer.target_feature_vectors for vector in array]
    optimized_vectors = [vector for array in analyzer.optimized_solutions_feature_vectors for vector in array]

combined_feature_vectors = target_vectors + optimized_vectors
markers = ["o"] * len(target_vectors) + ["x"] * len(optimized_vectors)
actual_labels = [0] * len(target_vectors) + [1] * len(optimized_vectors)


pca = PCA(n_components=3)
pca_feature_vectors = pca.fit_transform(combined_feature_vectors)
print(pca.explained_variance_ratio_)

kmeans = KMeans(n_clusters=2, random_state=RNG_SEED)
predicted_labels = kmeans.fit_predict(combined_feature_vectors)
# KMeans cluster ids are arbitrary. Flip them when the swapped assignment
# agrees better with the actual labels; otherwise the report and confusion
# matrix below can be inverted for an otherwise good clustering.
if np.mean(predicted_labels == np.asarray(actual_labels)) < 0.5:
    predicted_labels = 1 - predicted_labels

# classification report
print(sklearn.metrics.classification_report(actual_labels, predicted_labels))

# confusion matrix
confusion_matrix = sklearn.metrics.confusion_matrix(actual_labels, predicted_labels)
cm_display = sklearn.metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = [0, 1])
cm_display.plot()
plt.show()
print(predicted_labels)

# scatter plot: colour = predicted cluster, marker = actual group
colors = ["orange", "blue"]
fig = plt.figure(figsize = (20, 10))
ax = plt.axes(projection ="3d")
for location, label, marker in zip(pca_feature_vectors, predicted_labels, markers):
    ax.scatter3D(location[0], location[1], location[2], color = colors[label], marker=marker, s=30)

ax.set_xlabel("PCA_1")
ax.set_ylabel("PCA_2")
ax.set_zlabel("PCA_3")
plt.title("Features")
plt.show()

### Pairwise feature vectors visualization

In [None]:
# 3-D PCA scatter of target vs. optimized-solution feature vectors. Every
# point is annotated with its index inside its own group, so points carrying
# the same number in both groups can be compared.
average_feature_vectors = True
analyzer = MetaheuristicSimilarityAnalyzer.import_from_pkl("./archive/target_performance_similarity/06-28_10.33.39_FA_Schwefel/msa_obj")
combined_feature_vectors = []
actual_labels = []
colors = []
number_labels = []

# Fixed, clearly distinct colours per group (label 0 = target, 1 = optimized)
# instead of two random RGB triples that could end up nearly identical.
group_colors = ["orange", "blue"]

vector_groups = (analyzer.target_feature_vectors, analyzer.optimized_solutions_feature_vectors)
for group_label, arrays in enumerate(vector_groups):
    if average_feature_vectors:
        # one mean vector per subset
        group_vectors = [np.mean(array, axis=0) for array in arrays]
    else:
        group_vectors = [vector for array in arrays for vector in array]
    combined_feature_vectors.extend(group_vectors)
    actual_labels.extend([group_label] * len(group_vectors))
    colors.extend([group_colors[group_label]] * len(group_vectors))
    # Number the points from the actual vector count; the count was
    # previously hard-coded to 150 runs per subset.
    number_labels.extend(range(len(group_vectors)))

pca = PCA(n_components=3)
pca_feature_vectors = pca.fit_transform(combined_feature_vectors)
print(pca.explained_variance_ratio_)

# scatter plot
fig = plt.figure(figsize=(30, 20))
ax = plt.axes(projection="3d")
for idx, location in enumerate(pca_feature_vectors):
    ax.scatter3D(location[0], location[1], location[2], color = colors[idx], s=30)
    ax.text(location[0], location[1], location[2], str(number_labels[idx]), size=10, zorder=10, color='k')

ax.set_xlabel("PCA_1")
ax.set_ylabel("PCA_2")
ax.set_zlabel("PCA_3")
plt.title("Features")
plt.show()

### K Neighbors & SVM Classifiers

In [None]:
# Train an RBF SVM and a kNN classifier to separate target feature vectors
# (label 0) from optimized-solution feature vectors (label 1).
average_feature_vectors = False
analyzer = MetaheuristicSimilarityAnalyzer.import_from_pkl("./archive/target_performance_similarity/07-30_08.14.05_WVCPSO_Schwefel/msa_obj")
combined_feature_vectors = []
actual_labels = []

vector_groups = (analyzer.target_feature_vectors, analyzer.optimized_solutions_feature_vectors)
for group_label, arrays in enumerate(vector_groups):
    for array in arrays:
        if average_feature_vectors:
            # NOTE(review): averaged vectors are not min-max scaled, unlike
            # the individual vectors below - confirm this is intended.
            vectors = [np.mean(array, axis=0)]
        else:
            vectors = [
                sklearn.preprocessing.minmax_scale(vector, feature_range=(0, 1))
                for vector in array
            ]
        combined_feature_vectors.extend(vectors)
        actual_labels.extend([group_label] * len(vectors))

# train test split, seeded so the reported scores are reproducible
X_train, X_test, y_train, y_test = train_test_split(
    combined_feature_vectors, actual_labels, test_size = 0.2, shuffle=True, random_state=RNG_SEED
)

# K-SVM classifier
k_svm = svm.SVC(kernel='rbf')
k_svm.fit(X_train, y_train)
svm_training_score = k_svm.score(X_train, y_train)
svm_test_score = k_svm.score(X_test, y_test)
print("SVM train:", svm_training_score)
print("SVM test:", svm_test_score)

# kNN classifier
knc = KNeighborsClassifier(n_neighbors = 10)
knc.fit(X_train, y_train)
knn_training_score = knc.score(X_train, y_train)
knn_test_score = knc.score(X_test, y_test)
print("KNN train:", knn_training_score)
print("KNN test:", knn_test_score)

### Comparison of feature vectors with metaheuristics constant parameters

In [None]:
# Compare the feature vectors of the runs stored under ./dataset: runs of the
# first algorithm folder form group 1 (label 0, marker "o"), all remaining
# folders form group 2 (label 1, marker "x"). The folder listing is sorted so
# the grouping does not depend on the file system's listing order.
dataset_path = "./dataset"
feature_vectors_1 = []
feature_vectors_2 = []
for idx, algorithm in enumerate(sorted(os.listdir(dataset_path))):
    for problem in os.listdir(os.path.join(dataset_path, algorithm)):
        runs = sorted(os.listdir(os.path.join(dataset_path, algorithm, problem)))
        for run in runs:
            run_path = os.path.join(dataset_path, algorithm, problem, run)
            srd = SingleRunData.import_from_json(run_path)
            feature_vector = srd.get_combined_feature_vector()

            if idx == 0:
                feature_vectors_1.append(feature_vector)
            else:
                feature_vectors_2.append(feature_vector)

combined_feature_vectors = feature_vectors_1 + feature_vectors_2
markers = ["o"] * len(feature_vectors_1) + ["x"] * len(feature_vectors_2)
actual_labels = [0] * len(feature_vectors_1) + [1] * len(feature_vectors_2)
# Point annotations: index of each run inside its own group, taken from the
# real group sizes (previously hard-coded to 150 runs per group).
number_labels = list(range(len(feature_vectors_1))) + list(range(len(feature_vectors_2)))


pca = PCA(n_components=3)
pca_feature_vectors = pca.fit_transform(combined_feature_vectors)
print(pca.explained_variance_ratio_)

kmeans = KMeans(n_clusters=2)#, random_state=RNG_SEED)
predicted_labels = kmeans.fit_predict(combined_feature_vectors)
# KMeans cluster ids are arbitrary. Flip them when the swapped assignment
# agrees better with the actual labels, so the classification report and the
# confusion matrix are not inverted. The adjusted Rand score is unaffected.
if np.mean(predicted_labels == np.asarray(actual_labels)) < 0.5:
    predicted_labels = 1 - predicted_labels

# classification report
print(sklearn.metrics.cluster.adjusted_rand_score(actual_labels, predicted_labels))
print(sklearn.metrics.classification_report(actual_labels, predicted_labels))

# confusion matrix
confusion_matrix = sklearn.metrics.confusion_matrix(actual_labels, predicted_labels)
cm_display = sklearn.metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = [0, 1])
cm_display.plot()
plt.show()
print(predicted_labels)

# train test split, seeded so the reported scores are reproducible
X_train, X_test, y_train, y_test = train_test_split(
    combined_feature_vectors, actual_labels, test_size = 0.2, shuffle=True, random_state=RNG_SEED
)

# K-SVM classifier
k_svm = svm.SVC(kernel='rbf')
k_svm.fit(X_train, y_train)
svm_training_score = k_svm.score(X_train, y_train)
svm_test_score = k_svm.score(X_test, y_test)
print("SVM train:", svm_training_score)
print("SVM test:", svm_test_score)

# kNN classifier
knc = KNeighborsClassifier(n_neighbors = 10)
knc.fit(X_train, y_train)
knn_training_score = knc.score(X_train, y_train)
knn_test_score = knc.score(X_test, y_test)
print("KNN train:", knn_training_score)
print("KNN test:", knn_test_score)

# scatter plot: colour = predicted cluster, marker = actual group
colors = ["orange", "blue"]
fig = plt.figure(figsize = (30, 20))
ax = plt.axes(projection ="3d")
for idx, (location, label, marker) in enumerate(zip(pca_feature_vectors, predicted_labels, markers)):
    ax.scatter3D(location[0], location[1], location[2], color = colors[label], marker=marker, s=30)
    ax.text(location[0], location[1], location[2], str(number_labels[idx]), size=10, zorder=10, color='k')

ax.set_xlabel("PCA_1")
ax.set_ylabel("PCA_2")
ax.set_zlabel("PCA_3")
plt.title("Features")
plt.show()