In [None]:
from landscape_analysis_toolbox.constrained_landscape_features import (
    ConstrainedLandscapeFeatures,
)
from distribution_optimization_py.constrained_landscape_analysis import (
    DistributionOptimizationProblem,
)
from distribution_optimization_py.datasets import DATASETS, Dataset
import pandas as pd
import numpy as np

# Build one landscape-feature table per benchmark dataset, keyed by dataset name.
dataset_name_to_features_df: dict[str, pd.DataFrame] = {}
for dataset in DATASETS:
    problem = DistributionOptimizationProblem(
        data=dataset.data,
        nr_of_modes=dataset.nr_of_modes,
    )

    def draw_initial_points(sample_size, _):
        """Return an array stacking `sample_size` results of problem.initialize().

        The second positional argument is accepted but ignored.
        """
        points = []
        for _draw in range(sample_size):
            points.append(problem.initialize())
        return np.array(points)

    feature_calculator = ConstrainedLandscapeFeatures(
        bounds=problem.get_bounds(),
        fitness=lambda x: problem.fitness(x),
        violation=lambda x: problem.violation(x),
        sampler=draw_initial_points,
    )

    # Announce the dataset first so the plot emitted next can be attributed to it.
    print(dataset.name)
    feature_calculator.plot_fitness_validation()

    features_df = feature_calculator.compute_features()
    dataset_name_to_features_df[dataset.name] = features_df

In [None]:
# Reduce every per-dataset feature table to the result of its .mean() call,
# keeping the same dataset-name keys.
dataset_name_to_mean_features_df = dict(
    (dataset_name, features_df.mean())
    for dataset_name, features_df in dataset_name_to_features_df.items()
)

In [None]:
# Stack the per-dataset mean-feature Series into a single table.
# After the transpose there is one row per dataset (labelled with the dataset
# name) and one column per entry of the mean Series.
#
# Series.rename(<scalar>) returns a renamed Series instead of assigning to
# `.name` in place, so the objects held in dataset_name_to_mean_features_df
# are not modified by this cell and re-running it has no side effects on them.
all_mean_dfs = [
    mean_features.rename(name)
    for name, mean_features in dataset_name_to_mean_features_df.items()
]
all_mean_df = pd.concat(all_mean_dfs, axis=1).T

In [None]:
# Show the combined table (the transposed concat of the per-dataset means).
all_mean_df

In [None]:
from distribution_optimization_py.problem import GaussianMixtureProblem
from distribution_optimization_py.datasets import DATASETS
import numpy as np
import plotly.express as px

# Explore how the error terms are distributed over random genomes.
# For every dataset: draw N points uniformly between problem.lower and
# problem.upper, evaluate overlap_error_by_density, similarity_error and
# log_likelihood on each, and show one histogram per quantity.
N = 100
# Fixed seed so that Restart Kernel -> Run All reproduces the same histograms.
rng = np.random.default_rng(42)
for dataset in DATASETS:
    problem = GaussianMixtureProblem(
        data=dataset.data, nr_of_modes=dataset.nr_of_modes, id=dataset.name
    )
    nr_of_modes = dataset.nr_of_modes

    samples = rng.uniform(problem.lower, problem.upper, (N, problem.lower.size))

    # Both error functions take the same three positional slices of a genome:
    # everything from 2 * nr_of_modes onward, then the middle nr_of_modes
    # entries, then the first nr_of_modes entries. Slice once and reuse.
    genome_parts = [
        (
            sample[2 * nr_of_modes :],
            sample[nr_of_modes : 2 * nr_of_modes],
            sample[:nr_of_modes],
        )
        for sample in samples
    ]
    errors = [problem.overlap_error_by_density(*parts) for parts in genome_parts]
    fitness_values = [problem.similarity_error(*parts) for parts in genome_parts]
    likelihood_values = [problem.log_likelihood(sample) for sample in samples]

    print(dataset.name)
    # Title each figure; otherwise the three histograms per dataset cannot be
    # told apart when the notebook output is skimmed.
    px.histogram(
        errors, nbins=100, title=f"{dataset.name}: overlap_error_by_density"
    ).show()
    px.histogram(
        fitness_values, nbins=100, title=f"{dataset.name}: similarity_error"
    ).show()
    px.histogram(
        likelihood_values, nbins=100, title=f"{dataset.name}: log_likelihood"
    ).show()
    print(min(fitness_values))

In [None]:
# Look at the similarity error in the neighbourhood of the stored solution of
# the first dataset: sample genomes from a Gaussian centred on the solution
# and plot similarity_error against Euclidean distance to that solution.
dataset = DATASETS[0]
problem = GaussianMixtureProblem(
    data=dataset.data, nr_of_modes=dataset.nr_of_modes, id=dataset.name
)
solution = dataset.solution.genome
nr_of_modes = dataset.nr_of_modes

N = 1000

# Fixed seed so that Restart Kernel -> Run All reproduces the same scatter plot.
rng = np.random.default_rng(42)
# Isotropic Gaussian around the solution: covariance is 0.1 * identity.
samples = rng.multivariate_normal(
    mean=solution, cov=np.identity(solution.size) * 0.1, size=N
)

# Both error functions take the same three positional slices of a genome:
# everything from 2 * nr_of_modes onward, then the middle nr_of_modes entries,
# then the first nr_of_modes entries. Slice once and reuse.
genome_parts = [
    (
        sample[2 * nr_of_modes :],
        sample[nr_of_modes : 2 * nr_of_modes],
        sample[:nr_of_modes],
    )
    for sample in samples
]
# NOTE(review): `errors` is not used by the plot below; it is kept in case it
# is inspected elsewhere - confirm, and drop it if nothing reads it.
errors = [problem.overlap_error_by_density(*parts) for parts in genome_parts]
fitness_values = [problem.similarity_error(*parts) for parts in genome_parts]

# Euclidean distance of every sample to the solution.
distances = np.linalg.norm(samples - solution, axis=1)
px.scatter(
    x=distances,
    y=fitness_values,
    labels={"x": "Euclidean distance to solution", "y": "similarity_error"},
    title=f"{dataset.name}: similarity_error around dataset.solution",
)

In [None]:
# Show the smallest value in fitness_values (the similarity_error results).
min(fitness_values)