## Imports

In [1]:
import torch
from src.pso import SwarmBatch
from src.plots import create_animation, create_heatmap_animation, boxplot
from src.benchmarks import BenchmarkFunction, PARTITIONS, get_benchmarks_partitions
from src.gp import test
from src.custom_velocity_formula import (
    custom_velocity,
    VEL,
    clean_func,
    simplified_func,
    make_custom_velocity_from_yaml,
)
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from src.best_evolved import create_function_name, change_function_name
import mlflow
import pandas as pd
from scipy.stats import mannwhitneyu, wilcoxon
import plotly.express as px
from pathlib import Path
from tqdm.notebook import tqdm

from multiprocessing import Pool, cpu_count
import concurrent.futures

# Run on the GPU when torch can see one, otherwise on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Directory that receives every artefact written by this notebook.
results_path = Path("results_dev")
results_path.mkdir(exist_ok=True)

# Benchmark names in alphabetical order.
benchmark_names = sorted(get_benchmarks_partitions().keys())
print(benchmark_names)

['ackley', 'cigar', 'easom', 'griewank', 'levy', 'rastrigin', 'rosenbrock', 'schwefel', 'sphere', 'zakharov']


## Filter Runs

In [2]:
# Set dynamic parameters

# --- Experiment grid -------------------------------------------------------
dims = [30, 50, 100]
dims_len = len(dims)
n_partitions = len(PARTITIONS)
partitions_len = n_partitions
expected_n_runs = 30

# --- Values matched against the logged MLflow params when filtering runs ---
n_problems = 100
n_particles = 100
lower_bounds, upper_bounds = -5.0, 5.0
bounds = (lower_bounds, upper_bounds)
test_optimum_bounds = (-2, 2)
n_iterations = 100
max_depth_limit = 10
pop_size = 50
max_mut_depth = 3
max_init_depth = 3
cxpb = 0.7
mutpb = 0.3
tourn_size = 2

# Both lists are compared through their str() form in the run filter, so the
# element order matters.
arguments = [
    "positions",
    "velocity",
    "gbest",
    "pbest",
    "center",
    "magnitude",
    "dispersion",
    "num_particles",
]
context = ["add", "sub", "mul", "div", "inv", "cos"]


## Find best functions

In [23]:
# Params a training run must have been logged with to be selected.
# (The dimension is intentionally left out; it is filtered afterwards.)
_train_filter_params = {
    "n_problems": n_problems,
    "n_particles": n_particles,
    "n_generations": n_iterations,
    "max_depth_limit": max_depth_limit,
    "lower_bound": lower_bounds,
    "pop_size": pop_size,
    "max_mut_depth": max_mut_depth,
    "max_init_depth": max_init_depth,
    "cxpb": cxpb,
    "mutpb": mutpb,
    "arguments": str(arguments),
    "tourn_size": tourn_size,
    "context": str(context),
}

# MLflow filter string: each param is compared as a quoted string and the
# clauses are joined with "and".
filter_str = " and ".join(
    f'params.{name} = "{wanted}"' for name, wanted in _train_filter_params.items()
)

# Fetch the matching runs, then keep only the dimensions listed in `dims`.
runs_gppso = mlflow.search_runs(
    experiment_names=["GP+PSO [train]"], filter_string=filter_str
)
runs_gppso = runs_gppso.loc[
    lambda frame: frame["params.dim"].astype(int).isin(dims)
]

In [24]:
# Maps the str() form of a partition's test-benchmark list back to its key.
inverse_dict = {str(spec["test"]): key for key, spec in PARTITIONS.items()}

# Number of runs per (dim, run_type) row and partition column.
df_grouped = (
    runs_gppso.groupby(["params.dim", "params.test_benchmarks", "params.run_type"])
    .size()
    .reset_index()
    .set_axis(["dim", "partition", "run_type", "count"], axis=1)
    .assign(
        partition=lambda t: t["partition"]
        .apply(lambda x: inverse_dict[x])
        .astype(int),
        dim=lambda t: t["dim"].astype(str),
    )
    .pivot_table(
        index="partition", columns=["dim", "run_type"], values="count", fill_value=0
    )
    .T
)

df_grouped

Unnamed: 0_level_0,partition,1,2,3,4,5
dim,run_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100,gp+pso train,30.0,29.0,30.0,30.0,30.0
30,gp+pso train,30.0,30.0,30.0,30.0,30.0
50,gp+pso train,30.0,30.0,30.0,30.0,30.0


In [25]:
# Keep finished runs and, inside every (partition, dim) group, retain the runs
# whose train fitness ranks in the top three (rank 1 = lowest value). Ties share
# the smallest rank, so a group can keep more than three rows. All original
# columns are preserved; only "partition" and "rank" are added.
df = (
    runs_gppso[runs_gppso["status"] == "FINISHED"]
    .copy()
    .assign(
        partition=lambda t: t["params.test_benchmarks"]
        .apply(lambda x: inverse_dict[x])
        .astype(int)
    )
    .assign(
        rank=lambda t: t.groupby(["partition", "params.dim"])[
            "metrics.train_fitness"
        ].rank(method="min")
    )
    .loc[lambda t: t["rank"] <= 3]
    .loc[lambda t: t["params.dim"] != "10"]
    .reset_index(drop=True)
)


In [27]:
# Run ids of the selected top-ranked runs, in row order.
new_run_ids = df["run_id"].tolist()
new_run_ids

['3296f62ab43f49a5bc3a1c0eee4089b2',
 'ac4b1db429aa4026aad39230be3d60f2',
 'c7ec94d9c9be4338a3535a3a504c17df',
 '7bca1fa9088443ac8b80d9d7f4521384',
 '34623c7a393444518373eaed94ed6da4',
 '340d842e71104186a858584f1e27cb5a',
 '559d7f4400334f1fbe576435bbd752e7',
 '49a1223265184ac8a48357c14100498e',
 'ee38cbed8fbc448e9c269c024207dc55',
 'f1fad5761e8643729edb37555cd01388',
 'e188567885234209b86dffa08ae0a39e',
 'fe086d5b0ea1443cb8627b9e7bbc5a20',
 '1b1e2825947b42bdabe751c5160f1eb7',
 '45251ff6d0bb44fca236831593b03d4e',
 '4749f7a727634f8198c176c6a280087b',
 '4f7a1fd4afab44929ddca72f03dcd61a',
 '1761d4f7e5fd4ac3887d1cf4caea70ee',
 'bef55e6aa98d4647a1230cfdf2c95657',
 '9b4b8424273147bca66eb4cb56e864e2',
 '175bb7b71b5c42b4b3be2fd1f2b29bea',
 '22cd5f85c4724d25a1681a4a4d00c30e',
 '00ccd6f9c1084f91a0e6a8e4e26a1f71',
 'b6cd14d6d99f4323826590f37d002dbb',
 'e7ff67da1eee4340b89b70f0b390e9f8',
 '037d1a7bd04440a491fb9fb0e1f67d81',
 'b623458c06ea46e7a7fa2116626595b0',
 'd4578883d8764b35b00a5716b75d9d43',
 

'3296f62ab43f49a5bc3a1c0eee4089b2',
'ac4b1db429aa4026aad39230be3d60f2',
'c7ec94d9c9be4338a3535a3a504c17df',
'7bca1fa9088443ac8b80d9d7f4521384',
'34623c7a393444518373eaed94ed6da4',
'340d842e71104186a858584f1e27cb5a',
'559d7f4400334f1fbe576435bbd752e7',
'49a1223265184ac8a48357c14100498e',
'ee38cbed8fbc448e9c269c024207dc55',
'f1fad5761e8643729edb37555cd01388',
'e188567885234209b86dffa08ae0a39e',
'fe086d5b0ea1443cb8627b9e7bbc5a20',
'1b1e2825947b42bdabe751c5160f1eb7',
'45251ff6d0bb44fca236831593b03d4e',
'4749f7a727634f8198c176c6a280087b',
'4f7a1fd4afab44929ddca72f03dcd61a',
'1761d4f7e5fd4ac3887d1cf4caea70ee',
'bef55e6aa98d4647a1230cfdf2c95657',
'9b4b8424273147bca66eb4cb56e864e2',
'175bb7b71b5c42b4b3be2fd1f2b29bea',
'22cd5f85c4724d25a1681a4a4d00c30e',
'00ccd6f9c1084f91a0e6a8e4e26a1f71',
'b6cd14d6d99f4323826590f37d002dbb',
'e7ff67da1eee4340b89b70f0b390e9f8',
'037d1a7bd04440a491fb9fb0e1f67d81',
'b623458c06ea46e7a7fa2116626595b0',
'd4578883d8764b35b00a5716b75d9d43',
'8324b70b03aa4be1bb654b92541a3927',
'c05cd3d6deba4d25a52601c7e5b78271',
'b302832094ec42c1bcd775405af1876e',
'cbf9533f1ab94bdb972cbae7454a279a',
'290f9721551247d487ef953b0b7e0285',
'b182a91c47e3493e85a5e5e99f9fe86a',
'610a03f1d8204e14bc427609c960c400',
'ab5406b3a0034740bc9261dea63281ad',
'42a73465fe0749d79a3321a6c67ec577',
'38d061f804be4f6a9b10365f05059478',
'2a8c59f91e3c41e987211e8718a3e11e',
'b2f6663bc45d43b89781a86145fe3b1e',
'a47aa4c3a6724e4088e2e198bc279853',
'6c6a77ea79334d36b12778f2d6a2b90f',
'00183d9d3cd64faf816fd0d38fc87b41',
'd0a845bb3bf942e282b31aa06e77c5bc',
'17d66b4f26324c79a0294c45290a0770',
'8a131b84402f4b56bd38308ab5bff51e'

--run-ids e188567885234209b86dffa08ae0a39e \
--run-ids fe086d5b0ea1443cb8627b9e7bbc5a20 \
--run-ids 1b1e2825947b42bdabe751c5160f1eb7 \
--run-ids 45251ff6d0bb44fca236831593b03d4e \
--run-ids 4749f7a727634f8198c176c6a280087b \
--run-ids 4f7a1fd4afab44929ddca72f03dcd61a \
--run-ids 1761d4f7e5fd4ac3887d1cf4caea70ee \
--run-ids bef55e6aa98d4647a1230cfdf2c95657 \
--run-ids 9b4b8424273147bca66eb4cb56e864e2 \
--run-ids 175bb7b71b5c42b4b3be2fd1f2b29bea \
--run-ids 22cd5f85c4724d25a1681a4a4d00c30e \
--run-ids 00ccd6f9c1084f91a0e6a8e4e26a1f71 \
--run-ids b6cd14d6d99f4323826590f37d002dbb \
--run-ids e7ff67da1eee4340b89b70f0b390e9f8 \
--run-ids 037d1a7bd04440a491fb9fb0e1f67d81 \
--run-ids b623458c06ea46e7a7fa2116626595b0 \
--run-ids d4578883d8764b35b00a5716b75d9d43 \
--run-ids 8324b70b03aa4be1bb654b92541a3927 \
--run-ids 7648cdb5b8ca42d18a19d50b08a19389 \
--run-ids c05cd3d6deba4d25a52601c7e5b78271 \
--run-ids b302832094ec42c1bcd775405af1876e \
--run-ids cbf9533f1ab94bdb972cbae7454a279a \
--run-ids 290f9721551247d487ef953b0b7e0285 \
--run-ids b182a91c47e3493e85a5e5e99f9fe86a \
--run-ids 610a03f1d8204e14bc427609c960c400 \
--run-ids ab5406b3a0034740bc9261dea63281ad \
--run-ids 42a73465fe0749d79a3321a6c67ec577 \
--run-ids 38d061f804be4f6a9b10365f05059478 \
--run-ids 2a8c59f91e3c41e987211e8718a3e11e \
--run-ids b2f6663bc45d43b89781a86145fe3b1e \
--run-ids a47aa4c3a6724e4088e2e198bc279853 \
--run-ids 6c6a77ea79334d36b12778f2d6a2b90f \
--run-ids 00183d9d3cd64faf816fd0d38fc87b41 \
--run-ids d0a845bb3bf942e282b31aa06e77c5bc \
--run-ids 17d66b4f26324c79a0294c45290a0770 \
--run-ids 8a131b84402f4b56bd38308ab5bff51e

## Analyze best functions

In [7]:
# Previously selected run ids, de-duplicated through a set literal.
# The commented-out ids are kept for reference only.
old_run_ids = list({
    "d0a845bb3bf942e282b31aa06e77c5bc",
    "d4578883d8764b35b00a5716b75d9d43",
    "34623c7a393444518373eaed94ed6da4",
    "c7ec94d9c9be4338a3535a3a504c17df",
    "1b1e2825947b42bdabe751c5160f1eb7",
    # "bef55e6aa98d4647a1230cfdf2c95657",
    # "e188567885234209b86dffa08ae0a39e",
    # "fe086d5b0ea1443cb8627b9e7bbc5a20",
    # "45251ff6d0bb44fca236831593b03d4e",
    # "4749f7a727634f8198c176c6a280087b",
    # "4f7a1fd4afab44929ddca72f03dcd61a",
    # "1761d4f7e5fd4ac3887d1cf4caea70ee",
    # "9b4b8424273147bca66eb4cb56e864e2",
    # "175bb7b71b5c42b4b3be2fd1f2b29bea",
    # "22cd5f85c4724d25a1681a4a4d00c30e",
    # "00ccd6f9c1084f91a0e6a8e4e26a1f71",
    # "b6cd14d6d99f4323826590f37d002dbb",
    # "e7ff67da1eee4340b89b70f0b390e9f8",
    # "037d1a7bd04440a491fb9fb0e1f67d81",
    # "b623458c06ea46e7a7fa2116626595b0",
    # "8324b70b03aa4be1bb654b92541a3927",
    # "7648cdb5b8ca42d18a19d50b08a19389",
    # "c05cd3d6deba4d25a52601c7e5b78271",
    # "b302832094ec42c1bcd775405af1876e",
    # "cbf9533f1ab94bdb972cbae7454a279a",
    # "290f9721551247d487ef953b0b7e0285",
    # "b182a91c47e3493e85a5e5e99f9fe86a",
    # "610a03f1d8204e14bc427609c960c400",
    # "ab5406b3a0034740bc9261dea63281ad",
    # "42a73465fe0749d79a3321a6c67ec577",
    # "38d061f804be4f6a9b10365f05059478",
    # "2a8c59f91e3c41e987211e8718a3e11e",
    # "b2f6663bc45d43b89781a86145fe3b1e",
    # "a47aa4c3a6724e4088e2e198bc279853",
    # "6c6a77ea79334d36b12778f2d6a2b90f",
    # "00183d9d3cd64faf816fd0d38fc87b41",
    # "17d66b4f26324c79a0294c45290a0770",
    # "8a131b84402f4b56bd38308ab5bff51e",
})

In [5]:
# Work on sets so the two selections can be compared with set algebra.
old_run_ids = set(old_run_ids)
new_run_ids = set(new_run_ids)

# Runs of the old selection that the new selection no longer contains.
non_used_old_runs = old_run_ids.difference(new_run_ids)

# CAUTION: this deletes those runs from MLflow every time the cell is executed.
for run_id in non_used_old_runs:
    print(run_id)
    mlflow.delete_run(run_id)

# Runs that appear only in the new selection.
new_runs = new_run_ids.difference(old_run_ids)

NameError: name 'new_run_ids' is not defined

In [6]:
# Run ids present in the new selection but not in the old one.
# Both names must already be sets (they are rebound to sets in the cell above).
new_run_ids - old_run_ids

NameError: name 'new_run_ids' is not defined

--run-ids '3296f62ab43f49a5bc3a1c0eee4089b2' \
--run-ids '340d842e71104186a858584f1e27cb5a' \
--run-ids '34623c7a393444518373eaed94ed6da4' \
--run-ids '49a1223265184ac8a48357c14100498e' \
--run-ids '559d7f4400334f1fbe576435bbd752e7' \
--run-ids '7bca1fa9088443ac8b80d9d7f4521384' \
--run-ids 'ac4b1db429aa4026aad39230be3d60f2' \
--run-ids 'c7ec94d9c9be4338a3535a3a504c17df' \
--run-ids 'ee38cbed8fbc448e9c269c024207dc55' \
--run-ids 'f1fad5761e8643729edb37555cd01388'

In [153]:
# Run ids of the old selection that are missing from the new selection.
old_run_ids - new_run_ids

set()

In [3]:
# Hard-coded MLflow run ids that are used as `run_ids` by the cells below.
# NOTE(review): presumably one best run per partition — verify against the
# ranking computed in "Find best functions".
best_5_runs = [
    "d0a845bb3bf942e282b31aa06e77c5bc",
    "d4578883d8764b35b00a5716b75d9d43",
    "34623c7a393444518373eaed94ed6da4",
    "c7ec94d9c9be4338a3535a3a504c17df",
    "1b1e2825947b42bdabe751c5160f1eb7",
]

In [4]:
run_ids = best_5_runs

# Build the quoted, comma-separated id list outside the filter f-string:
# reusing the enclosing quote character inside an f-string replacement field is
# a SyntaxError on Python versions before 3.12.
quoted_run_ids = ",".join(f"'{run_id}'" for run_id in run_ids)
runs_dimevo = mlflow.search_runs(
    experiment_names=["GP+PSO [train]"],
    filter_string=f"attributes.run_id IN ({quoted_run_ids})",
)

# Maps the str() form of a partition's test-benchmark list back to its key.
inverse_dict = {str(v["test"]): k for k, v in PARTITIONS.items()}

# Annotate every run with its partition key and an integer dimension.
runs_dimevo["partition"] = runs_dimevo["params.test_benchmarks"].apply(
    lambda x: inverse_dict[x]
)
runs_dimevo["dim"] = runs_dimevo["params.dim"].astype(int)

# Lookup tables: run id -> partition, partition -> run ids, run id -> dimension.
runid_x_partition = runs_dimevo.set_index("run_id")["partition"].to_dict()
partition_run_ids = runs_dimevo.groupby("partition")["run_id"].apply(list).to_dict()
runid_x_dim = runs_dimevo.set_index("run_id")["dim"].to_dict()

## Visualisation

In [10]:
# track the best particle throughout the iterations

In [None]:
# Animation settings; they do not depend on the benchmark, so they are set once.
n_particles = 100
bounds = (-5, 5)
optimum_bounds = (-2, 2)
plot_bounds = (min(bounds[0], optimum_bounds[0]), max(bounds[1], optimum_bounds[1]))
grid_size = 200
print(f"Using device: {device}")

# Evolved runs followed by the baseline names. dict.fromkeys removes repeated
# entries (the original list contained "pso" twice, which only re-ran the same
# animation and overwrote the same file) while keeping the order.
animated_ids = list(dict.fromkeys(run_ids + ["pso", "psog3", "psodisp2", "pso"]))

for b in tqdm(benchmark_names):
    benchmark_func = BenchmarkFunction(
        benchmark_name=b,
        dimensions=2,
        bounds=bounds,
        num_problems=1,
        optimum_bounds=optimum_bounds,
        device=device,
    )

    # Evaluate the benchmark on a regular grid and standardise the values so
    # they can be used as the animation background.
    x = torch.linspace(*plot_bounds, grid_size, device=device)
    X, Y = torch.meshgrid(x, x, indexing="ij")
    positions = torch.stack([Y.flatten(), X.flatten()], dim=1)
    pos_bench = benchmark_func(positions.unsqueeze(0))
    Z = pos_bench[0].reshape(X.shape)
    benchmark_grid = ((Z - Z.mean()) / Z.std()).cpu().numpy()

    for run_id in animated_ids:
        if run_id in runid_x_partition:
            # Evolved function: name the output folder after run id, dim, partition.
            p = runid_x_partition[run_id]
            d = runid_x_dim[run_id]
            func_name = create_function_name(run_id, d, p)
        else:
            # Baseline names are not MLflow run ids, so they have no entry in
            # the lookup tables (indexing them raised KeyError); use the
            # baseline name itself as the folder name.
            func_name = run_id
        # TODO: optionally skip benchmarks that belong to PARTITIONS[p]["train"].
        func_path = results_path / "animations" / f"{func_name}"
        func_path.mkdir(exist_ok=True, parents=True)

        # NOTE(review): assumes make_custom_velocity_from_yaml also accepts the
        # baseline names, as the original loop passed them too — confirm.
        custom_vel_func = make_custom_velocity_from_yaml(run_id, device)

        # Run PSO
        swarm = SwarmBatch(
            benchmark_func=benchmark_func,
            velocity_function=custom_vel_func,
            num_particles=n_particles,
            device=device,
            save_history=True,
        )
        swarm.optimize(num_iterations=100, use_tqdm=False)

        # Create the animation: per-particle scatter for small swarms, heatmap
        # otherwise.
        if n_particles <= 500:
            fig = create_animation(swarm, benchmark_func, benchmark_grid, renderer=None)
        else:
            # The result is now bound to `fig`; before, this branch left `fig`
            # unset (or stale from a previous iteration) for write_html below.
            # NOTE(review): assumes create_heatmap_animation returns the figure
            # like create_animation does — confirm.
            fig = create_heatmap_animation(
                swarm, benchmark_func, benchmark_grid, grid_size=grid_size
            )
        # TODO: set a browser-tab title in the exported HTML.
        fig.write_html(func_path / f"{b}_2D.html")

  0%|          | 0/10 [00:00<?, ?it/s]

Using device: cuda


Using device: cuda
Using device: cuda
Using device: cuda
Using device: cuda
Using device: cuda
Using device: cuda
Using device: cuda
Using device: cuda
Using device: cuda


In [None]:
import graphviz


def parse_expression(expr, parent_id=None, counter=None, graph=None):
    """
    Recursively parse a GP tree string and add its nodes/edges to a graph.

    The expression uses prefix call syntax, e.g. ``add(x, mul(y, z))``. Each
    function name and each leaf becomes one node whose id is a running integer
    (as a string); every child is connected to its parent by an edge.

    Args:
        expr: Expression text. Surrounding whitespace is ignored.
        parent_id: Id of the parent node, or None for the root. The default used
            to be 0, which made the root draw an edge to node "0" (a self-loop
            on the first call).
        counter: Single-element list holding the next free node id; shared by
            the recursive calls. When omitted, numbering starts at 0 for this
            call. The former mutable default ``[0]`` kept counting across
            independent calls.
        graph: Object with ``node(name, label)`` and ``edge(tail, head)``
            methods. When omitted, the module-level ``dot`` graph is used.

    Returns:
        None. The graph is modified in place.
    """
    if counter is None:
        counter = [0]
    if graph is None:
        graph = dot  # module-level graphviz.Digraph created by the notebook

    expr = expr.strip()
    node_id = counter[0]
    counter[0] += 1

    if "(" in expr:
        # Function/operator node: label is the text before the first "(".
        label = expr.split("(", 1)[0]
        # Drop the function name, the opening "(" and the final ")".
        args = expr[len(label) + 1 : -1]

        # Split the arguments on commas that are not nested inside parentheses.
        children = []
        depth = 0
        current_arg = ""
        for char in args:
            if char == "," and depth == 0:
                children.append(current_arg.strip())
                current_arg = ""
                continue
            current_arg += char
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
        if current_arg:
            children.append(current_arg.strip())
    else:
        # Leaf node (e.g., variables or constants).
        label = expr
        children = []

    graph.node(str(node_id), label)
    if parent_id is not None:
        graph.edge(str(parent_id), str(node_id))

    # Recursively process child nodes, sharing the id counter and the graph.
    for child in children:
        parse_expression(child, node_id, counter, graph)


# Initialize Graphviz object
# Top-to-bottom directed graph rendered as PNG; parse_expression() adds its
# nodes and edges to this module-level object.
dot = graphviz.Digraph(format="png", graph_attr={"rankdir": "TB"})

# Example GP tree string.
# NOTE(review): gp_tree_str is defined but never parsed — the call below parses
# VEL (imported from src.custom_velocity_formula) instead. Confirm which
# expression is meant to be drawn.
gp_tree_str = """neg(div(mul(add(add(num_iterations, mul(mul(sub(dispersion,
  dispersion), mul(center, gbest)), gbest)), mul(dispersion, velocity)), neg(sub(velocity,
  positions))), add(add(add(pbest, dispersion), add(pbest, num_iterations)), div(div(div(gbest,
  velocity), mul(positions, mul(mul(velocity, pbest), div(dispersion, num_iterations)))),
  mul(num_iterations, velocity)))))"""

# Generate the tree
parse_expression(VEL)

# Render and display
dot.render("gp_tree")  # Saves as 'gp_tree.png'
dot


## Evolved vs Rest

In [5]:
# Load every run of the "evolved_functions" experiment and keep only those
# that logged a fitness value.
runs = mlflow.search_runs(experiment_names=["evolved_functions"])
runs = runs[runs["metrics.fitness"].notna()]

# Sanity check shown as the cell output: every selected run id must have at
# least one evaluation run. (The former equality comparison on the line before
# was a dead expression: only the last expression of a cell is displayed.)
evaluated_func_run_ids = set(runs["params.func_run_id"].dropna().unique().tolist())
set(run_ids).issubset(evaluated_func_run_ids)

True

In [6]:
# Keep the evaluation runs of the selected functions together with the runs
# that carry no func_run_id at all, then renumber the rows from zero.
runs = runs.loc[
    lambda t: t["params.func_run_id"].isin(run_ids) | t["params.func_run_id"].isna()
].reset_index(drop=True)

How many runs are there for each dimension, partition, and run type?

In [7]:
# group df_f by dim,  partition and run_type and collect the count of runs
df_grouped = (
    runs.groupby(["params.dim", "params.test_benchmarks", "params.run_type"])
    .size()
    .reset_index()
)
df_grouped.columns = ["dim", "partition", "run_type", "count"]
df_grouped["partition"] = df_grouped["partition"].apply(lambda x: inverse_dict[x])
df_grouped["dim"] = df_grouped["dim"].astype(str)
df_grouped["partition"] = df_grouped["partition"].astype(int)
df_grouped = df_grouped.pivot_table(
    index="partition", columns=["dim", "run_type"], values="count", fill_value=0
).T
df_grouped

Unnamed: 0_level_0,partition,1,2,3,4,5
dim,run_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100,X-1B1E-50,0.0,0.0,0.0,0.0,30.0
100,X-3462-100,0.0,0.0,30.0,0.0,0.0
100,X-C7EC-100,0.0,0.0,0.0,30.0,0.0
100,X-D0A8-30,30.0,0.0,0.0,0.0,0.0
100,X-D457-100,0.0,30.0,0.0,0.0,0.0
100,pso,60.0,60.0,60.0,60.0,60.0
100,psocd1,60.0,60.0,60.0,60.0,60.0
100,psodisp2,60.0,60.0,60.0,60.0,60.0
100,psog3,60.0,60.0,60.0,60.0,60.0
30,X-1B1E-50,0.0,0.0,0.0,0.0,30.0


In [None]:
# Display the filtered evaluation runs.
# NOTE(review): this renders the whole DataFrame; a preview such as
# runs.head() may be enough.
runs

In [8]:
# One figure per partition with one boxplot panel per dimension.
num_cols = len(dims)
num_rows = 1

for p in PARTITIONS.keys():
    # Output directory of this partition. It does not depend on the dimension,
    # so it is created here rather than inside the inner loop (the figure is
    # written to it after that loop has finished).
    partition_path = results_path / f"partition_{p}" / "best_evolved"
    partition_path.mkdir(parents=True, exist_ok=True)

    fig_subplot = make_subplots(
        rows=num_rows,
        cols=num_cols,
        vertical_spacing=0.1,
        horizontal_spacing=0.05,
        shared_yaxes=False,
        column_titles=[f"Dim {d}" for d in dims],
    )
    fig_subplot.update_layout(
        height=600 * num_rows,  # Adjust height dynamically
        width=500 * num_cols,  # Adjust width dynamically
        title_text="Fitness across Dimensions",
        showlegend=False,  # Hide legend for clarity
        template="plotly_dark",
    )

    for col_idx, d in enumerate(dims, start=1):
        # Runs of this dimension evaluated on this partition's test benchmarks.
        df_dims = runs[runs["params.dim"] == str(d)]
        df_p = df_dims[df_dims["params.test_benchmarks"] == str(PARTITIONS[p]["test"])]

        # One column per run type holding that run type's fitness values.
        df_pivot = df_p.pivot(columns="params.run_type", values="metrics.fitness")

        # Compact every column by dropping its NaN gaps and renumbering its rows.
        df_squished = pd.DataFrame({
            col: df_pivot[col].dropna().reset_index(drop=True)
            for col in df_pivot.columns
        })

        # Baselines get display names. The final dropna removes every row in
        # which any column is missing, i.e. all columns are truncated to the
        # length of the shortest one.
        df_squished = df_squished.rename(
            columns={
                "pso": "PSO",
                "psocd1": "PSO-CD1",
                "psodisp2": "PSO-DISP2",
                "psog3": "PSO-G3",
            }
        ).dropna(axis=0)
        df_squished.round(5).to_csv(
            partition_path / f"dim_{d}_fitnesses.txt", sep=" ", index=False
        )

        fig = boxplot(
            path=partition_path,
            df=df_squished,
            dimension=d,
            partition=p,
            filename="fitness_comparison",
        )
        # Copy the traces of the single-dimension figure into its panel.
        for data in fig.data:
            fig_subplot.add_trace(data, row=1, col=col_idx)

    fig_subplot.write_html(partition_path / "fitness_comparison.html")
    fig_subplot.write_image(partition_path / "fitness_comparison.png", scale=2)

    # TODO: later — per-partition Wilcoxon p-values of the evolved function
    # against each baseline, written next to the fitness tables.


In [198]:
# Inspect the run-type labels of df_p, i.e. of whatever slice the most
# recently executed cell left in that variable.
df_p["params.run_type"]

0          psodisp2
1            psocd1
2             psog3
3               pso
66         psodisp2
           ...     
3981    P5-1B1E-50D
3982       psodisp2
3983         psocd1
3984          psog3
3985            pso
Name: params.run_type, Length: 270, dtype: object

## Partition Total Fitness & Wilcoxon

In [95]:
# Params a training run must have been logged with (dimension excluded; it is
# filtered after the search).
_gp_search_params = dict(
    n_problems=n_problems,
    n_particles=n_particles,
    n_generations=n_iterations,
    max_depth_limit=max_depth_limit,
    lower_bound=lower_bounds,
    pop_size=pop_size,
    max_mut_depth=max_mut_depth,
    max_init_depth=max_init_depth,
    cxpb=cxpb,
    mutpb=mutpb,
    arguments=str(arguments),
    tourn_size=tourn_size,
    context=str(context),
)
filter_clauses = [
    f'params.{key} = "{value}"' for key, value in _gp_search_params.items()
]
filter_str = " and ".join(filter_clauses)

# Fetch the matching runs, keep the finished ones whose dimension is listed in
# `dims`, and renumber the rows from zero.
runs_gppso = mlflow.search_runs(
    experiment_names=["GP+PSO [train]"], filter_string=filter_str
)
runs_gppso = runs_gppso.loc[
    lambda t: (t["status"] == "FINISHED") & t["params.dim"].astype(int).isin(dims)
].reset_index(drop=True)

In [96]:
# Copy of the training runs annotated with partition key and integer dimension.
runs_dimevo = runs_gppso.assign(
    partition=lambda t: t["params.test_benchmarks"].apply(lambda x: inverse_dict[x]),
    dim=lambda t: t["params.dim"].astype(int),
)

# Lookup tables: run id -> partition, partition -> run ids, run id -> dimension.
_runs_by_id = runs_dimevo.set_index("run_id")
runid_x_partition = _runs_by_id["partition"].to_dict()
partition_run_ids = runs_dimevo.groupby("partition")["run_id"].apply(list).to_dict()
runid_x_dim = _runs_by_id["dim"].to_dict()

In [97]:
# group df_f by dim,  partition and run_type and collect the count of runs_gppso
df_grouped = (
    runs_gppso.groupby(["params.dim", "params.test_benchmarks", "params.run_type"])
    .size()
    .reset_index()
)
df_grouped.columns = ["dim", "partition", "run_type", "count"]
df_grouped["partition"] = df_grouped["partition"].apply(lambda x: inverse_dict[x])
df_grouped["dim"] = df_grouped["dim"].astype(str)
df_grouped["partition"] = df_grouped["partition"].astype(int)
df_grouped = df_grouped.pivot_table(
    index="partition", columns=["dim", "run_type"], values="count", fill_value=0
).T
df_grouped

Unnamed: 0_level_0,partition,1,2,3,4,5
dim,run_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100,gp+pso train,30.0,29.0,30.0,30.0,30.0
30,gp+pso train,30.0,30.0,30.0,30.0,30.0
50,gp+pso train,30.0,30.0,30.0,30.0,30.0


In [98]:
# sort inversed runs_dimevo by metrics.best_ind_fitness
runs_dimevo = runs_dimevo.sort_values(
    by=["params.dim", "params.test_benchmarks", "metrics.best_ind_fitness"],
    ascending=[True, True, True],
)
runs_dimevo.reset_index(drop=True, inplace=True)

In [None]:
# Define the number of rows (dimensions) and columns (partitions)
num_rows = len(dims)
num_cols = len(PARTITIONS)

colors = {
    "GP-PSO-TEST": "#E74C3C",  # Red
    "PSO": "#3498DB",  # Blue
    "PSO-G3": "#2ECC71",  # Green
}


# Create subplot layout
fig_subplot = make_subplots(
    rows=num_rows,
    cols=num_cols,
    vertical_spacing=0.1,
    horizontal_spacing=0.05,
    shared_yaxes=False,
    column_titles=[f"Partition {p}" for p in PARTITIONS.keys()],
)
# Update layout
fig_subplot.update_layout(
    height=300 * num_rows,  # Adjust height dynamically
    width=400 * num_cols,  # Adjust width dynamically
    title_text="Fitness Comparisons Across Partitions and Dimensions",
    showlegend=False,  # Hide legend for clarity
    template="plotly_dark",
    yaxis_title="Fitness",
)

for p in PARTITIONS.keys():
    partition_path = results_path / f"partition_{p}"
    partition_path.mkdir(exist_ok=True)
    for d in dims:
        dim_path = partition_path / f"dim_{d}"
        dim_path.mkdir(exist_ok=True)
        df_dims = runs_gppso[runs_gppso["params.dim"] == str(d)]
        df_dims = df_dims[df_dims["params.dim"] == str(d)]
        if len(df_dims) < (total_per_dim := expected_n_runs * partitions_len):
            print(f"Missing runs ({len(df_dims)}/{total_per_dim} completed) on dim {d}")
            # continue
        df_p = df_dims[df_dims["params.test_benchmarks"] == str(PARTITIONS[p]["test"])]
        df_p = df_p[
            [
                "metrics.train_fitness",
                "metrics.test_fitness",
                "metrics.test_fitness_pso",
                "metrics.test_fitness_cd1",
                "metrics.test_fitness_disp2",
                "metrics.test_fitness_g3",
                "run_id",
            ]
        ]
        df_p.rename(
            columns={
                "metrics.train_fitness": "GP-PSO-TRAIN",
                "metrics.test_fitness": "GP-PSO-TEST",
                "metrics.test_fitness_pso": "PSO",
                "metrics.test_fitness_cd1": "PSO-CD1",
                "metrics.test_fitness_disp2": "PSO-DISP2",
                "metrics.test_fitness_g3": "PSO-G3",
            },
            inplace=True,
        )
        df_bp = df_p.copy()
        df_bp["func_name"] = df_bp["run_id"].apply(
            lambda x: create_function_name(x, d, p)
        )
        fig = boxplot(
            path=dim_path,
            df=df_bp,
            dimension=d,
            partition=p,
            cols_to_remove=["GP-PSO-TRAIN", "PSO-CD1", "PSO-DISP2"],
            hover_name="func_name",
        )

        for data in fig.data:
            fig_subplot.add_trace(
                data,
                row=dims.index(d) + 1,
                col=list(PARTITIONS.keys()).index(p) + 1,
            )

        df_p.drop(columns=["run_id"], inplace=True)
        df_p.round(5).to_csv(dim_path / f"p{p}_fitnesses.txt", sep=" ", index=False)

        p_values = {}
        for col in df_p.columns:
            if col == "GP-PSO-TRAIN" or col == "GP-PSO-TEST":
                continue
            p_values[col] = wilcoxon(df_p["GP-PSO-TEST"], df_p[col]).pvalue.round(10)

        df_pvalues = pd.DataFrame(p_values.items(), columns=["Algorithm", "p-value"])
        df_pvalues["significance"] = df_pvalues["p-value"] < 0.05
        df_pvalues.to_csv(dim_path / f"p{p}_p_values.txt", sep=" ", index=False)


# Add "dim={d}" labels to the left side of each row
for i, d in enumerate(dims, start=1):
    fig_subplot.update_yaxes(title_text=f"dim={d}", row=i, col=1)

# Add "Partition X" labels at the bottom of each column
for i, p in enumerate(PARTITIONS.keys(), start=1):
    fig_subplot.update_xaxes(
        title_text=f"Benchmarks<br>{' - '.join(PARTITIONS[p]['test'])}",
        row=num_rows,
        col=i,
        title_font_size=10,
    )

# Save the figure
fig_subplot.write_image(results_path / "fitness_comparison.png", scale=2)
fig_subplot.update_layout(
    autosize=True,
    height=None,
    width=None,
)
fig_subplot.write_html(results_path / "fitness_comparison.html")

Missing runs (149/150 completed) on dim 100
Missing runs (149/150 completed) on dim 100
Missing runs (149/150 completed) on dim 100
Missing runs (149/150 completed) on dim 100
Missing runs (149/150 completed) on dim 100


In [99]:
# Independent copy of the filtered training runs for ad-hoc inspection.
df_f = runs_gppso.copy()

In [None]:
# Attach the partition key that corresponds to each run's test-benchmark list.
df_f["partition"] = df_f["params.test_benchmarks"].apply(lambda x: inverse_dict[x])
# NOTE(review): every name is built with the hard-coded arguments 50 and 1
# (dimension and partition in the other create_function_name calls of this
# notebook), regardless of the run's own params.dim / partition — confirm this
# is intended.
df_f["func_name"] = df_f["run_id"].apply(lambda x: create_function_name(x, 50, 1))

In [104]:
# NOTE(review): df_dims is a leftover variable from an earlier plotting loop,
# and mask1 is not used by the following cell — this looks like scratch code;
# confirm whether df_f was meant here.
mask1 = df_dims["partition"] == 1

In [105]:
# Look up the run(s) whose generated function name is "X-BEF5-50".
df_f[df_f["func_name"] == "X-BEF5-50"]

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.fitness,metrics.fitness_std,metrics.depth_std,metrics.depth_max,...,params.n_particles,tags.mlflow.source.git.commit,tags.mlflow.runName,tags.mlflow.source.name,tags.mlflow.user,tags.mlflow.source.type,tags.vel_function,tags.mlflow.note.content,partition,func_name
150,bef55e6aa98d4647a1230cfdf2c95657,796129385963021622,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/796129385...,2025-03-26 04:49:21.779000+00:00,2025-03-26 04:59:51.955000+00:00,4.49205,6.342975,2.92,10.0,...,100,c3afc2636dde4ca8b172fd42be0c21e77a5b61ee,masked-rook-375,/home/fsx/repos/pso-gp/src/gp.py,fsx,LOCAL,,,2,X-BEF5-50


## Per Benchmark

In [None]:
# Dimension analysed in the per-benchmark comparison.
dim = 30
# Experiments that hold the test runs of the evolved function and the baselines.
experiment_names = [
    "GP+PSO [test]",
    "PSO [test]",
    "PSODISP2 [test]",
    "PSOCD1 [test]",
    "PSOG3 [test]",
]

# Output directory for this dimension. parents=True is required because the
# intermediate "benchmarks" directory is not created anywhere else; without it
# mkdir raises FileNotFoundError on a fresh results directory.
results_dim = results_path / "benchmarks" / f"dim_{dim}"
results_dim.mkdir(parents=True, exist_ok=True)

# Params a test run must have been logged with to be selected.
_benchmark_filter_params = {
    "n_particles": n_particles,
    "n_generations": n_iterations,
    "max_depth_limit": max_depth_limit,
    "lower_bound": lower_bounds,
    "pop_size": pop_size,
    "max_mut_depth": max_mut_depth,
    "max_init_depth": max_init_depth,
    "cxpb": cxpb,
    "mutpb": mutpb,
    "tourn_size": tourn_size,
    "dim": dim,
}

# MLflow filter string: each param is compared as a quoted string and the
# clauses are joined with "and".
filter_str = " and ".join(
    f'params.{key} = "{value}"' for key, value in _benchmark_filter_params.items()
)

# Fetch the matching runs from all listed experiments.
runs_bm = mlflow.search_runs(
    experiment_names=experiment_names, filter_string=filter_str
)


['ackley', 'cigar', 'easom', 'griewank', 'levy', 'rastrigin', 'rosenbrock', 'schwefel', 'sphere', 'zakharov']


In [86]:
# Identifying params first ...
cols_to_keep = [
    "params.dataset_id",
    "params.run_type",
    "params.dim",
    "params.test_benchmarks",
]
# ... followed by every per-benchmark metric column, i.e. each column whose
# name starts with "metrics.test_fit_".
cols_to_keep.extend(
    name for name in runs_bm.columns if name.startswith("metrics.test_fit_")
)

In [87]:
# Reduce the benchmark runs to the selected columns.
df_p = runs_bm.loc[:, cols_to_keep]
# Distinct run types in order of first appearance.
all_run_types = pd.unique(df_p["params.run_type"]).tolist()
all_run_types

['psodisp2 test', 'psocd1 test', 'psog3 test', 'pso test', 'gp+pso test']

In [None]:
# Create a dictionary to store benchmark-specific DataFrames
benchmark_dfs = {}

# Loop over each benchmark
for benchmark in benchmark_names:
    benchmark_col = f"metrics.test_fit_{benchmark}"

    # Pivot to get rows indexed by dataset_id, columns by run_type
    pivoted = df_p.pivot_table(
        index="params.dataset_id", columns="params.run_type", values=benchmark_col
    )

    # Filter out rows where "gp+pso test" or any comparator is missing
    # (this prevents issues when a dataset_id is not available for both run types)
    valid_rows = pivoted.dropna(subset=["gp+pso test"])

    benchmark_dfs[benchmark] = valid_rows

benchmark_dfs.keys()


dict_keys(['ackley', 'cigar', 'easom', 'griewank', 'levy', 'rastrigin', 'rosenbrock', 'schwefel', 'sphere', 'zakharov'])

In [None]:
# Paired Wilcoxon signed-rank test of every run type against the baseline.
# Result: one table per benchmark with columns "p-value" and "significant".
pval_tables = {}
baseline = "gp+pso test"
for benchmark in benchmark_names:
    col = f"metrics.test_fit_{benchmark}"

    # Pivot so that values sharing a dataset_id sit on the same row (paired)
    pivoted = df_p.pivot_table(
        index="params.dataset_id", columns="params.run_type", values=col
    )

    # p-values of each comparator vs. the baseline
    pvals = {}

    # Without a baseline column there is nothing to pair against; the table
    # for this benchmark is left empty instead of raising a KeyError.
    if baseline in pivoted.columns:
        # keep only datasets where the baseline is present
        pivoted = pivoted.dropna(subset=[baseline])

        for b in all_run_types:
            if b == baseline or b not in pivoted.columns:
                continue

            # Drop rows where either value is missing
            paired = pivoted[[baseline, b]].dropna()

            # NaN (not None) keeps the column numeric, so the `< 0.05`
            # comparison below yields False instead of raising a TypeError.
            p = float("nan")
            if not paired.empty:
                try:
                    _, p = wilcoxon(
                        paired[baseline], paired[b], alternative="two-sided"
                    )
                except ValueError:
                    # e.g. not enough data or all differences are zero
                    p = float("nan")
            pvals[b] = p

    # Convert to DataFrame (index: run type)
    pval_tables[benchmark] = pd.DataFrame.from_dict(
        pvals, orient="index", columns=["p-value"]
    )
    pval_tables[benchmark]["significant"] = pval_tables[benchmark]["p-value"] < 0.05

In [None]:
# Median and standard deviation of the test fitness, per benchmark and run type.
summary_rows = []

for benchmark in benchmark_names:
    col = f"metrics.test_fit_{benchmark}"

    # Only the runs that actually report this benchmark
    observed = df_p.loc[
        df_p[col].notna(), ["params.run_type", "params.dataset_id", col]
    ]

    # One row of (median, std) per run type
    per_run_type = observed.groupby("params.run_type")[col].agg(["median", "std"])

    # Flatten into a single record: "<run_type>_median" / "<run_type>_std"
    record = {"benchmark": benchmark}
    for run_type, stats in per_run_type.iterrows():
        record[f"{run_type}_median"] = stats["median"]
        record[f"{run_type}_std"] = stats["std"]

    summary_rows.append(record)

# Benchmarks as rows, rounded to 5 decimals
summary_df = pd.DataFrame(summary_rows).set_index("benchmark").round(5)


In [91]:
# Show the per-benchmark median/std of every run type (rounded to 5 decimals)
summary_df

Unnamed: 0_level_0,gp+pso test_median,gp+pso test_std,pso test_median,pso test_std,psocd1 test_median,psocd1 test_std,psodisp2 test_median,psodisp2 test_std,psog3 test_median,psog3 test_std
benchmark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ackley,5.43096,0.89174,4.42601,0.2172,13.6838,0.09871,13.67382,0.11307,5.88646,0.08456
cigar,5.17271,1.14956,9.80925,0.75166,13.91054,0.1481,13.9246,0.12958,5.54546,0.09119
easom,6.22878,7.33227,19.33422,0.44692,18.60263,0.23816,18.58947,0.23611,13.19231,4.76189
griewank,5.89589,0.9738,9.80166,0.40183,13.62069,0.11795,13.64796,0.10282,6.63754,0.1802
levy,6.37517,0.42613,12.56373,0.33236,15.74698,0.19927,15.46353,0.21485,6.7689,0.13962
rastrigin,7.48618,0.32642,11.37094,0.18457,15.00525,0.15134,14.93367,0.13589,7.40444,0.12488
rosenbrock,5.4025,1.00662,7.69963,0.69511,14.35099,0.17066,14.3163,0.13353,5.85995,0.08786
schwefel,5.13705,1.33512,1.68045,0.10869,13.56833,0.10292,13.58599,0.09993,5.4125,0.08109
sphere,4.78293,1.46656,1.67412,0.1378,13.5562,0.1125,13.59331,0.09721,5.44489,0.07444
zakharov,6.5811,1.04208,9.52026,0.15539,18.34255,0.2844,19.79116,0.58153,7.55902,0.15851


In [92]:
# For each benchmark (row), find the run type whose median is the smallest
median_cols = [name for name in summary_df.columns if name.endswith("_median")]
best_medians = summary_df[median_cols].idxmin(axis=1)

# Strip the "_median" part so only the run type name remains
best_medians = best_medians.str.replace("_median", "")

In [93]:
# Show, per benchmark, the run type with the lowest median
best_medians

benchmark
ackley           pso test
cigar         gp+pso test
easom         gp+pso test
griewank      gp+pso test
levy          gp+pso test
rastrigin      psog3 test
rosenbrock    gp+pso test
schwefel         pso test
sphere           pso test
zakharov      gp+pso test
dtype: object

In [None]:
# Step 1: merge each run type's median/std pair into one "median ± std" column
df_median = summary_df.copy()

for run_type in sorted(all_run_types):
    medians = summary_df[f"{run_type}_median"]
    stds = summary_df[f"{run_type}_std"]
    df_median[run_type] = [f"{m:.2f} ± {s:.2f}" for m, s in zip(medians, stds)]

# Step 2: drop the now-redundant *_median and *_std columns
cols_to_drop = [
    f"{run_type}_{stat}" for stat in ("median", "std") for run_type in all_run_types
]
df_median = df_median.drop(columns=cols_to_drop)

# Step 3: add LaTeX markup
#   \underline -> lowest median on that benchmark
#   \textbf    -> differs significantly (p < 0.05) from the baseline
df_formatted = df_median.copy()

for benchmark in df_formatted.index:
    pval_df = pval_tables.get(benchmark)
    for run_type in df_formatted.columns:
        cell = df_formatted.loc[benchmark, run_type]

        # The baseline is never tested against itself
        significant = False
        if run_type != baseline and pval_df is not None and run_type in pval_df.index:
            pval = pval_df.loc[run_type, "p-value"]
            significant = pd.notna(pval) and pval < 0.05

        if run_type == best_medians[benchmark]:
            cell = r"\underline{" + cell + "}"
        if significant:
            cell = r"\textbf{" + cell + "}"

        df_formatted.loc[benchmark, run_type] = cell

In [95]:
# Show the table with LaTeX markup (\underline = best median, \textbf = p < 0.05)
df_formatted

Unnamed: 0_level_0,gp+pso test,pso test,psocd1 test,psodisp2 test,psog3 test
benchmark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ackley,5.43 ± 0.89,\textbf{\underline{4.43 ± 0.22}},\textbf{13.68 ± 0.10},\textbf{13.67 ± 0.11},\textbf{5.89 ± 0.08}
cigar,\underline{5.17 ± 1.15},\textbf{9.81 ± 0.75},\textbf{13.91 ± 0.15},\textbf{13.92 ± 0.13},\textbf{5.55 ± 0.09}
easom,\underline{6.23 ± 7.33},\textbf{19.33 ± 0.45},\textbf{18.60 ± 0.24},\textbf{18.59 ± 0.24},\textbf{13.19 ± 4.76}
griewank,\underline{5.90 ± 0.97},\textbf{9.80 ± 0.40},\textbf{13.62 ± 0.12},\textbf{13.65 ± 0.10},\textbf{6.64 ± 0.18}
levy,\underline{6.38 ± 0.43},\textbf{12.56 ± 0.33},\textbf{15.75 ± 0.20},\textbf{15.46 ± 0.21},\textbf{6.77 ± 0.14}
rastrigin,7.49 ± 0.33,\textbf{11.37 ± 0.18},\textbf{15.01 ± 0.15},\textbf{14.93 ± 0.14},\textbf{\underline{7.40 ± 0.12}}
rosenbrock,\underline{5.40 ± 1.01},\textbf{7.70 ± 0.70},\textbf{14.35 ± 0.17},\textbf{14.32 ± 0.13},\textbf{5.86 ± 0.09}
schwefel,5.14 ± 1.34,\textbf{\underline{1.68 ± 0.11}},\textbf{13.57 ± 0.10},\textbf{13.59 ± 0.10},\textbf{5.41 ± 0.08}
sphere,4.78 ± 1.47,\textbf{\underline{1.67 ± 0.14}},\textbf{13.56 ± 0.11},\textbf{13.59 ± 0.10},\textbf{5.44 ± 0.07}
zakharov,\underline{6.58 ± 1.04},\textbf{9.52 ± 0.16},\textbf{18.34 ± 0.28},\textbf{19.79 ± 0.58},\textbf{7.56 ± 0.16}


In [96]:
# Publication labels for the run types (columns) and benchmarks (rows)
column_labels = {
    "gp+pso test": "GP-PSO-TEST",
    "pso test": "PSO",
    "psocd1 test": "PSO-CD1",
    "psodisp2 test": "PSO-DISP2",
    "psog3 test": "PSO-G3",
}
df_formatted = df_formatted.rename(columns=column_labels)
df_formatted.index = df_formatted.index.str.capitalize()

In [None]:
# Show the table again, now with the renamed columns and capitalized benchmarks
df_formatted

In [None]:
# Write the table as LaTeX: first column left-aligned, the rest centered.
# escape=False keeps the \underline / \textbf markup intact.
latex_path = results_dim / "benchmarks_table.tex"
n_value_cols = len(df_formatted.columns)
df_formatted.reset_index().to_latex(
    latex_path,
    index=False,
    escape=False,
    column_format="l" + n_value_cols * "c",
)

# Per Benchmark (Evolved)

In [16]:
# Load every run of the "evolved_functions_2" experiment
runs_bm = mlflow.search_runs(experiment_names=["evolved_functions_2"])
# Keep only runs that logged a fitness. The explicit .copy() makes the frame
# independent, so the column assignment below does not write to a slice
# (which triggers pandas' SettingWithCopyWarning).
runs_bm = runs_bm[runs_bm["metrics.fitness"].notna()].copy()
# create partition column: look up each run's test_benchmarks value in inverse_dict
runs_bm["partition"] = runs_bm["params.test_benchmarks"].apply(
    lambda x: inverse_dict[x]
)
func_run_ids = runs_bm["params.func_run_id"].dropna().unique().tolist()
all_run_types = runs_bm["params.run_type"].dropna().unique().tolist()
# Sanity check: every id in run_ids must appear among the loaded runs
set(run_ids).issubset(func_run_ids)

True

In [17]:
# Keep a run when its func_run_id is one of run_ids, or when it has no
# func_run_id at all.
func_ids = runs_bm["params.func_run_id"]
keep_rows = func_ids.isin(run_ids) | func_ids.isna()
runs_bm = runs_bm.loc[keep_rows].copy()
# Recompute the partition column from the test_benchmarks value
runs_bm["partition"] = runs_bm["params.test_benchmarks"].apply(
    lambda key: inverse_dict[key]
)
runs_bm = runs_bm.reset_index(drop=True)

In [18]:
# Show the selected runs (pandas truncates the rendering of large frames)
runs_bm

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.test_fit_levy,metrics.test_fit_sphere,metrics.test_fit_ackley,metrics.fitness,...,params.upper_bound,params.seed,params.pso_n_iterations,tags.mlflow.runName,tags.mlflow.source.git.commit,tags.mlflow.parentRunId,tags.mlflow.user,tags.mlflow.source.type,tags.mlflow.source.name,partition
0,4890d02f5b054de186046064f31f125c,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 22:31:06.801000+00:00,2025-03-31 22:31:08.013000+00:00,11.464806,6.045562,11.404215,8.500949,...,5.0,6528,100,burly-hare-703,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,5
1,2197cb22a6ba45c98c260ff6a3b69728,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 22:31:06.084000+00:00,2025-03-31 22:31:06.748000+00:00,29.394625,27.760088,27.589182,28.296854,...,5.0,6528,100,rebellious-fly-687,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,5
2,e3face86355848d1b01251bc71430726,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 22:31:05.371000+00:00,2025-03-31 22:31:06.041000+00:00,29.922338,27.864782,27.927904,28.328180,...,5.0,6528,100,charming-toad-321,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,5
3,6e5d863e3cd24dd38394c749227bccf9,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 22:31:04.275000+00:00,2025-03-31 22:31:05.329000+00:00,12.289762,11.353276,11.700147,11.688994,...,5.0,6528,100,orderly-eel-684,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,5
4,6cd4c1a525124794b7dd0e404ffcc621,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 22:31:03.255000+00:00,2025-03-31 22:31:04.233000+00:00,26.941837,12.607642,17.818836,18.067215,...,5.0,6528,100,stately-dolphin-580,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2245,b61a188eb1d040cdbdcee9b9619728f8,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 21:58:30.756000+00:00,2025-03-31 21:58:31.869000+00:00,5.852339,,,3.915965,...,5.0,5534,100,skittish-wolf-82,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,1
2246,71dbdc0aa569471f94f8b1b222cedc07,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 21:58:30.145000+00:00,2025-03-31 21:58:30.704000+00:00,15.285055,,,15.581485,...,5.0,5534,100,gifted-grouse-538,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,1
2247,3f6552ec11634b0d8d6720eb9188a912,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 21:58:29.530000+00:00,2025-03-31 21:58:30.104000+00:00,15.853676,,,15.150489,...,5.0,5534,100,puzzled-flea-714,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,1
2248,fee43e89f9544fe98be23569fced1fb3,904979821256722620,FINISHED,file:///home/fsx/repos/pso-gp/mlruns/904979821...,2025-03-31 21:58:28.631000+00:00,2025-03-31 21:58:29.488000+00:00,6.698216,,,6.183231,...,5.0,5534,100,powerful-croc-61,dd4142bce4c88de4b1fdc8dd2b79af0f791ced6d,b840b5e98d2d46f19a780e0491fecec9,fsx,LOCAL,/home/fsx/repos/pso-gp/src/best_evolved.py,1


In [60]:
# For every dimension in `dims`, build and export two LaTeX tables:
#   * benchmarks_table.tex  - "median ± std" per run type (rows) and benchmark
#     (columns); \underline marks the lowest median of a benchmark, \textbf
#     marks a comparator that differs significantly from our best function.
#   * p_values_vs_best.tex  - Wilcoxon p-value of each comparator (run types
#     not starting with "X-") against the best "X-" run type of the benchmark.
# After the loop, df_formatted / df_pvalues hold the tables of the LAST dim.
df = runs_bm.copy()

# Strip the "metrics.test_fit_" prefix so benchmark columns carry bare names
df_renamed = df.rename(
    columns={col: col.replace("metrics.test_fit_", "") for col in df.columns}
)

# Publication labels for the comparator run types. Run types live on the
# table INDEX here, so this mapping is applied with rename(index=...).
run_type_labels = {
    "pso": "PSO",
    "psocd1": "PSO-CD1",
    "psodisp2": "PSO-DISP2",
    "psog3": "PSO-G3",
    "pso test": "PSO",
    "psocd1 test": "PSO-CD1",
    "psodisp2 test": "PSO-DISP2",
    "psog3 test": "PSO-G3",
}

for d in dims:
    # params.dim is compared as a string
    df_dim = df_renamed[df_renamed["params.dim"] == str(d)]
    if df_dim.empty:
        # no runs for this dimension: nothing to aggregate or export
        continue
    results_dim = results_path / "benchmarks" / f"dim_{d}"
    results_dim.mkdir(parents=True, exist_ok=True)

    # Median and std of every benchmark, per run type
    agg_funcs = ["median", "std"]
    agg_df = df_dim.groupby(["params.run_type"]).agg(
        {benchmark: agg_funcs for benchmark in benchmark_names}
    )
    # Flatten the (benchmark, stat) column MultiIndex to "<benchmark>_<stat>"
    agg_df.columns = [f"{benchmark}_{stat}" for (benchmark, stat) in agg_df.columns]

    median_cols = [col for col in agg_df.columns if col.endswith("_median")]

    # Run type with the lowest median, per benchmark (over all run types)
    best_medians = agg_df[median_cols].idxmin(axis=0)
    best_medians.index = best_medians.index.str.replace("_median", "")
    best_medians = best_medians.to_dict()
    # Same, restricted to our evolved functions (run types starting with "X-")
    agg_df_ours = agg_df[agg_df.index.str.startswith("X-")]
    best_medians_ours = agg_df_ours[median_cols].idxmin(axis=0)
    best_medians_ours.index = best_medians_ours.index.str.replace("_median", "")
    best_medians_ours = best_medians_ours.to_dict()

    df_median = agg_df.copy()

    for b in sorted(benchmark_names):
        median_col = f"{b}_median"
        std_col = f"{b}_std"
        joined_col = b  # new name: just the benchmark

        # "-" when the run type was not evaluated on this benchmark
        df_median[joined_col] = agg_df.apply(
            lambda row: f"{row[median_col]:.2f} ± {row[std_col]:.2f}"
            if pd.notna(row[median_col]) and pd.notna(row[std_col])
            else "-",
            axis=1,
        )
    # drop all the *_median and *_std columns
    cols_to_drop = [f"{b}_median" for b in benchmark_names] + [
        f"{b}_std" for b in benchmark_names
    ]
    df_median = df_median.drop(columns=cols_to_drop)

    df_formatted = df_median.copy()

    benchmark_dfs = {}
    p_values = {}
    for benchmark in df_formatted.columns:
        best_ours = best_medians_ours[benchmark]
        # Pivot to get rows indexed by dataset_id, columns by run_type
        pivoted = df_dim.pivot_table(
            index="params.dataset_id", columns="params.run_type", values=benchmark
        )
        # The reference is usable only if one of our functions was evaluated
        # on this benchmark and therefore has a column in the pivot.
        has_reference = pd.notna(best_ours) and best_ours in pivoted.columns
        if has_reference:
            # keep only the datasets on which the reference has a value
            pivoted = pivoted[pivoted[best_ours].notna()]

        benchmark_dfs[benchmark] = pivoted
        p_values[benchmark] = {}
        for run_type in df_formatted.index:
            val = df_formatted.loc[run_type, benchmark]
            is_best = run_type == best_medians[benchmark]
            is_ours = run_type.startswith("X-")
            is_significant = False
            # Comparators are tested against our best function; our own
            # functions are not tested.
            if not is_ours:
                # NaN means "no test possible"; it is never significant
                pval = float("nan")
                if has_reference and run_type in pivoted.columns:
                    # the test is paired: drop datasets missing either value
                    paired = pivoted[[best_ours, run_type]].dropna()
                    if not paired.empty:
                        try:
                            _, pval = wilcoxon(
                                paired[best_ours],
                                paired[run_type],
                                alternative="two-sided",
                            )
                        except ValueError:
                            # e.g. not enough data or all differences are zero
                            pval = float("nan")
                is_significant = bool(pd.notna(pval) and pval < 0.05)
                p_values[benchmark][run_type] = round(float(pval), 4)

            # Format LaTeX string
            if is_best:
                val = r"\underline{" + val + "}"
            if is_significant:
                val = r"\textbf{" + val + "}"

            df_formatted.loc[run_type, benchmark] = val

    # p-values table: one row per comparator run type, one column per benchmark
    df_pvalues = pd.DataFrame(p_values)

    # save to latex
    df_pvalues = df_pvalues.rename(index=run_type_labels)
    # upper() leaves the mapped labels untouched ("PSO-CD1"), whereas
    # capitalize() would turn them into "Pso-cd1"
    df_pvalues.index = df_pvalues.index.str.upper()
    df_pvalues.columns = df_pvalues.columns.str.capitalize()
    df_pvalues.reset_index().to_latex(
        results_dim / "p_values_vs_best.tex",
        index=False,
        escape=False,
        column_format="l" + "c" * len(df_pvalues.columns),
    )

    # Run types are on the index, so the labels must be applied there;
    # renaming the columns (benchmarks) would silently do nothing.
    df_formatted = df_formatted.rename(index=run_type_labels)
    df_formatted.index = df_formatted.index.str.upper()
    df_formatted.columns = df_formatted.columns.str.capitalize()

    df_formatted.reset_index().to_latex(
        results_dim / "benchmarks_table.tex",
        index=False,
        escape=False,
        column_format="l" + "c" * len(df_formatted.columns),
    )

In [59]:
# Show the p-value table. It is reassigned on every pass of the loop over
# `dims`, so this is the table of the last dimension processed.
df_pvalues

Unnamed: 0,ackley,cigar,easom,griewank,levy,rastrigin,rosenbrock,schwefel,sphere,zakharov
pso,0.0,0.0,0.9836,0.0,0.0,0.0,0.0,0.0,0.0,0.0
psocd1,0.0,0.0,0.7151,0.0,0.0,0.0,0.0,0.0,0.0,0.0
psodisp2,0.0,0.0,0.9515,0.0,0.0,0.0,0.0,0.0,0.0,0.0
psog3,0.0,0.0,0.184,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
# Show the formatted table. It is reassigned on every pass of the loop over
# `dims`, so this is the table of the last dimension processed.
df_formatted

Unnamed: 0_level_0,Ackley,Cigar,Easom,Griewank,Levy,Rastrigin,Rosenbrock,Schwefel,Sphere,Zakharov
params.run_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
X-1B1E-50,11.19 ± 0.11,\underline{6.14 ± 0.06},-,-,\underline{11.31 ± 0.14},-,7.64 ± 0.07,-,5.98 ± 0.06,-
X-3462-100,-,9.87 ± 0.06,32.62 ± 0.24,-,11.35 ± 0.11,-,-,9.85 ± 0.07,9.87 ± 0.06,-
X-C7EC-100,-,-,-,7.69 ± 0.14,-,\underline{13.08 ± 0.11},\underline{6.49 ± 0.09},\underline{4.65 ± 0.05},-,17.57 ± 0.88
X-D0A8-30,-,8.64 ± 0.09,-,-,11.70 ± 0.14,-,9.67 ± 0.10,8.61 ± 0.08,-,\underline{12.48 ± 0.29}
X-D457-100,\underline{8.98 ± 0.13},-,\underline{32.59 ± 0.25},\underline{5.35 ± 0.15},-,13.74 ± 0.14,-,-,\underline{2.46 ± 0.03},-
PSO,\textbf{17.28 ± 0.24},\textbf{15.40 ± 0.42},32.71 ± 0.24,\textbf{15.55 ± 0.16},\textbf{26.91 ± 0.31},\textbf{24.83 ± 0.19},\textbf{17.83 ± 0.29},\textbf{12.82 ± 0.20},\textbf{12.80 ± 0.19},\textbf{29.13 ± 0.26}
PSOCD1,\textbf{27.89 ± 0.11},\textbf{27.83 ± 0.11},32.68 ± 0.21,\textbf{27.76 ± 0.10},\textbf{29.59 ± 0.22},\textbf{28.95 ± 0.18},\textbf{28.49 ± 0.13},\textbf{27.75 ± 0.11},\textbf{27.76 ± 0.11},\textbf{41.70 ± 0.81}
PSODISP2,\textbf{27.86 ± 0.10},\textbf{27.82 ± 0.11},32.62 ± 0.28,\textbf{27.73 ± 0.11},\textbf{29.52 ± 0.20},\textbf{28.99 ± 0.17},\textbf{28.48 ± 0.16},\textbf{27.74 ± 0.09},\textbf{27.74 ± 0.08},\textbf{44.11 ± 7.73}
PSOG3,\textbf{11.68 ± 0.08},\textbf{11.38 ± 0.08},32.67 ± 0.23,\textbf{12.22 ± 0.10},\textbf{12.23 ± 0.12},\textbf{13.32 ± 0.14},\textbf{11.62 ± 0.10},\textbf{11.36 ± 0.09},\textbf{11.39 ± 0.07},\textbf{14.18 ± 0.27}


In [44]:
# Work on an independent copy of the runs, leaving out run type "test"
df_p = runs_bm.loc[runs_bm["params.run_type"] != "test"].copy()
# Distinct run types that remain, in order of first appearance
all_run_types = list(df_p["params.run_type"].unique())
all_run_types

['X-1B1E-50',
 'psodisp2',
 'psocd1',
 'psog3',
 'pso',
 'X-C7EC-100',
 'X-3462-100',
 'X-D457-100',
 'X-D0A8-30']

In [None]:
# Start again from all selected runs
df_p = runs_bm.copy()

# Only the runs whose params.dim equals "100"
runs_dim_100 = df_p[df_p["params.dim"] == "100"]

# One wide table per benchmark: rows are dataset ids, columns are run types
benchmark_dfs = {
    benchmark: runs_dim_100.pivot_table(
        index="params.dataset_id",
        columns="params.run_type",
        values=f"metrics.test_fit_{benchmark}",
    )
    for benchmark in benchmark_names
}

benchmark_dfs.keys()


dict_keys(['ackley', 'cigar', 'easom', 'griewank', 'levy', 'rastrigin', 'rosenbrock', 'schwefel', 'sphere', 'zakharov'])

In [49]:
# Inspect the pivoted table (dataset_id x run_type) of one benchmark
benchmark_dfs["ackley"]

params.run_type,X-1B1E-50,X-D457-100,pso,psocd1,psodisp2,psog3
params.dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20250331-225913,,9.123562,16.782919,27.964874,28.059553,11.769372
20250331-225927,11.130814,,16.954588,27.93675,27.847828,11.673428
20250331-230019,,9.193104,17.514906,27.937496,28.018869,11.757885
20250331-230033,11.245932,,17.356155,27.778843,27.705343,11.67379
20250331-230124,,8.869163,17.346722,27.849985,27.911402,11.798911
20250331-230138,11.16605,,17.01754,27.933716,27.885672,11.634333
20250331-230228,,9.199385,17.300323,27.686125,27.985781,11.635124
20250331-230242,11.222472,,16.983877,27.646177,27.93017,11.756392
20250331-230333,,8.977935,16.906746,27.880007,27.763247,11.691112
20250331-230347,11.194518,,17.349445,27.930923,27.884205,11.605352


In [None]:
# Paired Wilcoxon signed-rank test of every run type against the baseline.
# Result: one table per benchmark with columns "p-value" and "significant".
# NOTE(review): the run types listed for this section do not appear to include
# "gp+pso test" - confirm the intended baseline. When the baseline column is
# absent, the tables are left empty instead of raising a KeyError.
pval_tables = {}
baseline = "gp+pso test"
for benchmark in benchmark_names:
    col = f"metrics.test_fit_{benchmark}"

    # Pivot so that values sharing a dataset_id sit on the same row (paired)
    pivoted = df_p.pivot_table(
        index="params.dataset_id", columns="params.run_type", values=col
    )

    # p-values of each comparator vs. the baseline
    pvals = {}

    if baseline in pivoted.columns:
        # keep only datasets where the baseline is present
        pivoted = pivoted.dropna(subset=[baseline])

        for b in all_run_types:
            if b == baseline or b not in pivoted.columns:
                continue

            # Drop rows where either value is missing
            paired = pivoted[[baseline, b]].dropna()

            # NaN (not None) keeps the column numeric, so the `< 0.05`
            # comparison below yields False instead of raising a TypeError.
            p = float("nan")
            if not paired.empty:
                try:
                    _, p = wilcoxon(
                        paired[baseline], paired[b], alternative="two-sided"
                    )
                except ValueError:
                    # e.g. not enough data or all differences are zero
                    p = float("nan")
            pvals[b] = p

    # Convert to DataFrame (index: run type)
    pval_tables[benchmark] = pd.DataFrame.from_dict(
        pvals, orient="index", columns=["p-value"]
    )
    pval_tables[benchmark]["significant"] = pval_tables[benchmark]["p-value"] < 0.05

In [None]:
# Median and standard deviation of the test fitness, per benchmark and run type.
summary_rows = []

for benchmark in benchmark_names:
    col = f"metrics.test_fit_{benchmark}"

    # Only the runs that actually report this benchmark
    reported = df_p.loc[
        df_p[col].notna(), ["params.run_type", "params.dataset_id", col]
    ]

    # One row of (median, std) per run type
    stats_by_type = reported.groupby("params.run_type")[col].agg(["median", "std"])

    # Flatten into a single record: "<run_type>_median" / "<run_type>_std"
    record = {"benchmark": benchmark}
    for run_type, stats in stats_by_type.iterrows():
        record[f"{run_type}_median"] = stats["median"]
        record[f"{run_type}_std"] = stats["std"]

    summary_rows.append(record)

# Benchmarks as rows, rounded to 5 decimals
summary_df = pd.DataFrame(summary_rows).set_index("benchmark").round(5)


In [186]:
# Show the per-benchmark median/std of every run type (rounded to 5 decimals)
summary_df

Unnamed: 0_level_0,gp+pso test_median,gp+pso test_std,pso test_median,pso test_std,psocd1 test_median,psocd1 test_std,psodisp2 test_median,psodisp2 test_std,psog3 test_median,psog3 test_std
benchmark,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ackley,6.24352,1.31665,8.54543,0.25754,18.70283,0.10574,18.70079,0.10395,7.98198,0.08169
cigar,3.60748,1.31349,11.12238,0.61062,18.77268,0.11227,18.76325,0.11722,7.6416,0.08553
easom,9.77746,8.03367,23.524,0.21562,23.53323,0.22713,23.56021,0.23019,23.53757,0.23373
griewank,5.45866,1.23706,9.51357,0.33724,18.5878,0.10083,18.5819,0.10095,8.59608,0.12807
levy,7.93717,0.45832,17.69006,0.31898,20.5959,0.21956,20.43578,0.19863,8.75079,0.12302
rastrigin,9.81943,0.33732,16.01258,0.21388,19.89701,0.16711,19.85454,0.15125,9.53267,0.13056
rosenbrock,4.61117,1.12418,10.55131,0.44889,19.3346,0.14721,19.30025,0.14995,7.92533,0.08987
schwefel,3.38212,1.5977,5.56929,0.20277,18.57533,0.10303,18.59863,0.10093,7.5989,0.08399
sphere,3.38056,1.59886,5.57909,0.20524,18.59278,0.09842,18.5856,0.10021,7.60082,0.07849
zakharov,8.88832,1.45977,17.14469,0.19967,25.65448,0.49538,28.50418,0.55533,9.99354,0.18651


In [None]:
# get column of min value for each row
best_medians = summary_df[
    [col for col in summary_df.columns if col.endswith("_median")]
].idxmin(axis=1)

best_medians = best_medians.str.replace("_median", "")

In [None]:
# Show, per benchmark, the run type with the lowest median
best_medians

In [None]:
# Step 1: merge each run type's median/std pair into one "median ± std" column
df_median = summary_df.copy()

for run_type in sorted(all_run_types):
    medians = summary_df[f"{run_type}_median"]
    stds = summary_df[f"{run_type}_std"]
    df_median[run_type] = [f"{m:.2f} ± {s:.2f}" for m, s in zip(medians, stds)]

# Step 2: drop the now-redundant *_median and *_std columns
cols_to_drop = [
    f"{run_type}_{stat}" for stat in ("median", "std") for run_type in all_run_types
]
df_median = df_median.drop(columns=cols_to_drop)

# Step 3: add LaTeX markup
#   \underline -> lowest median on that benchmark
#   \textbf    -> differs significantly (p < 0.05) from the baseline
df_formatted = df_median.copy()

for benchmark in df_formatted.index:
    pval_df = pval_tables.get(benchmark)
    for run_type in df_formatted.columns:
        cell = df_formatted.loc[benchmark, run_type]

        # The baseline is never tested against itself
        significant = False
        if run_type != baseline and pval_df is not None and run_type in pval_df.index:
            pval = pval_df.loc[run_type, "p-value"]
            significant = pd.notna(pval) and pval < 0.05

        if run_type == best_medians[benchmark]:
            cell = r"\underline{" + cell + "}"
        if significant:
            cell = r"\textbf{" + cell + "}"

        df_formatted.loc[benchmark, run_type] = cell

# Step 4: publication labels for the columns, capitalized benchmark names
column_labels = {
    "gp+pso test": "GP-PSO-TEST",
    "pso test": "PSO",
    "psocd1 test": "PSO-CD1",
    "psodisp2 test": "PSO-DISP2",
    "psog3 test": "PSO-G3",
}
df_formatted = df_formatted.rename(columns=column_labels)
df_formatted.index = df_formatted.index.str.capitalize()

# Step 5: export; escape=False keeps the \underline / \textbf markup intact
latex_path = results_dim / "benchmarks_table.tex"
n_value_cols = len(df_formatted.columns)
df_formatted.reset_index().to_latex(
    latex_path,
    index=False,
    escape=False,
    column_format="l" + n_value_cols * "c",
)