# Comparison with other libraries

This notebook extends 'evobandits_demo.ipynb' to compare the GMAB algorithm with popular alternatives.

Setup:

In [None]:
import json
from pathlib import Path

from evobandits import CategoricalParam, Study, GMAB
from irace import irace, Experiment, ParameterSpace, Scenario, Ordinal
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import numpy as np
from tqdm import tqdm

In [None]:
from application_example import (
    genetic_algorithm,
    TSP_OPT_COST
)

In [None]:
# Reset to matplotlib's default style, then apply the font settings used by all figures.
plt.style.use("default")

mpl.rcParams.update(
    {
        "font.family": "serif",
        # Candidate serif fonts for the "serif" family.
        "font.serif": [
            "Computer Modern Roman",
            "Times New Roman",
            "Times",
            "DejaVu Serif",
        ],
        "font.size": 14,
    }
)

## 1. Presets for the Optimization

Identical number of runs, budget and a seed for reproduction:

In [None]:
SEED = 42  # base seed; optimizer run `run_id` uses SEED + run_id
N_RUNS = 50  # number of independent runs executed per optimizer
SIM_BUDGET = 1000  # budget handed to each optimizer per run (trials / evaluations / experiments)
EVAL_BUDGET = 500  # number of GA re-evaluations used to estimate the "true" value of a result

All variables are modeled as ordinal variables over the ranges below to ensure an identical search space for every optimizer.

In [None]:
# Discrete value grids of the search space: population sizes 50..250 and
# rate/split parameters in steps of 0.01.
POP_SIZE_RANGE = list(range(50, 251))
ELITE_SPLIT_RANGE = [step * 0.01 for step in range(21)]
TOURNAMENT_SPLIT_RANGE = [step * 0.01 for step in range(11)]
CROSSOVER_RATE_RANGE = [step * 0.01 for step in range(101)]
MUTATION_RATE_RANGE = [step * 0.01 for step in range(101)]

Size of the search space:

In [None]:
# The search space is the Cartesian product of the grids, so its size is the
# product of the individual grid lengths.
total_combinations = 1
for value_grid in (
    POP_SIZE_RANGE,
    ELITE_SPLIT_RANGE,
    TOURNAMENT_SPLIT_RANGE,
    CROSSOVER_RATE_RANGE,
    MUTATION_RATE_RANGE,
):
    total_combinations *= len(value_grid)

print(f"Number of distinct solutions:\t{total_combinations:,}")

The number of generations for the genetic algorithm is fixed to 300.

In [None]:
# Number of GA generations used for every optimizer trial and every re-evaluation.
GENERATIONS = 300

### Why fix the number of generations?

This is done to ensure that the results for different configurations actually differ: with a higher number of generations, the genetic algorithm achieves good performance for (nearly) all configurations, which means that optimization is not really needed.

In [None]:
def randomize_configuration(seed):
    """Draw one GA configuration at random from the discrete search space.

    Args:
        seed: Seed for the NumPy generator that performs the draws.

    Returns:
        dict with the keys ``pop_size`` (int) and ``elite_split``,
        ``tournament_split``, ``crossover_rate``, ``mutation_rate`` (float),
        each sampled from its module-level ``*_RANGE`` grid.
    """
    rng = np.random.default_rng(seed)
    # (key, grid, cast) in draw order; the order matters because every draw
    # advances the generator.
    sampling_plan = (
        ("pop_size", POP_SIZE_RANGE, int),
        ("elite_split", ELITE_SPLIT_RANGE, float),
        ("tournament_split", TOURNAMENT_SPLIT_RANGE, float),
        ("crossover_rate", CROSSOVER_RATE_RANGE, float),
        ("mutation_rate", MUTATION_RATE_RANGE, float),
    )
    return {key: cast(rng.choice(grid)) for key, grid, cast in sampling_plan}

In [None]:
rng = np.random.default_rng(SEED)
n_configs = 10
n_samples = 20  # GA runs per (configuration, number of generations) pair
generations = [100, 200, 300, 400, 500]

# Create a number of configurations to compare
# Best result from 'evobandits_demo.ipynb' is used as reference configuration
configurations = {
    "Reference": {
        "pop_size": 250,
        "elite_split": 0.02,
        "tournament_split": 0.09,
        "mutation_rate": 0.87,
        "crossover_rate": 0.06,
    },
}
# The reference counts as one configuration, so n_configs - 1 random ones are added.
for i in range(1, n_configs):
    seed = rng.integers(0, 2**32 - 1, dtype=int)
    configurations[f"Random_{i}"] = randomize_configuration(seed)

# Collect samples for each configuration and number of generations
results = {
    "configurations": configurations,
    "generations": generations,
}
for name, config in configurations.items():
    results[name] = {}

    for gen in generations:
        gen_results = []
        for _ in tqdm(range(n_samples), desc=f"{name} | Gen {gen}"):
            seed = rng.integers(0, 2**32 - 1, dtype=int)
            # Run the GA with the *full* configuration under test; previously all
            # parameters except the mutation rate were hard-coded, so the
            # configurations were not really compared.
            cost, _ = genetic_algorithm(generations=gen, seed=seed, **config)
            gen_results.append(cost)
        results[name][gen] = gen_results

# Use a context manager so the file is flushed and closed deterministically.
with open(Path("_data/03_comparison_generations_cnt.json"), "w") as f:
    json.dump(results, f)

In [None]:
plt.figure(figsize=(8, 5))

# Line style shared by all randomized configurations. It is reused for the
# legend proxy so that the legend entry matches the plotted lines.
random_style = {"color": "C7", "linewidth": 1.2, "alpha": 0.6, "linestyle": "--"}

for name in configurations:
    gen_results = results[name]
    means = [np.mean(gen_results[gen]) for gen in generations]

    if name == "Reference":
        # Highlight the reference config and show its spread as error bars
        stds = [np.std(gen_results[gen]) for gen in generations]
        plt.errorbar(
            generations,
            means,
            yerr=stds,
            label="Reference Configuration",
            capsize=4,
            marker="o",
            color="tab:blue",
            linewidth=2.5,
        )
    else:
        # Unlabeled on purpose: one proxy entry below represents all of them
        plt.plot(generations, means, **random_style)

# Proxy line as label for all randomized configurations
proxy_line = mlines.Line2D([], [], label="Randomized Configurations", **random_style)

handles, labels = plt.gca().get_legend_handles_labels()
handles.append(proxy_line)
labels.append(proxy_line.get_label())
plt.legend(handles=handles, labels=labels)

plt.xlabel("Number of Generations")
plt.ylabel("Total Distance")
plt.grid()
plt.tight_layout()
plt.savefig(Path("_plots/06_comparison_ga_generations.pdf"))
plt.show()

## 2. Optimization

For each optimizer:
- Model the genetic_algorithm as objective function
- Model the search space
- Configure the algorithm
- Execute for preset, identical budget and runs
- Collect the results and re-sample to evaluate the "true" value of the configuration.

In [1]:
def estimate_true_value(seed, ga_configuration):
    """Estimate the expected cost of a GA configuration by repeated simulation.

    The GA is run ``EVAL_BUDGET`` times with ``GENERATIONS`` generations, each
    time with a fresh seed drawn from a generator seeded with ``seed``.

    Args:
        seed: Seed for the generator that produces the per-run GA seeds.
        ga_configuration: Keyword arguments forwarded to ``genetic_algorithm``.

    Returns:
        Tuple ``(mean_evaluation, evaluations)``: the mean of all observed
        costs and the list of the individual costs.
    """
    evaluations = []
    rng = np.random.default_rng(seed)
    for _ in range(EVAL_BUDGET):
        seed_eval = rng.integers(0, 2**32 - 1, dtype=int)
        # genetic_algorithm returns a pair (unpacked this way at its other call
        # sites in this notebook); only the cost is wanted here, not the tuple.
        best_cost, _ = genetic_algorithm(
            seed=seed_eval, generations=GENERATIONS, **ga_configuration
        )
        evaluations.append(best_cost)
    mean_evaluation = np.mean(evaluations)
    return mean_evaluation, evaluations

### 2.1 EvoBandits

In [None]:
def objective(seed: int, **params: dict):
    """Seeded, single-objective wrapper around the GA.

    Runs ``genetic_algorithm`` for ``GENERATIONS`` generations with the given
    seed and parameters and returns only the first element of its result
    (the best cost).
    """
    ga_result = genetic_algorithm(seed=seed, generations=GENERATIONS, **params)
    best_cost, _ = ga_result
    return best_cost

# Search space: every GA parameter is a categorical choice over its value grid.
params = dict(
    pop_size=CategoricalParam(POP_SIZE_RANGE),
    elite_split=CategoricalParam(ELITE_SPLIT_RANGE),
    tournament_split=CategoricalParam(TOURNAMENT_SPLIT_RANGE),
    mutation_rate=CategoricalParam(MUTATION_RATE_RANGE),
    crossover_rate=CategoricalParam(CROSSOVER_RATE_RANGE),
)

print("\nRunning optimization ...")
results_evobandits = []

for run_id in tqdm(range(N_RUNS), desc="EvoBandits | Run"):
    # One independent, seeded study per run
    seed = SEED + run_id
    gmab = GMAB(population_size=10)
    study = Study(seed=seed, algorithm=gmab)
    study.optimize(objective, params, n_trials=SIM_BUDGET)
    print(f"Config:\t{study.best_params}")
    print(f"Value:\t{study.best_value}")

    # Re-sample the best configuration to estimate its "true" value
    mean_evaluation, evaluations = estimate_true_value(seed, study.best_params)
    print(f"Est. true value:\t{mean_evaluation}")

    run_record = {
        "mean_evaluation": mean_evaluation,
        "evaluations": evaluations,
        "best_solution": study.best_solution,
        "seed": seed,
    }
    results_evobandits.append(run_record)

    # Rewrite the results file after every run so that partial progress is kept
    with Path("_data/04_results_evobandits.json").open("w") as f:
        json.dump(results_evobandits, f)

### 2.2 Random Sampling

In [None]:
def random_search(budget, seed):
    """Evaluate ``budget`` random GA configurations and return the best one.

    Each trial draws a seed from a generator seeded with ``seed``; that trial
    seed is used both to randomize the configuration and to run the GA.

    Args:
        budget: Number of random configurations to evaluate (must be positive).
        seed: Seed of the generator that produces the trial seeds.

    Returns:
        Tuple ``(best_value, best_config)`` with the lowest observed cost and
        the configuration that produced it. On ties the most recently
        evaluated configuration is returned.

    Raises:
        ValueError: If ``budget`` is not positive.
    """
    if budget <= 0:
        raise ValueError("budget must be a positive number of evaluations")

    rng = np.random.default_rng(seed)
    best_value = None
    best_config = None
    for _ in range(budget):
        # Separate name so the `seed` argument is not overwritten
        trial_seed = rng.integers(0, 2**32 - 1, dtype=int)
        rnd_config = randomize_configuration(trial_seed)
        cost, _ = genetic_algorithm(
            seed=trial_seed, generations=GENERATIONS, **rnd_config
        )
        # Track the incumbent directly instead of keying a dict by float cost
        if best_value is None or cost <= best_value:
            best_value, best_config = cost, rnd_config

    return best_value, best_config

In [None]:
results_rnd_search = []

for run_id in tqdm(range(N_RUNS), desc="RandomSearch | Run"):
    print("\nRunning optimization ...")
    # One independent, seeded random search per run
    seed = SEED + run_id
    best_value, best_params = random_search(SIM_BUDGET, seed)
    print(f"Config:\t{best_params}")
    print(f"Value:\t{best_value}")

    # Re-sample the best configuration to estimate its "true" value
    mean_evaluation, evaluations = estimate_true_value(seed, best_params)
    print(f"Est. true value:\t{mean_evaluation}")

    run_record = {
        "mean_evaluation": mean_evaluation,
        "evaluations": evaluations,
        "best_value": best_value,
        "best_params": best_params,
        "seed": seed,
    }
    results_rnd_search.append(run_record)

    # Rewrite the results file after every run so that partial progress is kept
    with Path("_data/04_results_rnd_search.json").open("w") as f:
        json.dump(results_rnd_search, f)

### 2.3 IRACE

In [None]:
def ga_runner(experiment: Experiment, scenario: Scenario):
    """Target runner for irace: one seeded GA simulation, returning its cost.

    Args:
        experiment: Provides the ``seed`` and the ``configuration`` (forwarded
            as keyword arguments) for this GA run.
        scenario: Part of the runner signature; not used here.

    Returns:
        The best cost of the GA run as a ``float``.
    """
    configuration = experiment.configuration
    ga_result = genetic_algorithm(
        seed=experiment.seed, generations=GENERATIONS, **configuration
    )
    best_cost, _ = ga_result
    return float(best_cost)

# Search space: every GA parameter is an ordinal choice over its value grid.
param_space = ParameterSpace(
    [
        Ordinal("pop_size", POP_SIZE_RANGE),
        Ordinal("elite_split", ELITE_SPLIT_RANGE),
        Ordinal("tournament_split", TOURNAMENT_SPLIT_RANGE),
        Ordinal("crossover_rate", CROSSOVER_RATE_RANGE),
        Ordinal("mutation_rate", MUTATION_RATE_RANGE),
    ]
)

print("\nRunning optimization ...")
results_irace = []

for run_id in tqdm(range(N_RUNS), desc="IRACE | Run"):
    print("\nRunning optimization ...")
    seed = SEED + run_id
    # Seed the scenario so that each run is reproducible and distinct, in line
    # with the other optimizers; previously `seed` was computed but never used.
    # NOTE(review): assumes Scenario accepts a `seed` keyword - confirm against
    # the installed irace version.
    scenario = Scenario(
        max_experiments=SIM_BUDGET,
        seed=seed,
        verbose=0,
    )
    # The first entry of irace's result is taken as the best configuration
    best_params = irace(ga_runner, param_space, scenario)[0]
    print(f"Config:\t{best_params}")

    # Re-sample the best configuration to estimate its "true" value
    mean_evaluation, evaluations = estimate_true_value(seed, best_params)
    print(f"Est. true value:\t{mean_evaluation}")

    results_irace.append({
        "mean_evaluation": mean_evaluation,
        "evaluations": evaluations,
        "best_params": best_params,
        "seed": seed,
    })

    # Rewrite the results file after every run so that partial progress is kept
    with open(Path("_data/04_results_irace.json"), 'w') as f:
        json.dump(results_irace, f)

## 3. Comparison

In [None]:
plt.figure(figsize=(8, 5))

# Estimated "true" value of the best configuration from each run, per optimizer
values_evobandits = [r["mean_evaluation"] for r in results_evobandits]
values_irace = [r["mean_evaluation"] for r in results_irace]
values_rand_search = [r["mean_evaluation"] for r in results_rnd_search]

plt.boxplot(
    [values_evobandits, values_irace, values_rand_search],
    tick_labels=["evobandits", "irace", "random search"],
    showmeans=True,
    meanprops={"markerfacecolor": "black", "markeredgecolor": "black"},
    medianprops={"color": "C0"},
)
plt.axhline(TSP_OPT_COST, linestyle='--', linewidth=1, color="C3", label="Optimal Cost")

plt.ylabel("Total Distance")

plt.legend()
plt.tight_layout()
# Save under a name of this notebook; the previous target
# ("02_evobandits_demo_results.pdf") is named after the demo notebook's figure
# and would overwrite it.
plt.savefig(Path("_plots/06_comparison_by_optimizer_boxplot.pdf"))
plt.show()

In [None]:
plt.figure(figsize=(8, 5))

# Estimated "true" value of the best configuration from each run, per optimizer
values_evobandits = [r["mean_evaluation"] for r in results_evobandits]
values_irace = [r["mean_evaluation"] for r in results_irace]
values_rand_search = [r["mean_evaluation"] for r in results_rnd_search]

optimizer_labels = ["evobandits", "irace", "random search"]
positions = list(range(1, len(optimizer_labels) + 1))

plt.violinplot(
    [values_evobandits, values_irace, values_rand_search],
    positions=positions,
    showmeans=True,
)
# violinplot does not label its groups; name the optimizers on the x-axis
plt.xticks(positions, optimizer_labels)
plt.axhline(TSP_OPT_COST, linestyle='--', linewidth=1, color="C3", label="Optimal Cost")

plt.ylabel("Total Distance")

plt.legend()
plt.tight_layout()
plt.savefig(Path("_plots/06_comparison_by_optimizer.pdf"))
plt.show()