# μ+λ Evolution Strategy Experiments with Best Parameters (31 Seeds)

This notebook runs μ+λ experiments with the best configuration using 31 different seeds (0–30) for statistical analysis.

## 1. Import Libraries

In [1]:
import os
import sys
import sqlite3
import time
import random
import numpy as np
import multiprocessing as mp

# Add the project root (the parent of the current working directory) to the
# import path so that project packages can be imported from this notebook.
ROOT_DIR = os.path.dirname(os.path.abspath(os.getcwd()))
# Guarded so that re-running this cell does not pile up duplicate entries.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from algorithms.mulambda_runner import run_mulambda

print("✓ Libraries imported successfully")

✓ Libraries imported successfully


## 2. Database Setup

In [2]:
# SQLite file that receives one row per experiment run. The path is relative,
# so it is resolved against the kernel's current working directory.
DB_PATH = "../data/mulambda_experiments.db"

def init_mulambda_db(db_path):
    """Create the SQLite database for μ+λ results and its ``runs`` table.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    existing database leaves the stored rows untouched.

    Args:
        db_path: Path of the SQLite file. Missing parent directories are
            created so the call also works on a fresh checkout.

    Raises:
        sqlite3.Error: If the database cannot be opened or the schema
            statement fails.
    """
    # sqlite3 creates the file but not its directory; do that ourselves.
    parent_dir = os.path.dirname(db_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    con = sqlite3.connect(db_path)
    try:
        con.execute("""
            CREATE TABLE IF NOT EXISTS runs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                mu INTEGER,
                lambda_ INTEGER,
                ngen INTEGER,
                cxpb REAL,
                mutpb REAL,
                seed INTEGER,
                best_penalized REAL,
                best_clean REAL,
                time_sec REAL,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        """)
        con.commit()
    finally:
        # Always release the file handle, even when the statement fails.
        con.close()
    print(f"✓ Database initialized: {db_path}")

# Make sure the `runs` table exists before any experiment tries to insert into it.
init_mulambda_db(DB_PATH)

✓ Database initialized: mulambda_experiments.db


## 3. Save Results Function

In [3]:
def save_mulambda_run(db_path, mu, lambda_, ngen, cxpb, mutpb, seed, best_penalized, best_distance, time_sec):
    """Insert the outcome of a single μ+λ run into the ``runs`` table.

    Args:
        db_path: Path of the SQLite database file.
        mu, lambda_, ngen, cxpb, mutpb: Configuration of the run, stored as-is.
        seed: Random seed of the run.
        best_penalized: Best penalized fitness; converted with ``float()``.
        best_distance: Stored in the ``best_clean`` column; converted with
            ``float()``.
        time_sec: Run time in seconds.

    Raises:
        sqlite3.Error: If the database cannot be opened or the insert fails.
    """
    # This function is called from several worker processes writing to the same
    # file, so wait longer than sqlite3's 5 s default for a competing writer
    # before giving up with "database is locked".
    con = sqlite3.connect(db_path, timeout=30.0)
    try:
        con.execute("""
            INSERT INTO runs (
                mu, lambda_, ngen, cxpb, mutpb, seed,
                best_penalized, best_clean, time_sec
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            mu, lambda_, ngen, cxpb, mutpb, seed,
            float(best_penalized), float(best_distance), time_sec
        ))
        con.commit()
    finally:
        # Close even when the insert fails so the handle is not leaked.
        con.close()

print("✓ Save function defined")

✓ Save function defined


## 4. Worker Function for Multiprocessing

In [4]:
def mulambda_worker(args):
    """Run a single μ+λ experiment and store its result in the database.

    Args:
        args: Tuple ``(mu, lambda_, ngen, cxpb, mutpb, seed, db_path)``. The
            parameters are packed into one argument so the function can be
            mapped over a list of tasks.

    Returns:
        dict: On success ``{'success': True, 'seed', 'best_penalized', 'time'}``.
        On failure ``{'success': False, 'seed', 'error', 'error_type',
        'traceback'}``. Exceptions are caught and reported in the dict rather
        than raised.
    """
    import traceback  # local import: only needed on the failure path

    mu, lambda_, ngen, cxpb, mutpb, seed, db_path = args

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments during a run.
    t0 = time.perf_counter()
    try:
        results = run_mulambda(
            mu=mu,
            lambda_=lambda_,
            ngen=ngen,
            cxpb=cxpb,
            mutpb=mutpb,
            seed=seed,
            show_plots=False,
            show_anim=False,
            debug_interval=max(1, ngen // 10)
        )
        # Only the run itself is timed; the database write below is excluded.
        elapsed = time.perf_counter() - t0

        save_mulambda_run(db_path, mu, lambda_, ngen, cxpb, mutpb, seed,
                          results['best_penalized'], results['best_distance'], elapsed)

        return {
            'success': True,
            'seed': seed,
            'best_penalized': results['best_penalized'],
            'time': elapsed
        }
    except Exception as e:
        # str(e) alone can be uninformative (e.g. a KeyError shows only the
        # key), so also report the exception type and the full traceback.
        return {
            'success': False,
            'seed': seed,
            'error': str(e),
            'error_type': type(e).__name__,
            'traceback': traceback.format_exc()
        }

print("✓ Worker function defined")

✓ Worker function defined


## 5. Best Parameters Configuration

In [5]:
# Configuration evaluated in this notebook.
# NOTE(review): the previous comment cited the grid-search optimum as
# mu=120, lambda=120, ngen=800, cxpb=0.5, but the value actually used here is
# lambda_=50 — confirm which one is intended before re-running.
BEST_PARAMS = {
    'mu': 120,
    'lambda_': 50,
    'ngen': 800,
    'cxpb': 0.5,
}
# mutpb = 1.0 - cxpb; derived rather than hard-coded so the two cannot drift apart.
BEST_PARAMS['mutpb'] = 1.0 - BEST_PARAMS['cxpb']

# Seeds for statistical analysis (31 runs: 0-30)
SEEDS = list(range(31))

# Number of parallel processes (capped at 8)
N_JOBS = min(8, mp.cpu_count())

print(f"Best parameters: {BEST_PARAMS}")
print(f"Number of seeds: {len(SEEDS)}")
print(f"Number of parallel jobs: {N_JOBS}")
print(f"Total experiments: {len(SEEDS)}")

Best parameters: {'mu': 120, 'lambda_': 50, 'ngen': 800, 'cxpb': 0.5, 'mutpb': 0.5}
Number of seeds: 31
Number of parallel jobs: 8
Total experiments: 31


## 6. Run Experiments with Multiprocessing

In [6]:
from tqdm.notebook import tqdm

# Generate tasks (one per seed, no nested loops)
tasks = [(BEST_PARAMS['mu'], BEST_PARAMS['lambda_'], BEST_PARAMS['ngen'],
          BEST_PARAMS['cxpb'], BEST_PARAMS['mutpb'], seed, DB_PATH) for seed in SEEDS]

print(f"Total experiments to run: {len(tasks)}")
print(f"Running with {N_JOBS} parallel workers...\n")

# Run experiments in parallel. imap yields results in task order, so
# `results[i]` corresponds to `tasks[i]`.
start_time = time.time()
with mp.Pool(processes=N_JOBS) as pool:
    results = list(tqdm(
        pool.imap(mulambda_worker, tasks),
        total=len(tasks),
        desc="μ+λ Experiments"
    ))
total_time = time.time() - start_time

# Summary
successful = sum(1 for r in results if r['success'])
failed = len(results) - successful

print(f"\n{'='*60}")
print(f"Experiments completed!")
print(f"Successful: {successful}")
print(f"Failed: {failed}")
print(f"Total time: {total_time/60:.2f} minutes")
# Wall-clock time divided by the number of tasks; with parallel workers this is
# lower than the duration of an individual run.
print(f"Average time per run: {total_time/len(tasks):.2f} seconds")
print(f"{'='*60}")

# Failed runs write no database row, so list them here; otherwise they would
# silently disappear from the statistics.
for r in results:
    if not r['success']:
        print(f"  Seed {r['seed']} failed: {r['error']}")

Total experiments to run: 31
Running with 8 parallel workers...

[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:

μ+λ Experiments:   0%|          | 0/31 [00:00<?, ?it/s]

[INFO] Valores únicos del BMP original: [INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:    [INFO] Valores únicos del BMP original:[  0 255][  0 255] [  0 255] [  0 255]
[  0 255]
[  0 255]


[  0 255][INFO] Valores únicos tras invertir:[INFO] Valores únicos del BMP original:[INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir:
[INFO] Valores únicos tras invertir:
    [INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir: [  0 255][  0 255][  0 255] [  0 255] [  0 255]
[INFO] Valores únicos tras invertir:[  0 255]

[  0 255]

[INFO] Environment cargado: 92 x 113[INFO] Environment cargado: 92 x 113 
[INFO] Environment cargado: 92 x 113[INFO] Environment cargado: 92 x 113[INFO] Valores únicos tras invertir:



[  0 255] [INFO] Environment cargado: 92 x 113

[INFO] Environment cargado: 92 x 113[  0 255]
[INFO] Environment cargado: 92 x 113


[INFO] Environment cargado: 92 x 113
[GEN  

## 7. Statistical Analysis

In [7]:
import pandas as pd
from scipy import stats

# Load results from database (oldest first)
con = sqlite3.connect(DB_PATH)
try:
    all_runs = pd.read_sql_query("SELECT * FROM runs ORDER BY id", con)
finally:
    # Close the connection even if the query fails.
    con.close()

print(f"Total runs in database: {len(all_runs)}")

# Rows are only ever inserted into `runs`, so re-executing the experiment cell
# (or running another configuration) leaves extra rows in the table. Keep only
# the current configuration and the most recent row per seed, so repeated seeds
# cannot inflate the sample size and narrow the confidence intervals.
config_mask = (
    all_runs[list(BEST_PARAMS)]
    .eq(pd.Series(BEST_PARAMS), axis='columns')
    .all(axis=1)
)
df = (
    all_runs[config_mask]
    .drop_duplicates(subset='seed', keep='last')
    .reset_index(drop=True)
)
if len(df) != len(all_runs):
    print(f"Runs used for analysis (current configuration, latest per seed): {len(df)}")

df.head()

Total runs in database: 31


Unnamed: 0,id,mu,lambda_,ngen,cxpb,mutpb,seed,best_penalized,best_clean,time_sec,timestamp
0,1,120,50,800,0.5,0.5,4,1618.243506,1089.293506,188.593172,2025-12-11 11:02:55
1,2,120,50,800,0.5,0.5,0,1104.65873,1082.90873,190.382614,2025-12-11 11:02:56
2,3,120,50,800,0.5,0.5,1,1298.978787,1177.778787,194.603116,2025-12-11 11:03:01
3,4,120,50,800,0.5,0.5,3,1404.086147,1205.536147,197.450833,2025-12-11 11:03:03
4,5,120,50,800,0.5,0.5,5,1316.220923,1161.920923,200.474139,2025-12-11 11:03:07


In [8]:
# Statistical summary of the runs held in `df`
banner = "=" * 80
print(f"\n{banner}")
print("μ+λ STATISTICAL SUMMARY")
print(banner)

# Printed label -> key in BEST_PARAMS (only `lambda` differs from its key).
config_text = ", ".join(
    f"{shown}={BEST_PARAMS[key]}"
    for shown, key in (("mu", "mu"), ("lambda", "lambda_"), ("ngen", "ngen"),
                       ("cxpb", "cxpb"), ("mutpb", "mutpb"))
)
print(f"\nConfiguration: {config_text}")

n_runs = len(df)
print(f"Number of runs: {n_runs}\n")

# (heading, column, statistics to print). The median is reported for the two
# fitness columns but not for the computation time.
report_layout = [
    ("Best Penalized:", 'best_penalized', ("Mean", "Std", "Min", "Max", "Median")),
    ("\nBest Clean:", 'best_clean', ("Mean", "Std", "Min", "Max", "Median")),
    ("\nComputation Time (seconds):", 'time_sec', ("Mean", "Std", "Min", "Max")),
]
for heading, column, labels in report_layout:
    print(heading)
    for label in labels:
        # Each label is the capitalised name of the Series method to call.
        value = getattr(df[column], label.lower())()
        print(f"  {label}: {value:.2f}")

# 95% confidence intervals for the mean, from Student's t distribution with
# n - 1 degrees of freedom and the standard error of the mean as scale.
dof = n_runs - 1
ci_penalized, ci_clean = (
    stats.t.interval(0.95, dof, loc=df[col].mean(), scale=stats.sem(df[col]))
    for col in ('best_penalized', 'best_clean')
)

print("\n95% Confidence Intervals:")
for label, (lower, upper) in (("Best Penalized", ci_penalized), ("Best Clean", ci_clean)):
    print(f"  {label}: [{lower:.2f}, {upper:.2f}]")


μ+λ STATISTICAL SUMMARY

Configuration: mu=120, lambda=50, ngen=800, cxpb=0.5, mutpb=0.5
Number of runs: 31

Best Penalized:
  Mean: 1294.01
  Std: 115.99
  Min: 1104.66
  Max: 1618.24
  Median: 1277.88

Best Clean:
  Mean: 1146.82
  Std: 40.81
  Min: 1082.91
  Max: 1205.58
  Median: 1157.44

Computation Time (seconds):
  Mean: 189.92
  Std: 11.28
  Min: 164.36
  Max: 207.68

95% Confidence Intervals:
  Best Penalized: [1251.47, 1336.56]
  Best Clean: [1131.85, 1161.79]


## 8. Normality Tests

In [9]:
# Shapiro-Wilk normality test on both fitness columns
stat_pen, p_pen = stats.shapiro(df['best_penalized'])
stat_clean, p_clean = stats.shapiro(df['best_clean'])


def _normality_verdict(p_value):
    """Return the verdict text for a Shapiro-Wilk p-value at the 0.05 level."""
    if p_value > 0.05:
        return 'Normal distribution'
    return 'Not normal distribution'


print("\n=== Normality Tests (Shapiro-Wilk) ===")
# The second prefix carries the blank line that separates the two reports.
for prefix, w_stat, p_value in (
    ("Best Penalized", stat_pen, p_pen),
    ("\nBest Clean", stat_clean, p_clean),
):
    print(f"{prefix}: stat={w_stat:.4f}, p-value={p_value:.4f}")
    print(f"  -> {_normality_verdict(p_value)} (α=0.05)")


=== Normality Tests (Shapiro-Wilk) ===
Best Penalized: stat=0.9644, p-value=0.3801
  -> Normal distribution (α=0.05)

Best Clean: stat=0.9087, p-value=0.0120
  -> Not normal distribution (α=0.05)


## 9. Export Results

In [10]:
# Output files (relative to the kernel's working directory)
all_results_csv = '../results/individual/mulambda_all_results.csv'
summary_csv = '../results/individual/mulambda_statistical_summary.csv'

# to_csv does not create missing directories, so make sure the targets exist.
for csv_path in (all_results_csv, summary_csv):
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# Export all results to CSV
df.to_csv(all_results_csv, index=False)
print("✓ All results exported to mulambda_all_results.csv")

# Export summary statistics: one row per metric, one column per statistic.
summary = (
    df[['best_penalized', 'best_clean', 'time_sec']]
    .agg(['mean', 'std', 'min', 'max', 'median'])
    .T
    .rename_axis('metric')
    .reset_index()
)
summary.to_csv(summary_csv, index=False)
print("✓ Statistical summary exported to mulambda_statistical_summary.csv")

✓ All results exported to mulambda_all_results.csv
✓ Statistical summary exported to mulambda_statistical_summary.csv
