# NSGA-II Experiments with Multiprocessing

This notebook runs NSGA-II experiments with different parameter configurations in parallel worker processes (via `multiprocessing`) and saves the results to an SQLite database.

## 1. Import Libraries

In [1]:
import os
import sys
import sqlite3
import time
import random
import numpy as np
import multiprocessing as mp
from typing import Dict, List, Tuple

# Add the project root (the parent of the current working directory) to the
# import path so the project-local `algorithms` package can be imported.
ROOT_DIR = os.path.dirname(os.path.abspath(os.getcwd()))
# Guard the append so re-running this cell does not pile up duplicate entries.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from algorithms.ga_runner_multi import run_ga_multi
from algorithms.ga_core import prepare_environment

print("✓ Libraries imported successfully")

✓ Libraries imported successfully


## 2. Database Setup

In [2]:
DB_PATH = "../data/nsga2_experiments.db"

def init_nsga2_db(db_path):
    """Create the SQLite database file and its ``runs`` table if missing.

    Args:
        db_path: Path of the SQLite database file. Missing parent directories
            are created first, because ``sqlite3.connect`` only creates the
            file itself and fails when the containing folder does not exist.

    Side effects:
        Creates the ``runs`` table when it is absent (existing tables and rows
        are left untouched) and prints a confirmation line.
    """
    # A bare filename (or ":memory:") has no directory component to create.
    parent_dir = os.path.dirname(db_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    con = sqlite3.connect(db_path)
    try:
        con.execute("""
            CREATE TABLE IF NOT EXISTS runs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                pop_size INTEGER,
                ngen INTEGER,
                cxpb REAL,
                mutpb REAL,
                seed INTEGER,
                best_penalized REAL,
                best_clean REAL,
                best_tradeoff_penalized REAL,
                best_tradeoff_clean REAL,
                pareto_size INTEGER,
                time_sec REAL,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        """)
        con.commit()
    finally:
        # Always release the file handle, even if table creation fails.
        con.close()

    print(f"✓ Database initialized: {db_path}")

init_nsga2_db(DB_PATH)

✓ Database initialized: nsga2_experiments.db


## 3. Save Results Function

In [3]:
def save_nsga2_run(db_path, pop_size, ngen, cxpb, mutpb, seed, results, time_sec):
    """Insert one NSGA-II run into the ``runs`` table.

    Args:
        db_path: Path of the SQLite database holding the ``runs`` table.
        pop_size, ngen, cxpb, mutpb, seed: Run parameters, stored as given.
        results: Mapping with the keys ``'best_penalized'``, ``'best_clean'``,
            ``'best_tradeoff'`` (objects exposing ``.fitness.values``) and
            ``'pareto_front'`` (a sized collection).
        time_sec: Duration of the run in seconds.

    Notes:
        ``fitness.values`` is read as ``(penalized, clean)``, following the
        way the tradeoff columns index it.
        NOTE(review): the ``best_clean`` column now stores the clean objective
        of ``results['best_clean']``. Previously that individual was fetched
        but never used, and the column duplicated the clean objective of the
        best-penalized individual. Confirm this matches the intended meaning.

    Raises:
        KeyError: If ``results`` lacks one of the expected keys.
        sqlite3.Error: If the insert fails; the connection is still closed.
    """
    best_pen = results['best_penalized']
    best_clean = results['best_clean']
    best_trade = results['best_tradeoff']

    # Build the row before opening the database so malformed results fail
    # without ever holding a connection.
    row = (
        pop_size, ngen, cxpb, mutpb, seed,
        float(best_pen.fitness.values[0]), float(best_clean.fitness.values[1]),
        float(best_trade.fitness.values[0]), float(best_trade.fitness.values[1]),
        len(results['pareto_front']), time_sec,
    )

    con = sqlite3.connect(db_path)
    try:
        con.execute("""
            INSERT INTO runs (
                pop_size, ngen, cxpb, mutpb, seed,
                best_penalized, best_clean,
                best_tradeoff_penalized, best_tradeoff_clean,
                pareto_size, time_sec
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, row)
        con.commit()
    finally:
        # Close even when the insert raises (e.g. locked database, missing table).
        con.close()

print("✓ Save function defined")

✓ Save function defined


## 4. Worker Function for Multiprocessing

In [4]:
def nsga2_worker(args):
    """Run one NSGA-II experiment and persist its result.

    Args:
        args: Tuple ``(pop_size, ngen, cxpb, mutpb, seed, db_path)``. A single
            tuple is used so the function can be mapped over a task list.

    Returns:
        dict: On success ``{'success': True, 'params': ..., 'time': seconds}``,
        where ``time`` covers the GA run only (not the database write).
        On failure ``{'success': False, 'params': ..., 'error': str(exc),
        'traceback': formatted_traceback}``. Exceptions from the run or the
        save are captured rather than raised, so one failing experiment does
        not abort the whole batch.
    """
    # Local import keeps this function self-contained.
    import traceback

    pop_size, ngen, cxpb, mutpb, seed, db_path = args
    params = (pop_size, ngen, cxpb, mutpb, seed)

    # perf_counter is monotonic, so the duration is immune to wall-clock jumps.
    started = time.perf_counter()
    try:
        results = run_ga_multi(
            pop_size=pop_size,
            ngen=ngen,
            cxpb=cxpb,
            mutpb=mutpb,
            seed=seed,
            show_plots=False,
            show_anim=False,
            # Roughly ten progress reports per run, never an interval of zero.
            debug_interval=max(1, ngen // 10)
        )
        elapsed = time.perf_counter() - started

        save_nsga2_run(db_path, pop_size, ngen, cxpb, mutpb, seed, results, elapsed)

        return {
            'success': True,
            'params': params,
            'time': elapsed
        }
    except Exception as e:
        return {
            'success': False,
            'params': params,
            'error': str(e),
            # str(e) alone drops the exception type and location.
            'traceback': traceback.format_exc()
        }

print("✓ Worker function defined")

✓ Worker function defined


## 5. Define Parameter Grid

In [5]:
# Hyper-parameter values to sweep; each listed value is combined with every
# other one when the experiment tasks are generated.
param_grid = dict(
    pop_size=[120],
    ngen=[1000],
    cxpb=[0.8],
    mutpb=[0.3],
)

# One independent repetition per seed: 0 through 30 inclusive (31 runs).
SEEDS = list(range(31))

# Size of the worker pool, capped at 8 processes.
N_JOBS = min(8, mp.cpu_count())

for label, value in (
    ("Parameter grid", param_grid),
    ("Seeds", SEEDS),
    ("Number of parallel jobs", N_JOBS),
):
    print(f"{label}: {value}")

Parameter grid: {'pop_size': [120], 'ngen': [1000], 'cxpb': [0.8], 'mutpb': [0.3]}
Seeds: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
Number of parallel jobs: 8


## 6. Run Experiments with Multiprocessing

In [6]:
import itertools
from tqdm.notebook import tqdm

# Generate one task tuple per (configuration, seed) combination.
# itertools.product varies its right-most input fastest, which is the same
# order the equivalent nested loops (seed innermost) would produce. The
# comprehension also keeps the loop variables out of the notebook namespace.
tasks = [
    (pop_size, ngen, cxpb, mutpb, seed, DB_PATH)
    for pop_size, ngen, cxpb, mutpb, seed in itertools.product(
        param_grid['pop_size'],
        param_grid['ngen'],
        param_grid['cxpb'],
        param_grid['mutpb'],
        SEEDS,
    )
]

print(f"Total experiments to run: {len(tasks)}")
print(f"Running with {N_JOBS} parallel workers...\n")

# Fan the tasks out over a pool of worker processes. `imap` yields outcomes
# lazily in task order, which lets tqdm advance as they are consumed.
with mp.Pool(processes=N_JOBS) as pool:
    outcome_stream = pool.imap(nsga2_worker, tasks)
    progress = tqdm(outcome_stream, total=len(tasks), desc="NSGA-II Experiments")
    # Drain inside the `with` block: leaving it terminates the pool.
    results = list(progress)

# Summary: split the worker outcomes into successes and failures.
failures = [r for r in results if not r['success']]
failed = len(failures)
successful = len(results) - failed

print(f"\n{'='*60}")
print(f"Experiments completed!")
print(f"Successful: {successful}")
print(f"Failed: {failed}")
print(f"{'='*60}")

# The worker captures exceptions instead of raising them, so without this
# listing a failed run would only show up as a number in the count above.
for failure in failures:
    print(f"  FAILED params={failure['params']}: {failure.get('error', 'unknown error')}")

Total experiments to run: 31
Running with 8 parallel workers...

[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:        [  0 255][  0 255][  0 255][  0 255][  0 255][  0 255]




[  0 255][INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir:[  0 255]
[INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir: 

  [INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir: [  0 255][INFO] Valores únicos tras invertir:  [INFO] Valores únicos tras invertir:
[  0 255][  0 255]

NSGA-II Experiments:   0%|          | 0/31 [00:00<?, ?it/s]

[  0 255] [  0 255][  0 255]
 [INFO] Environment cargado: 92 x 113


[  0 255]
[INFO] Environment cargado: 92 x 113
[  0 255][INFO] Environment cargado: 92 x 113
[INFO] Environment cargado: 92 x 113[INFO] Environment cargado: 92 x 113



[INFO] Environment cargado: 92 x 113
[INFO] Environment cargado: 92 x 113

[INFO] Environment cargado: 92 x 113

=== Running NSGA-II Multiobjective GA ===

=== Running NSGA-II Multiobjective GA ===

=== Running NSGA-II Multiobjective GA ===

=== Running NSGA-II Multiobjective GA ===
=== Running NSGA-II Multiobjective GA ===


=== Running NSGA-II Multiobjective GA ===

=== Running NSGA-II Multiobjective GA ===

=== Running NSGA-II Multiobjective GA ===
[GEN  100] PenalBest=1916.0 | PenalAvg=7529.0 | CleanBest=1030.7 | CleanAvg=1034.3 | StdClean=12.1 | MinDist=2.00 | Conflicts=5
[GEN  100] PenalBest=1498.3 | PenalAvg=7406.6 | CleanBest=1026.4 | CleanAvg=1031.8 | StdClean=17.1 | MinDist=6.08 | Conflicts=0
[GEN  100] PenalBest=1698.2 | PenalAvg=7841.7 | Cl

## 7. Statistical Analysis

In [7]:
import pandas as pd
from scipy import stats

# Load every stored run from the database into a DataFrame.
con = sqlite3.connect(DB_PATH)
try:
    df = pd.read_sql_query("SELECT * FROM runs", con)
finally:
    # Release the database handle even when the query raises (e.g. missing table).
    con.close()

print(f"Total runs in database: {len(df)}")
df.head()

Total runs in database: 31


Unnamed: 0,id,pop_size,ngen,cxpb,mutpb,seed,best_penalized,best_clean,best_tradeoff_penalized,best_tradeoff_clean,pareto_size,time_sec,timestamp
0,1,120,1000,0.8,0.3,4,1103.193001,1085.193001,1103.193001,1085.193001,14,386.591481,2025-12-11 08:11:15
1,2,120,1000,0.8,0.3,0,1175.429797,1100.979797,1175.429797,1100.979797,64,393.946214,2025-12-11 08:11:22
2,3,120,1000,0.8,0.3,3,1237.785642,1161.435642,1237.785642,1161.435642,17,394.148334,2025-12-11 08:11:22
3,4,120,1000,0.8,0.3,6,1134.144011,1103.394011,1134.144011,1103.394011,104,394.777181,2025-12-11 08:11:23
4,5,120,1000,0.8,0.3,1,1150.435137,1129.335137,1150.435137,1129.335137,36,395.162473,2025-12-11 08:11:23


In [8]:
# Summarise each hyper-parameter configuration across its runs.
# Each metric maps to the statistics computed for it; the output columns are
# named "<metric>_<statistic>".
metric_stats = {
    'best_tradeoff_penalized': ['mean', 'std', 'min', 'max'],
    'best_tradeoff_clean': ['mean', 'std', 'min', 'max'],
    'pareto_size': ['mean', 'std'],
    'time_sec': ['mean', 'std'],
}
named_aggs = {
    f"{metric}_{stat}": (metric, stat)
    for metric, stat_names in metric_stats.items()
    for stat in stat_names
}
# Number of runs that contributed to each configuration.
named_aggs['n_runs'] = ('id', 'count')

grouped = (
    df.groupby(['pop_size', 'ngen', 'cxpb', 'mutpb'])
      .agg(**named_aggs)
      .round(3)
      # Ascending sort: the lowest mean penalized tradeoff comes first.
      .sort_values('best_tradeoff_penalized_mean')
)

print("\n=== Top 10 Configurations (by best tradeoff penalized) ===")
grouped.head(10)


=== Top 10 Configurations (by best tradeoff penalized) ===


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,best_tradeoff_penalized_mean,best_tradeoff_penalized_std,best_tradeoff_penalized_min,best_tradeoff_penalized_max,best_tradeoff_clean_mean,best_tradeoff_clean_std,best_tradeoff_clean_min,best_tradeoff_clean_max,pareto_size_mean,pareto_size_std,time_sec_mean,time_sec_std,n_runs
pop_size,ngen,cxpb,mutpb,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
120,1000,0.8,0.3,1227.074,113.014,1084.98,1666.271,1127.301,39.908,1069.98,1208.193,40.194,21.52,372.617,23.741,31


In [9]:
# Statistical summary for the top-ranked configuration, i.e. the first row of
# the sorted `grouped` table.
best_config = grouped.index[0]
best_pop, best_ngen, best_cxpb, best_mutpb = best_config

# Select the individual runs that belong to that configuration.
is_best = (
    df['pop_size'].eq(best_pop)
    & df['ngen'].eq(best_ngen)
    & df['cxpb'].eq(best_cxpb)
    & df['mutpb'].eq(best_mutpb)
)
best_runs = df[is_best]


def _mean_pm_std(column):
    """Format a column of `best_runs` as 'mean ± sample std' with 2 decimals."""
    values = best_runs[column]
    return f"{values.mean():.2f} ± {values.std():.2f}"


def _t_ci95(column):
    """95% Student-t confidence interval for the mean of a `best_runs` column.

    The interval uses n-1 degrees of freedom and the standard error of the
    mean; it presumes approximately normally distributed sample means.
    """
    values = best_runs[column]
    return stats.t.interval(0.95, len(values) - 1,
                            loc=values.mean(),
                            scale=stats.sem(values))


print(f"\n=== Best Configuration ===")
print(f"pop_size={best_pop}, ngen={best_ngen}, cxpb={best_cxpb}, mutpb={best_mutpb}")
print(f"\nStatistics (n={len(best_runs)}):")
print(f"  Tradeoff Penalized: {_mean_pm_std('best_tradeoff_penalized')}")
print(f"  Tradeoff Clean: {_mean_pm_std('best_tradeoff_clean')}")
print(f"  Pareto Size: {_mean_pm_std('pareto_size')}")
print(f"  Time: {_mean_pm_std('time_sec')} s")

# 95% Confidence intervals
ci_penalized = _t_ci95('best_tradeoff_penalized')
ci_clean = _t_ci95('best_tradeoff_clean')

print(f"\n95% Confidence Intervals:")
print(f"  Tradeoff Penalized: [{ci_penalized[0]:.2f}, {ci_penalized[1]:.2f}]")
print(f"  Tradeoff Clean: [{ci_clean[0]:.2f}, {ci_clean[1]:.2f}]")


=== Best Configuration ===
pop_size=120, ngen=1000, cxpb=0.8, mutpb=0.3

Statistics (n=31):
  Tradeoff Penalized: 1227.07 ± 113.01
  Tradeoff Clean: 1127.30 ± 39.91
  Pareto Size: 40.19 ± 21.52
  Time: 372.62 ± 23.74 s

95% Confidence Intervals:
  Tradeoff Penalized: [1185.62, 1268.53]
  Tradeoff Clean: [1112.66, 1141.94]


In [10]:
# Save statistical summary to CSV
summary_csv_path = '../results/individual/nsga2_statistical_summary.csv'
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing.
os.makedirs(os.path.dirname(summary_csv_path), exist_ok=True)
grouped.to_csv(summary_csv_path)
print("✓ Statistical summary saved to nsga2_statistical_summary.csv")

✓ Statistical summary saved to nsga2_statistical_summary.csv


## 8. Normality Tests

In [11]:
# Shapiro-Wilk normality test on both tradeoff objectives of the best
# configuration's runs. A p-value above 0.05 is reported as "Normal", i.e.
# normality is not rejected at that significance level.
def _normality_label(p_value):
    """Return the verdict string for a Shapiro-Wilk p-value at α=0.05."""
    if p_value > 0.05:
        return 'Normal'
    return 'Not Normal'


stat_pen, p_pen = stats.shapiro(best_runs['best_tradeoff_penalized'])
stat_clean, p_clean = stats.shapiro(best_runs['best_tradeoff_clean'])

print("\n=== Normality Tests (Shapiro-Wilk) ===")
print(f"Tradeoff Penalized: stat={stat_pen:.4f}, p-value={p_pen:.4f}")
print(f"  -> {_normality_label(p_pen)} (α=0.05)")
print(f"\nTradeoff Clean: stat={stat_clean:.4f}, p-value={p_clean:.4f}")
print(f"  -> {_normality_label(p_clean)} (α=0.05)")


=== Normality Tests (Shapiro-Wilk) ===
Tradeoff Penalized: stat=0.8170, p-value=0.0001
  -> Not Normal (α=0.05)

Tradeoff Clean: stat=0.9187, p-value=0.0219
  -> Not Normal (α=0.05)


## 9. Export Results for Visualization

In [12]:
# Export all results to CSV for visualization notebook
all_results_csv_path = '../results/individual/nsga2_all_results.csv'
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing.
os.makedirs(os.path.dirname(all_results_csv_path), exist_ok=True)
df.to_csv(all_results_csv_path, index=False)
print("✓ All results exported to nsga2_all_results.csv")

✓ All results exported to nsga2_all_results.csv
