# Simulated Annealing Experiments with Best Parameters (31 Seeds)

This notebook runs SA experiments with the best configuration using 31 different seeds (0–30) for statistical analysis.

## 1. Import Libraries

In [1]:
import os
import sys
import sqlite3
import time
import random
import numpy as np
import multiprocessing as mp

# Add the parent of the current working directory to the import path so that
# project-local packages (e.g. `algorithms.sa_runner`) can be imported.
ROOT_DIR = os.path.dirname(os.path.abspath(os.getcwd()))
# Guard the append: re-running this cell must not pile up duplicate entries.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from algorithms.sa_runner import run_sa

print("✓ Libraries imported successfully")

✓ Libraries imported successfully


## 2. Database Setup

In [2]:
DB_PATH = "../data/sa_experiments.db"

def init_sa_db(db_path):
    """Create the SQLite database for SA results if it does not exist yet.

    Ensures the ``runs`` table exists (one row per SA run) and prints a
    confirmation message. Calling it again on an existing database is a no-op
    thanks to ``CREATE TABLE IF NOT EXISTS``.

    Args:
        db_path: Path of the SQLite database file. Missing parent directories
            are created so that ``sqlite3.connect`` can open the file.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot
            be created.
    """
    from contextlib import closing

    # sqlite3 creates the file but not its directory; make sure it is there.
    parent_dir = os.path.dirname(db_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # closing() guarantees the connection is released even if the DDL fails;
    # the inner `with con` commits on success and rolls back on error.
    with closing(sqlite3.connect(db_path)) as con:
        with con:
            con.execute("""
                CREATE TABLE IF NOT EXISTS runs (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    n_iter INTEGER,
                    start_temp REAL,
                    end_temp REAL,
                    seed INTEGER,
                    best_penalized REAL,
                    best_clean REAL,
                    time_sec REAL,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
                )
            """)

    print(f"✓ Database initialized: {db_path}")

init_sa_db(DB_PATH)

✓ Database initialized: sa_experiments.db


## 3. Save Results Function

In [3]:
def save_sa_run(db_path, n_iter, start_temp, end_temp, seed, best_penalized, best_distance, time_sec):
    """Insert the result of a single SA run into the ``runs`` table.

    Args:
        db_path: Path of the SQLite database file (the ``runs`` table must
            already exist).
        n_iter: Number of SA iterations used for the run.
        start_temp: Initial temperature.
        end_temp: Final temperature.
        seed: Random seed of the run.
        best_penalized: Best penalized cost; coerced with ``float()``.
        best_distance: Best distance; coerced with ``float()`` and stored in
            the ``best_clean`` column.
        time_sec: Run duration in seconds.

    Raises:
        sqlite3.Error: If the insert fails (e.g. missing table, or the
            database stays locked longer than the timeout).
    """
    from contextlib import closing

    row = (
        n_iter, start_temp, end_temp, seed,
        float(best_penalized), float(best_distance), time_sec,
    )

    # Several worker processes write to the same file, so wait longer than
    # sqlite3's default 5 s for a competing writer to release its lock.
    # closing() releases the connection even when the INSERT raises; the inner
    # `with con` commits on success and rolls back on error.
    with closing(sqlite3.connect(db_path, timeout=30)) as con:
        with con:
            con.execute("""
                INSERT INTO runs (
                    n_iter, start_temp, end_temp, seed,
                    best_penalized, best_clean, time_sec
                ) VALUES (?, ?, ?, ?, ?, ?, ?)
            """, row)

print("✓ Save function defined")

✓ Save function defined


## 4. Worker Function for Multiprocessing

In [4]:
def sa_worker(args):
    """Run one SA experiment, store its result, and report the outcome.

    Takes a single packed argument so it can be mapped over a task list by a
    process pool. Exceptions raised by the SA run or by the database write are
    caught and reported in the returned dict instead of propagating.

    Args:
        args: Tuple ``(n_iter, start_temp, end_temp, seed, db_path)``.

    Returns:
        dict: On success ``{'success': True, 'seed', 'best_penalized',
        'time'}`` where ``time`` is the duration of the ``run_sa`` call in
        seconds. On failure ``{'success': False, 'seed', 'error',
        'error_type', 'traceback'}``.
    """
    import traceback

    n_iter, start_temp, end_temp, seed, db_path = args

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    try:
        results = run_sa(
            n_iter=n_iter,
            start_temp=start_temp,
            end_temp=end_temp,
            seed=seed,
            show_plots=False,
            show_anim=False,
            # About ten progress reports per run, at least every iteration.
            debug_interval=max(1, n_iter // 10)
        )
        elapsed = time.perf_counter() - started

        save_sa_run(db_path, n_iter, start_temp, end_temp, seed,
                    results['best_penalized'], results['best_distance'], elapsed)

        return {
            'success': True,
            'seed': seed,
            'best_penalized': results['best_penalized'],
            'time': elapsed
        }
    except Exception as e:
        # Best-effort: one failed seed must not abort the whole batch, but keep
        # enough detail to diagnose it (str(e) alone drops the exception type).
        return {
            'success': False,
            'seed': seed,
            'error': str(e),
            'error_type': type(e).__name__,
            'traceback': traceback.format_exc()
        }

print("✓ Worker function defined")

✓ Worker function defined


## 5. Best Parameters Configuration

In [6]:
# SA configuration applied to every run in this notebook.
# NOTE(review): a previous comment here quoted grid-search values of
# n_iter=12000 and start_temp=5, which do not match the values below —
# confirm which configuration is the intended "best" one.
BEST_PARAMS = dict(n_iter=5000, start_temp=20, end_temp=0.01)

# One run per seed: seeds 0 through 30 inclusive, i.e. 31 runs.
SEEDS = [*range(31)]

# Worker processes: one per CPU core, capped at 8.
N_JOBS = min(mp.cpu_count(), 8)

print(
    f"Best parameters: {BEST_PARAMS}",
    f"Number of seeds: {len(SEEDS)}",
    f"Number of parallel jobs: {N_JOBS}",
    f"Total experiments: {len(SEEDS)}",
    sep="\n",
)

Best parameters: {'n_iter': 5000, 'start_temp': 20, 'end_temp': 0.01}
Number of seeds: 31
Number of parallel jobs: 8
Total experiments: 31


## 6. Run Experiments with Multiprocessing

In [7]:
from tqdm.notebook import tqdm

# One task tuple per seed, in the argument order that sa_worker unpacks.
tasks = [(BEST_PARAMS['n_iter'], BEST_PARAMS['start_temp'], BEST_PARAMS['end_temp'],
          seed, DB_PATH) for seed in SEEDS]

print(f"Total experiments to run: {len(tasks)}")
print(f"Running with {N_JOBS} parallel workers...\n")

# Fan the tasks out over a process pool. imap yields results in task order,
# so results[i] corresponds to tasks[i].
start_time = time.time()
with mp.Pool(processes=N_JOBS) as pool:
    results = list(tqdm(
        pool.imap(sa_worker, tasks),
        total=len(tasks),
        desc="SA Experiments"
    ))
total_time = time.time() - start_time

# Summary
failures = [r for r in results if not r['success']]
failed = len(failures)
successful = len(results) - failed

print(f"\n{'='*60}")
print(f"Experiments completed!")
print(f"Successful: {successful}")
print(f"Failed: {failed}")
print(f"Total time: {total_time/60:.2f} minutes")
# Wall-clock time amortised over all tasks (runs overlap across workers);
# max(..., 1) avoids a ZeroDivisionError when the task list is empty.
print(f"Average time per run: {total_time/max(len(tasks), 1):.2f} seconds")
print(f"{'='*60}")

# Surface the reason for every failed run instead of only counting them.
for failure in failures:
    print(f"[FAILED] seed={failure['seed']}: {failure.get('error', 'unknown error')}")

Total experiments to run: 31
Running with 8 parallel workers...

[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original: [INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:  [INFO] Valores únicos del BMP original:[INFO] Valores únicos del BMP original:

SA Experiments:   0%|          | 0/31 [00:00<?, ?it/s]

  [  0 255] [  0 255]  [  0 255][  0 255]

[  0 255][  0 255][  0 255]

[  0 255]

[INFO] Valores únicos tras invertir:

[INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir:   [INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir:[INFO] Valores únicos tras invertir:  [  0 255][  0 255][  0 255]  [  0 255][  0 255]



[  0 255]
[INFO] Environment cargado: 92 x 113[INFO] Environment cargado: 92 x 113[  0 255] 
[INFO] Environment cargado: 92 x 113

[  0 255][INFO] Environment cargado: 92 x 113
[INFO] Environment cargado: 92 x 113

[INFO] Environment cargado: 92 x 113
[INFO] Environment cargado: 92 x 113

[INFO] Environment cargado: 92 x 113

[IT     1] T=19.9696  BestPenalized=3297.08  BestCleanDist=1032.08  CurrCleanDist=1032.08  MinDist=0.00  Conflicts=15
[IT     1] T=19.9696  BestPenalized=3297.08  BestCleanDist=1032.08  CurrCleanDist=1032.08  MinDist=0.00  Conflicts=15
[IT    

## 7. Statistical Analysis

In [8]:
import pandas as pd
from scipy import stats

# Load the results for the current configuration from the database.
# The database file persists across notebook runs, so a plain
# `SELECT * FROM runs` would also pool rows written with other parameter
# settings and repeated executions of the same seed. Keep only the most
# recent row per seed for the configuration in BEST_PARAMS.
runs_query = """
    SELECT * FROM runs
    WHERE id IN (
        SELECT MAX(id) FROM runs
        WHERE n_iter = ? AND start_temp = ? AND end_temp = ?
        GROUP BY seed
    )
    ORDER BY id
"""
con = sqlite3.connect(DB_PATH)
try:
    total_runs = con.execute("SELECT COUNT(*) FROM runs").fetchone()[0]
    df = pd.read_sql_query(
        runs_query,
        con,
        params=(BEST_PARAMS['n_iter'], BEST_PARAMS['start_temp'], BEST_PARAMS['end_temp']),
    )
finally:
    # Release the connection even if the query fails.
    con.close()

print(f"Total runs in database: {total_runs}")
print(f"Runs used for analysis (current configuration, latest per seed): {len(df)}")
df.head()

Total runs in database: 31


Unnamed: 0,id,n_iter,start_temp,end_temp,seed,best_penalized,best_clean,time_sec,timestamp
0,1,5000,20.0,0.01,5,1885.755194,1210.205194,23.075649,2025-12-11 11:41:41
1,2,5000,20.0,0.01,7,1797.297835,1271.447835,23.944516,2025-12-11 11:41:42
2,3,5000,20.0,0.01,3,2187.859523,1383.359523,24.18892,2025-12-11 11:41:42
3,4,5000,20.0,0.01,4,2285.882611,1329.832611,24.435895,2025-12-11 11:41:42
4,5,5000,20.0,0.01,0,2718.954689,1318.104689,24.665557,2025-12-11 11:41:43


In [9]:
# Statistical summary: banner, run configuration, per-metric descriptive
# statistics, then 95% confidence intervals for the two cost metrics.
banner_rule = "=" * 80
print("\n" + banner_rule)
print("SA STATISTICAL SUMMARY")
print(banner_rule)
print(
    f"\nConfiguration: n_iter={BEST_PARAMS['n_iter']}, "
    f"start_temp={BEST_PARAMS['start_temp']}, "
    f"end_temp={BEST_PARAMS['end_temp']}"
)
print(f"Number of runs: {len(df)}\n")

# (section heading, DataFrame column, whether the median is reported)
report_sections = [
    ("Best Penalized:", "best_penalized", True),
    ("\nBest Clean:", "best_clean", True),
    ("\nComputation Time (seconds):", "time_sec", False),
]
for section_heading, column_name, show_median in report_sections:
    column_values = df[column_name]
    print(section_heading)
    print(f"  Mean: {column_values.mean():.2f}")
    print(f"  Std: {column_values.std():.2f}")
    print(f"  Min: {column_values.min():.2f}")
    print(f"  Max: {column_values.max():.2f}")
    if show_median:
        print(f"  Median: {column_values.median():.2f}")

# 95% confidence intervals for the mean, from Student's t distribution with
# n - 1 degrees of freedom and the standard error of the mean as scale.
degrees_of_freedom = len(df) - 1
ci_penalized, ci_clean = (
    stats.t.interval(
        0.95,
        degrees_of_freedom,
        loc=df[ci_column].mean(),
        scale=stats.sem(df[ci_column]),
    )
    for ci_column in ("best_penalized", "best_clean")
)

print(f"\n95% Confidence Intervals:")
print(f"  Best Penalized: [{ci_penalized[0]:.2f}, {ci_penalized[1]:.2f}]")
print(f"  Best Clean: [{ci_clean[0]:.2f}, {ci_clean[1]:.2f}]")


SA STATISTICAL SUMMARY

Configuration: n_iter=5000, start_temp=20, end_temp=0.01
Number of runs: 31

Best Penalized:
  Mean: 2135.03
  Std: 288.19
  Min: 1668.14
  Max: 2779.87
  Median: 2108.30

Best Clean:
  Mean: 1279.46
  Std: 83.00
  Min: 1124.98
  Max: 1464.79
  Median: 1271.52

Computation Time (seconds):
  Mean: 23.24
  Std: 1.95
  Min: 19.14
  Max: 27.38

95% Confidence Intervals:
  Best Penalized: [2029.32, 2240.74]
  Best Clean: [1249.01, 1309.90]


## 8. Normality Tests

In [10]:
# Shapiro-Wilk normality test on both cost metrics.
# Note: p > 0.05 only means normality is not rejected at the 5% level.
stat_pen, p_pen = stats.shapiro(df['best_penalized'])
stat_clean, p_clean = stats.shapiro(df['best_clean'])

print("\n=== Normality Tests (Shapiro-Wilk) ===")
for metric_label, w_stat, p_value in (
    ("Best Penalized", stat_pen, p_pen),
    ("\nBest Clean", stat_clean, p_clean),
):
    if p_value > 0.05:
        verdict = 'Normal distribution'
    else:
        verdict = 'Not normal distribution'
    print(f"{metric_label}: stat={w_stat:.4f}, p-value={p_value:.4f}")
    print(f"  -> {verdict} (α=0.05)")


=== Normality Tests (Shapiro-Wilk) ===
Best Penalized: stat=0.9498, p-value=0.1545
  -> Normal distribution (α=0.05)

Best Clean: stat=0.9805, p-value=0.8271
  -> Normal distribution (α=0.05)


## 9. Export Results

In [11]:
# Export all results and a summary table to CSV.
all_results_path = '../results/individual/sa_all_results.csv'
summary_path = '../results/individual/sa_statistical_summary.csv'

# to_csv does not create missing directories; make sure the target exists so
# the export works on a fresh checkout.
os.makedirs(os.path.dirname(all_results_path), exist_ok=True)

df.to_csv(all_results_path, index=False)
print("✓ All results exported to sa_all_results.csv")

# Summary statistics: one row per metric, one column per statistic. Built from
# a single metric list so the columns cannot drift out of sync.
summary_metrics = ['best_penalized', 'best_clean', 'time_sec']
summary_statistics = ['mean', 'std', 'min', 'max', 'median']
summary = pd.DataFrame({
    'metric': summary_metrics,
    **{
        statistic: [getattr(df[metric], statistic)() for metric in summary_metrics]
        for statistic in summary_statistics
    },
})
summary.to_csv(summary_path, index=False)
print("✓ Statistical summary exported to sa_statistical_summary.csv")

✓ All results exported to sa_all_results.csv
✓ Statistical summary exported to sa_statistical_summary.csv
