From d8680732850bf491acae5ec5be6348346453435e Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Thu, 10 Jul 2025 13:41:45 +0800 Subject: [PATCH 1/2] Improve population management and reproducibility in evolution Refactors population size enforcement in ProgramDatabase to protect newly added and best programs, and ensures tracked best program references are valid. Enhances random seed propagation for reproducibility, improves logging consistency, and applies minor code style and logic cleanups across controller, LLM ensemble, prompt sampler, and threaded parallel modules. --- openevolve/config.py | 16 +-- openevolve/controller.py | 50 ++++----- openevolve/database.py | 51 ++++++--- openevolve/iteration.py | 21 ++-- openevolve/llm/ensemble.py | 14 ++- openevolve/llm/openai.py | 8 +- openevolve/prompt/sampler.py | 48 ++++---- openevolve/threaded_parallel.py | 191 +++++++++++++++++--------------- 8 files changed, 220 insertions(+), 179 deletions(-) diff --git a/openevolve/config.py b/openevolve/config.py index c89766d1c..df859f146 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -32,7 +32,7 @@ class LLMModelConfig: timeout: int = None retries: int = None retry_delay: int = None - + # Reproducibility random_seed: Optional[int] = None @@ -56,10 +56,12 @@ class LLMConfig(LLMModelConfig): retry_delay: int = 5 # n-model configuration for evolution LLM ensemble - models: List[LLMModelConfig] = field(default_factory=lambda: [ - LLMModelConfig(name="gpt-4o-mini", weight=0.8), - LLMModelConfig(name="gpt-4o", weight=0.2) - ]) + models: List[LLMModelConfig] = field( + default_factory=lambda: [ + LLMModelConfig(name="gpt-4o-mini", weight=0.8), + LLMModelConfig(name="gpt-4o", weight=0.2), + ] + ) # n-model configuration for evaluator LLM ensemble evaluator_models: List[LLMModelConfig] = field(default_factory=lambda: []) @@ -264,7 +266,7 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "Config": config.prompt = PromptConfig(**config_dict["prompt"]) if "database" in config_dict: config.database = DatabaseConfig(**config_dict["database"]) - + # Ensure database inherits the random seed if not explicitly set if config.database.random_seed is None and config.random_seed is not None: config.database.random_seed = config.random_seed @@ -365,4 +367,4 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> Config: # Make the system message available to the individual models, in case it is not provided from the prompt sampler config.llm.update_model_params({"system_message": config.prompt.system_message}) - return config \ No newline at end of file + return config diff --git a/openevolve/controller.py b/openevolve/controller.py index f9cefe956..0c0ab4d29 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -104,20 +104,20 @@ def __init__( # Set global random seeds random.seed(self.config.random_seed) np.random.seed(self.config.random_seed) - + # Create hash-based seeds for different components - base_seed = str(self.config.random_seed).encode('utf-8') - llm_seed = int(hashlib.md5(base_seed + b'llm').hexdigest()[:8], 16) % (2**31) - + base_seed = str(self.config.random_seed).encode("utf-8") + llm_seed = int(hashlib.md5(base_seed + b"llm").hexdigest()[:8], 16) % (2**31) + # Propagate seed to LLM configurations self.config.llm.random_seed = llm_seed for model_cfg in self.config.llm.models: - if not hasattr(model_cfg, 'random_seed') or model_cfg.random_seed is None: + if not hasattr(model_cfg, "random_seed") or model_cfg.random_seed is None: model_cfg.random_seed = 
llm_seed for model_cfg in self.config.llm.evaluator_models: - if not hasattr(model_cfg, 'random_seed') or model_cfg.random_seed is None: + if not hasattr(model_cfg, "random_seed") or model_cfg.random_seed is None: model_cfg.random_seed = llm_seed - + logger.info(f"Set random seed to {self.config.random_seed} for reproducibility") logger.debug(f"Generated LLM seed: {llm_seed}") @@ -161,7 +161,7 @@ def __init__( self.evaluation_file = evaluation_file logger.info(f"Initialized OpenEvolve with {initial_program_path}") - + # Initialize improved parallel processing components self.parallel_controller = None @@ -212,7 +212,7 @@ async def run( Best program found """ max_iterations = iterations or self.config.max_iterations - + # Determine starting iteration start_iteration = 0 if checkpoint_path and os.path.exists(checkpoint_path): @@ -260,30 +260,31 @@ async def run( self.parallel_controller = ImprovedParallelController( self.config, self.evaluation_file, self.database ) - + # Set up signal handlers for graceful shutdown def signal_handler(signum, frame): logger.info(f"Received signal {signum}, initiating graceful shutdown...") self.parallel_controller.request_shutdown() - + # Set up a secondary handler for immediate exit if user presses Ctrl+C again def force_exit_handler(signum, frame): logger.info("Force exit requested - terminating immediately") import sys + sys.exit(0) - + signal.signal(signal.SIGINT, force_exit_handler) - + signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) - + self.parallel_controller.start() - + # Run evolution with improved parallel processing and checkpoint callback await self._run_evolution_with_checkpoints( start_iteration, max_iterations, target_score ) - + finally: # Clean up parallel processing resources if self.parallel_controller: @@ -420,12 +421,10 @@ def _load_checkpoint(self, checkpoint_path: str) -> None: """Load state from a checkpoint directory""" if not os.path.exists(checkpoint_path): raise FileNotFoundError(f"Checkpoint directory {checkpoint_path} not found") - + logger.info(f"Loading checkpoint from {checkpoint_path}") self.database.load(checkpoint_path) - logger.info( - f"Checkpoint loaded successfully (iteration {self.database.last_iteration})" - ) + logger.info(f"Checkpoint loaded successfully (iteration {self.database.last_iteration})") async def _run_evolution_with_checkpoints( self, start_iteration: int, max_iterations: int, target_score: Optional[float] @@ -433,18 +432,17 @@ async def _run_evolution_with_checkpoints( """Run evolution with checkpoint saving support""" logger.info(f"Using island-based evolution with {self.config.database.num_islands} islands") self.database.log_island_status() - + # Run the evolution process with checkpoint callback await self.parallel_controller.run_evolution( - start_iteration, max_iterations, target_score, - checkpoint_callback=self._save_checkpoint + start_iteration, max_iterations, target_score, checkpoint_callback=self._save_checkpoint ) - + # Check if shutdown was requested if self.parallel_controller.shutdown_flag.is_set(): logger.info("Evolution stopped due to shutdown request") return - + # Save final checkpoint if needed final_iteration = start_iteration + max_iterations - 1 if final_iteration > 0 and final_iteration % self.config.checkpoint_interval == 0: @@ -499,4 +497,4 @@ def _save_best_program(self, program: Optional[Program] = None) -> None: indent=2, ) - logger.info(f"Saved best program to {code_path} with program info to {info_path}") \ No newline at end of 
file + logger.info(f"Saved best program to {code_path} with program info to {info_path}") diff --git a/openevolve/database.py b/openevolve/database.py index f33f994a1..8aba55283 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -9,6 +9,7 @@ import random import time from dataclasses import asdict, dataclass, field, fields + # FileLock removed - no longer needed with threaded parallel processing from typing import Any, Dict, List, Optional, Set, Tuple, Union @@ -198,7 +199,11 @@ def add( # Update archive self._update_archive(program) - # Update the absolute best program tracking + # Enforce population size limit BEFORE updating best program tracking + # This ensures newly added programs aren't immediately removed + self._enforce_population_limit(exclude_program_id=program.id) + + # Update the absolute best program tracking (after population enforcement) self._update_best_program(program) # Save to disk if configured @@ -207,9 +212,6 @@ def add( logger.debug(f"Added program {program.id} to island {island_idx}") - # Enforce population size limit - self._enforce_population_limit() - return program.id def get(self, program_id: str) -> Optional[Program]: @@ -254,9 +256,15 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]: return None # If no specific metric and we have a tracked best program, return it - if metric is None and self.best_program_id and self.best_program_id in self.programs: - logger.debug(f"Using tracked best program: {self.best_program_id}") - return self.programs[self.best_program_id] + if metric is None and self.best_program_id: + if self.best_program_id in self.programs: + logger.debug(f"Using tracked best program: {self.best_program_id}") + return self.programs[self.best_program_id] + else: + logger.warning( + f"Tracked best program {self.best_program_id} no longer exists, will recalculate" + ) + self.best_program_id = None if metric: # Sort by specific metric @@ -713,7 +721,15 @@ def _update_best_program(self, program: Program) -> None: logger.debug(f"Set initial best program to {program.id}") return - # Compare with current best program + # Compare with current best program (if it still exists) + if self.best_program_id not in self.programs: + logger.warning( + f"Best program {self.best_program_id} no longer exists, clearing reference" + ) + self.best_program_id = program.id + logger.info(f"Set new best program to {program.id}") + return + current_best = self.programs[self.best_program_id] # Update if the new program is better @@ -940,9 +956,12 @@ def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]: return inspirations[:n] - def _enforce_population_limit(self) -> None: + def _enforce_population_limit(self, exclude_program_id: Optional[str] = None) -> None: """ Enforce the population size limit by removing worst programs if needed + + Args: + exclude_program_id: Program ID to never remove (e.g., newly added program) """ if len(self.programs) <= self.config.population_size: return @@ -963,22 +982,24 @@ def _enforce_population_limit(self) -> None: key=lambda p: safe_numeric_average(p.metrics), ) - # Remove worst programs, but never remove the best program + # Remove worst programs, but never remove the best program or excluded program programs_to_remove = [] + protected_ids = {self.best_program_id, exclude_program_id} - {None} + for program in sorted_programs: if len(programs_to_remove) >= num_to_remove: break - # Don't remove the best program - if program.id != self.best_program_id: + # Don't remove the 
best program or excluded program + if program.id not in protected_ids: programs_to_remove.append(program) - # If we still need to remove more and only have the best program protected, - # remove from the remaining programs anyway (but keep the absolute best) + # If we still need to remove more and only have protected programs, + # remove from the remaining programs anyway (but keep the protected ones) if len(programs_to_remove) < num_to_remove: remaining_programs = [ p for p in sorted_programs - if p not in programs_to_remove and p.id != self.best_program_id + if p not in programs_to_remove and p.id not in protected_ids ] additional_removals = remaining_programs[: num_to_remove - len(programs_to_remove)] programs_to_remove.extend(additional_removals) diff --git a/openevolve/iteration.py b/openevolve/iteration.py index af3d3850d..98db88f09 100644 --- a/openevolve/iteration.py +++ b/openevolve/iteration.py @@ -31,24 +31,21 @@ class Result: artifacts: dict = None - - - async def run_iteration_with_shared_db( - iteration: int, - config: Config, + iteration: int, + config: Config, database: ProgramDatabase, evaluator: Evaluator, llm_ensemble: LLMEnsemble, - prompt_sampler: PromptSampler + prompt_sampler: PromptSampler, ): """ Run a single iteration using shared memory database - + This is optimized for use with persistent worker processes. """ logger = logging.getLogger(__name__) - + try: # Sample parent and inspirations from database parent, inspirations = database.sample() @@ -115,10 +112,10 @@ async def run_iteration_with_shared_db( # Evaluate the child program child_id = str(uuid.uuid4()) result.child_metrics = await evaluator.evaluate_program(child_code, child_id) - + # Handle artifacts if they exist artifacts = evaluator.get_pending_artifacts(child_id) - + # Create a child program result.child_program = Program( id=child_id, @@ -133,13 +130,13 @@ async def run_iteration_with_shared_db( "parent_metrics": parent.metrics, }, ) - + result.prompt = prompt result.llm_response = llm_response result.artifacts = artifacts result.iteration_time = time.time() - iteration_start result.iteration = iteration - + return result except Exception as e: diff --git a/openevolve/llm/ensemble.py b/openevolve/llm/ensemble.py index fa11e28f8..f5db7be2b 100644 --- a/openevolve/llm/ensemble.py +++ b/openevolve/llm/ensemble.py @@ -27,16 +27,22 @@ def __init__(self, models_cfg: List[LLMModelConfig]): self.weights = [model.weight for model in models_cfg] total = sum(self.weights) self.weights = [w / total for w in self.weights] - + # Set up random state for deterministic model selection self.random_state = random.Random() # Initialize with seed from first model's config if available - if models_cfg and hasattr(models_cfg[0], 'random_seed') and models_cfg[0].random_seed is not None: + if ( + models_cfg + and hasattr(models_cfg[0], "random_seed") + and models_cfg[0].random_seed is not None + ): self.random_state.seed(models_cfg[0].random_seed) - logger.debug(f"LLMEnsemble: Set random seed to {models_cfg[0].random_seed} for deterministic model selection") + logger.debug( + f"LLMEnsemble: Set random seed to {models_cfg[0].random_seed} for deterministic model selection" + ) # Only log if we have multiple models or this is the first ensemble - if len(models_cfg) > 1 or not hasattr(logger, '_ensemble_logged'): + if len(models_cfg) > 1 or not hasattr(logger, "_ensemble_logged"): logger.info( f"Initialized LLM ensemble with models: " + ", ".join( diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py index 
705d0d4ac..7946b4d81 100644 --- a/openevolve/llm/openai.py +++ b/openevolve/llm/openai.py @@ -32,7 +32,7 @@ def __init__( self.retry_delay = model_cfg.retry_delay self.api_base = model_cfg.api_base self.api_key = model_cfg.api_key - self.random_seed = getattr(model_cfg, 'random_seed', None) + self.random_seed = getattr(model_cfg, "random_seed", None) # Set up API client self.client = openai.OpenAI( @@ -41,9 +41,9 @@ def __init__( ) # Only log unique models to reduce duplication - if not hasattr(logger, '_initialized_models'): + if not hasattr(logger, "_initialized_models"): logger._initialized_models = set() - + if self.model not in logger._initialized_models: logger.info(f"Initialized OpenAI LLM with model: {self.model}") logger._initialized_models.add(self.model) @@ -80,7 +80,7 @@ async def generate_with_context( "top_p": kwargs.get("top_p", self.top_p), "max_tokens": kwargs.get("max_tokens", self.max_tokens), } - + # Add seed parameter for reproducibility if configured # Skip seed parameter for Google AI Studio endpoint as it doesn't support it seed = kwargs.get("seed", self.random_seed) diff --git a/openevolve/prompt/sampler.py b/openevolve/prompt/sampler.py index 44b8acfb3..af0d2eb04 100644 --- a/openevolve/prompt/sampler.py +++ b/openevolve/prompt/sampler.py @@ -29,7 +29,7 @@ def __init__(self, config: PromptConfig): self.user_template_override = None # Only log once to reduce duplication - if not hasattr(logger, '_prompt_sampler_logged'): + if not hasattr(logger, "_prompt_sampler_logged"): logger.info("Initialized prompt sampler") logger._prompt_sampler_logged = True @@ -412,39 +412,39 @@ def _format_inspirations_section( ) -> str: """ Format the inspirations section for the prompt - + Args: inspirations: List of inspiration programs language: Programming language - + Returns: Formatted inspirations section string """ if not inspirations: return "" - + # Get templates inspirations_section_template = self.template_manager.get_template("inspirations_section") inspiration_program_template = self.template_manager.get_template("inspiration_program") - + inspiration_programs_str = "" - + for i, program in enumerate(inspirations): # Extract a snippet (first 8 lines) for display program_code = program.get("code", "") program_snippet = "\n".join(program_code.split("\n")[:8]) if len(program_code.split("\n")) > 8: program_snippet += "\n# ... 
(truncated for brevity)" - + # Calculate a composite score using safe numeric average score = safe_numeric_average(program.get("metrics", {})) - + # Determine program type based on metadata and score program_type = self._determine_program_type(program) - + # Extract unique features (emphasizing diversity rather than just performance) unique_features = self._extract_unique_features(program) - + inspiration_programs_str += ( inspiration_program_template.format( program_number=i + 1, @@ -456,24 +456,24 @@ def _format_inspirations_section( ) + "\n\n" ) - + return inspirations_section_template.format( inspiration_programs=inspiration_programs_str.strip() ) - + def _determine_program_type(self, program: Dict[str, Any]) -> str: """ Determine the type/category of an inspiration program - + Args: program: Program dictionary - + Returns: String describing the program type """ metadata = program.get("metadata", {}) score = safe_numeric_average(program.get("metrics", {})) - + # Check metadata for explicit type markers if metadata.get("diverse", False): return "Diverse" @@ -481,7 +481,7 @@ def _determine_program_type(self, program: Dict[str, Any]) -> str: return "Migrant" if metadata.get("random", False): return "Random" - + # Classify based on score ranges if score >= 0.8: return "High-Performer" @@ -491,26 +491,26 @@ def _determine_program_type(self, program: Dict[str, Any]) -> str: return "Experimental" else: return "Exploratory" - + def _extract_unique_features(self, program: Dict[str, Any]) -> str: """ Extract unique features of an inspiration program - + Args: program: Program dictionary - + Returns: String describing unique aspects of the program """ features = [] - + # Extract from metadata if available metadata = program.get("metadata", {}) if "changes" in metadata: changes = metadata["changes"] if isinstance(changes, str) and len(changes) < 100: features.append(f"Modification: {changes}") - + # Analyze metrics for standout characteristics metrics = program.get("metrics", {}) for metric_name, value in metrics.items(): @@ -519,7 +519,7 @@ def _extract_unique_features(self, program: Dict[str, Any]) -> str: features.append(f"Excellent {metric_name} ({value:.3f})") elif value <= 0.3: features.append(f"Alternative {metric_name} approach") - + # Code-based features (simple heuristics) code = program.get("code", "") if code: @@ -534,12 +534,12 @@ def _extract_unique_features(self, program: Dict[str, Any]) -> str: features.append("Concise implementation") elif len(code.split("\n")) > 50: features.append("Comprehensive implementation") - + # Default if no specific features found if not features: program_type = self._determine_program_type(program) features.append(f"{program_type} approach to the problem") - + return ", ".join(features[:3]) # Limit to top 3 features def _apply_template_variations(self, template: str) -> str: diff --git a/openevolve/threaded_parallel.py b/openevolve/threaded_parallel.py index e772d6920..815de9689 100644 --- a/openevolve/threaded_parallel.py +++ b/openevolve/threaded_parallel.py @@ -23,92 +23,93 @@ class ThreadedEvaluationPool: """ Thread-based parallel evaluation pool for improved performance - + Uses threads instead of processes to avoid pickling issues while still providing parallelism for I/O-bound LLM calls. 
""" - + def __init__(self, config: Config, evaluation_file: str, database: ProgramDatabase): self.config = config self.evaluation_file = evaluation_file self.database = database - + self.num_workers = config.evaluator.parallel_evaluations self.executor = None - + # Pre-initialize components for each thread self.thread_local = threading.local() - + logger.info(f"Initializing threaded evaluation pool with {self.num_workers} workers") - + def start(self) -> None: """Start the thread pool""" self.executor = ThreadPoolExecutor( - max_workers=self.num_workers, - thread_name_prefix="EvalWorker" + max_workers=self.num_workers, thread_name_prefix="EvalWorker" ) logger.info(f"Started thread pool with {self.num_workers} threads") - + def stop(self) -> None: """Stop the thread pool""" if self.executor: self.executor.shutdown(wait=True) self.executor = None logger.info("Stopped thread pool") - + def submit_evaluation(self, iteration: int) -> Future: """ Submit an evaluation task to the thread pool - + Args: iteration: Iteration number to evaluate - + Returns: Future that will contain the result """ if not self.executor: raise RuntimeError("Thread pool not started") - + return self.executor.submit(self._run_evaluation, iteration) - + def _run_evaluation(self, iteration: int): """Run evaluation in a worker thread""" # Get or create thread-local components - if not hasattr(self.thread_local, 'initialized'): + if not hasattr(self.thread_local, "initialized"): self._initialize_thread_components() - + try: # Run the iteration - result = asyncio.run(run_iteration_with_shared_db( - iteration, - self.config, - self.database, # Shared database (thread-safe reads) - self.thread_local.evaluator, - self.thread_local.llm_ensemble, - self.thread_local.prompt_sampler - )) - + result = asyncio.run( + run_iteration_with_shared_db( + iteration, + self.config, + self.database, # Shared database (thread-safe reads) + self.thread_local.evaluator, + self.thread_local.llm_ensemble, + self.thread_local.prompt_sampler, + ) + ) + return result - + except Exception as e: logger.error(f"Error in thread evaluation {iteration}: {e}") return None - + def _initialize_thread_components(self) -> None: """Initialize components for this thread""" thread_id = threading.get_ident() logger.debug(f"Initializing components for thread {thread_id}") - + try: # Initialize LLM components self.thread_local.llm_ensemble = LLMEnsemble(self.config.llm.models) self.thread_local.llm_evaluator_ensemble = LLMEnsemble(self.config.llm.evaluator_models) - + # Initialize prompt samplers self.thread_local.prompt_sampler = PromptSampler(self.config.prompt) self.thread_local.evaluator_prompt_sampler = PromptSampler(self.config.prompt) self.thread_local.evaluator_prompt_sampler.set_templates("evaluator_system_message") - + # Initialize evaluator self.thread_local.evaluator = Evaluator( self.config.evaluator, @@ -117,10 +118,10 @@ def _initialize_thread_components(self) -> None: self.thread_local.evaluator_prompt_sampler, database=self.database, ) - + self.thread_local.initialized = True logger.debug(f"Initialized components for thread {thread_id}") - + except Exception as e: logger.error(f"Failed to initialize thread components: {e}") raise @@ -130,135 +131,146 @@ class ImprovedParallelController: """ Controller for improved parallel processing using shared memory and threads """ - + def __init__(self, config: Config, evaluation_file: str, database: ProgramDatabase): self.config = config self.evaluation_file = evaluation_file self.database = database - + 
self.thread_pool = None self.database_lock = threading.RLock() # For database writes self.shutdown_flag = threading.Event() # For graceful shutdown - + def start(self) -> None: """Start the improved parallel system""" - self.thread_pool = ThreadedEvaluationPool( - self.config, self.evaluation_file, self.database - ) + self.thread_pool = ThreadedEvaluationPool(self.config, self.evaluation_file, self.database) self.thread_pool.start() - + logger.info("Started improved parallel controller") - + def stop(self) -> None: """Stop the improved parallel system""" self.shutdown_flag.set() # Signal shutdown - + if self.thread_pool: self.thread_pool.stop() self.thread_pool = None - + logger.info("Stopped improved parallel controller") - + def request_shutdown(self) -> None: """Request graceful shutdown (for signal handlers)""" logger.info("Graceful shutdown requested...") self.shutdown_flag.set() - + async def run_evolution( - self, start_iteration: int, max_iterations: int, target_score: Optional[float] = None, - checkpoint_callback=None + self, + start_iteration: int, + max_iterations: int, + target_score: Optional[float] = None, + checkpoint_callback=None, ): """ Run evolution with improved parallel processing - + Args: start_iteration: Starting iteration number max_iterations: Maximum number of iterations target_score: Target score to achieve - + Returns: Best program found """ total_iterations = start_iteration + max_iterations - + logger.info( f"Starting improved parallel evolution from iteration {start_iteration} " f"for {max_iterations} iterations (total: {total_iterations})" ) - + # Submit initial batch of evaluations pending_futures = {} batch_size = min(self.config.evaluator.parallel_evaluations * 2, max_iterations) - + for i in range(start_iteration, min(start_iteration + batch_size, total_iterations)): future = self.thread_pool.submit_evaluation(i) pending_futures[i] = future - + next_iteration_to_submit = start_iteration + batch_size completed_iterations = 0 - + # Island management programs_per_island = max(1, max_iterations // (self.config.database.num_islands * 10)) current_island_counter = 0 - + # Process results as they complete - while pending_futures and completed_iterations < max_iterations and not self.shutdown_flag.is_set(): + while ( + pending_futures + and completed_iterations < max_iterations + and not self.shutdown_flag.is_set() + ): # Find completed futures completed_iteration = None for iteration, future in list(pending_futures.items()): if future.done(): completed_iteration = iteration break - + if completed_iteration is None: # No results ready, wait a bit await asyncio.sleep(0.01) continue - + # Process completed result future = pending_futures.pop(completed_iteration) - + try: result = future.result() - - if result and hasattr(result, 'child_program') and result.child_program: + + if result and hasattr(result, "child_program") and result.child_program: # Thread-safe database update with self.database_lock: self.database.add(result.child_program, iteration=completed_iteration) - + # Store artifacts if they exist if result.artifacts: self.database.store_artifacts(result.child_program.id, result.artifacts) - + # Log prompts - if hasattr(result, 'prompt') and result.prompt: + if hasattr(result, "prompt") and result.prompt: self.database.log_prompt( template_key=( - "full_rewrite_user" if not self.config.diff_based_evolution + "full_rewrite_user" + if not self.config.diff_based_evolution else "diff_user" ), program_id=result.child_program.id, prompt=result.prompt, - 
responses=[result.llm_response] if hasattr(result, 'llm_response') else [], + responses=( + [result.llm_response] if hasattr(result, "llm_response") else [] + ), ) - + # Manage island evolution - if completed_iteration > start_iteration and current_island_counter >= programs_per_island: + if ( + completed_iteration > start_iteration + and current_island_counter >= programs_per_island + ): self.database.next_island() current_island_counter = 0 logger.debug(f"Switched to island {self.database.current_island}") - + current_island_counter += 1 - + # Increment generation for current island self.database.increment_island_generation() - + # Check migration if self.database.should_migrate(): logger.info(f"Performing migration at iteration {completed_iteration}") self.database.migrate_programs() self.database.log_island_status() - + # Log progress (outside lock) logger.info( f"Iteration {completed_iteration}: " @@ -266,32 +278,37 @@ async def run_evolution( f"(parent: {result.parent.id if result.parent else 'None'}) " f"completed in {result.iteration_time:.2f}s" ) - + if result.child_program.metrics: - metrics_str = ", ".join([ - f"{k}={v:.4f}" if isinstance(v, (int, float)) else f"{k}={v}" - for k, v in result.child_program.metrics.items() - ]) + metrics_str = ", ".join( + [ + f"{k}={v:.4f}" if isinstance(v, (int, float)) else f"{k}={v}" + for k, v in result.child_program.metrics.items() + ] + ) logger.info(f"Metrics: {metrics_str}") - + # Check for new best program if self.database.best_program_id == result.child_program.id: logger.info( f"🌟 New best solution found at iteration {completed_iteration}: " f"{result.child_program.id}" ) - + # Save checkpoints at intervals if completed_iteration % self.config.checkpoint_interval == 0: - logger.info(f"Checkpoint interval reached at iteration {completed_iteration}") + logger.info( + f"Checkpoint interval reached at iteration {completed_iteration}" + ) self.database.log_island_status() if checkpoint_callback: checkpoint_callback(completed_iteration) - + # Check target score if target_score is not None and result.child_program.metrics: numeric_metrics = [ - v for v in result.child_program.metrics.values() + v + for v in result.child_program.metrics.values() if isinstance(v, (int, float)) ] if numeric_metrics: @@ -303,18 +320,18 @@ async def run_evolution( break else: logger.warning(f"No valid result from iteration {completed_iteration}") - + except Exception as e: logger.error(f"Error processing result from iteration {completed_iteration}: {e}") - + completed_iterations += 1 - + # Submit next iteration if available if next_iteration_to_submit < total_iterations: future = self.thread_pool.submit_evaluation(next_iteration_to_submit) pending_futures[next_iteration_to_submit] = future next_iteration_to_submit += 1 - + # Handle shutdown or completion if self.shutdown_flag.is_set(): logger.info("Shutdown requested, canceling remaining evaluations...") @@ -329,8 +346,8 @@ async def run_evolution( future.result(timeout=10.0) except Exception as e: logger.warning(f"Error waiting for iteration {iteration}: {e}") - + if self.shutdown_flag.is_set(): logger.info("Evolution interrupted by shutdown") else: - logger.info("Evolution completed") \ No newline at end of file + logger.info("Evolution completed") From 0ff351e54a4ec94a6b876da32483ea286f4e991a Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Thu, 10 Jul 2025 13:49:31 +0800 Subject: [PATCH 2/2] Bump version to 0.0.14 Update project version from 0.0.13 to 0.0.14 in both pyproject.toml and setup.py to prepare 
for a new release. --- pyproject.toml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d73b29612..abe90c44f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "openevolve" -version = "0.0.13" +version = "0.0.14" description = "Open-source implementation of AlphaEvolve" readme = "README.md" requires-python = ">=3.9" diff --git a/setup.py b/setup.py index d3a277533..e876b1c90 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="openevolve", - version="0.0.13", + version="0.0.14", packages=find_packages(), include_package_data=True, )
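
A minimal, self-contained Python sketch (not part of either patch) of the hash-based per-component seed derivation that PATCH 1/2 introduces in openevolve/controller.py. The md5 derivation and the "llm" component label mirror the patch; the helper function name, the example base seed, and the usage lines are illustrative assumptions only.

# Sketch of the per-component seed derivation from PATCH 1/2 (controller.py).
# Helper name and example values are assumptions; only the md5-based
# derivation and the "llm" label come from the patch itself.
import hashlib
import random


def derive_component_seed(base_seed: int, component: str) -> int:
    """Derive a stable 31-bit seed for a named component from the base seed."""
    payload = str(base_seed).encode("utf-8") + component.encode("utf-8")
    # First 8 hex chars of the md5 digest, reduced to a 31-bit integer
    return int(hashlib.md5(payload).hexdigest()[:8], 16) % (2**31)


if __name__ == "__main__":
    base = 42  # stand-in for config.random_seed
    llm_seed = derive_component_seed(base, "llm")
    rng = random.Random(llm_seed)  # deterministic RNG, as LLMEnsemble does for model selection
    print(f"base={base} -> llm seed={llm_seed}, first draw={rng.random():.6f}")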