From d7e47b2522e7035a503f6a33b626facb401ba95c Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Tue, 26 Aug 2025 17:58:36 +0800
Subject: [PATCH 1/3] Add early stopping (patience, convergence threshold, metric) to evolution run

---
 configs/default_config.yaml                   |  5 ++
 configs/early_stopping_example.yaml           | 38 +++++++++++++++
 .../configs/failing_config.yaml               |  7 ++-
 openevolve/config.py                          |  9 ++++
 openevolve/process_parallel.py                | 46 +++++++++++++++++++
 5 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 configs/early_stopping_example.yaml

diff --git a/configs/default_config.yaml b/configs/default_config.yaml
index fc512c26c..d33851ec0 100644
--- a/configs/default_config.yaml
+++ b/configs/default_config.yaml
@@ -13,6 +13,11 @@ random_seed: 42                       # Random seed for reproducibility (null =
 diff_based_evolution: true            # Use diff-based evolution (true) or full rewrites (false)
 max_code_length: 10000                # Maximum allowed code length in characters
 
+# Early stopping settings
+early_stopping_patience: null         # Stop after N iterations without improvement (null = disabled)
+convergence_threshold: 0.001          # Minimum improvement required to reset patience counter
+early_stopping_metric: "combined_score"  # Metric to track for early stopping
+
 # LLM configuration
 llm:
   # Models for evolution
diff --git a/configs/early_stopping_example.yaml b/configs/early_stopping_example.yaml
new file mode 100644
index 000000000..c7259ea7f
--- /dev/null
+++ b/configs/early_stopping_example.yaml
@@ -0,0 +1,38 @@
+# OpenEvolve Configuration with Early Stopping Example
+# This configuration demonstrates how to use the early stopping feature
+
+# Basic settings
+max_iterations: 1000
+checkpoint_interval: 50
+log_level: "INFO"
+
+# Early stopping configuration - stops evolution if no improvement for 30 iterations
+early_stopping_patience: 30          # Stop after 30 iterations without improvement
+convergence_threshold: 0.01          # Minimum improvement of 0.01 required to reset patience
+early_stopping_metric: "combined_score"  # Track the combined_score metric
+
+# LLM configuration
+llm:
+  models:
+    - name: "gpt-4o-mini"
+      weight: 1.0
+  
+  api_base: "https://api.openai.com/v1"
+  temperature: 0.7
+  max_tokens: 4096
+
+# Database configuration
+database:
+  population_size: 50
+  num_islands: 3
+  migration_interval: 20
+
+# Evaluation settings
+evaluator:
+  timeout: 60
+  max_retries: 2
+  parallel_evaluations: 2
+
+# Evolution settings
+diff_based_evolution: true
+max_code_length: 8000
\ No newline at end of file
diff --git a/examples/attention_optimization/configs/failing_config.yaml b/examples/attention_optimization/configs/failing_config.yaml
index 6884ce71d..bdad7cc96 100644
--- a/examples/attention_optimization/configs/failing_config.yaml
+++ b/examples/attention_optimization/configs/failing_config.yaml
@@ -35,13 +35,16 @@ checkpoints:
   keep_best: true
   save_all_programs: false
 
+# Early stopping settings (moved to top level)
+early_stopping_patience: 50          # Stop after 50 iterations without improvement
+convergence_threshold: 0.001         # Minimum improvement required
+early_stopping_metric: "speedup"     # Track speedup metric
+
 # Optimization targets
 optimization:
   target_metric: "speedup"
   target_value: 1.32  # 32% speedup like AlphaEvolve paper
   minimize: false
-  convergence_threshold: 0.001
-  early_stopping_patience: 50
 
 # Logging
 logging:
diff --git a/openevolve/config.py b/openevolve/config.py
index 3a40f26ff..82776ae9b 100644
--- a/openevolve/config.py
+++ b/openevolve/config.py
@@ -271,6 +271,11 @@ class Config:
     # Evolution settings
     diff_based_evolution: bool = True
     max_code_length: int = 10000
+    
+    # Early stopping settings
+    early_stopping_patience: Optional[int] = None
+    convergence_threshold: float = 0.001
+    early_stopping_metric: str = "combined_score"
 
     @classmethod
     def from_yaml(cls, path: Union[str, Path]) -> "Config":
@@ -381,6 +386,10 @@ def to_dict(self) -> Dict[str, Any]:
             # Evolution settings
             "diff_based_evolution": self.diff_based_evolution,
             "max_code_length": self.max_code_length,
+            # Early stopping settings
+            "early_stopping_patience": self.early_stopping_patience,
+            "convergence_threshold": self.convergence_threshold,
+            "early_stopping_metric": self.early_stopping_metric,
         }
 
     def to_yaml(self, path: Union[str, Path]) -> None:
diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py
index f8d4805ef..daee53424 100644
--- a/openevolve/process_parallel.py
+++ b/openevolve/process_parallel.py
@@ -425,6 +425,17 @@ async def run_evolution(
         # Island management
         programs_per_island = max(1, max_iterations // (self.config.database.num_islands * 10))
         current_island_counter = 0
+        
+        # Early stopping tracking
+        early_stopping_enabled = self.config.early_stopping_patience is not None
+        if early_stopping_enabled:
+            best_score = float('-inf')
+            iterations_without_improvement = 0
+            logger.info(f"Early stopping enabled: patience={self.config.early_stopping_patience}, "
+                       f"threshold={self.config.convergence_threshold}, "
+                       f"metric={self.config.early_stopping_metric}")
+        else:
+            logger.info("Early stopping disabled")
 
         # Process results as they complete
         while (
@@ -563,6 +574,41 @@ async def run_evolution(
                                 )
                                 break
 
+                    # Check early stopping
+                    if early_stopping_enabled and child_program.metrics:
+                        # Get the metric to track for early stopping
+                        current_score = None
+                        if self.config.early_stopping_metric in child_program.metrics:
+                            current_score = child_program.metrics[self.config.early_stopping_metric]
+                        else:
+                            # Fall back to average of numeric metrics if specified metric doesn't exist
+                            numeric_metrics = [
+                                v for v in child_program.metrics.values() 
+                                if isinstance(v, (int, float)) and not isinstance(v, bool)
+                            ]
+                            if numeric_metrics:
+                                current_score = sum(numeric_metrics) / len(numeric_metrics)
+
+                        if current_score is not None and isinstance(current_score, (int, float)):
+                            # Check for improvement
+                            improvement = current_score - best_score
+                            if improvement >= self.config.convergence_threshold:
+                                best_score = current_score
+                                iterations_without_improvement = 0
+                                logger.debug(f"New best score: {best_score:.4f} (improvement: {improvement:+.4f})")
+                            else:
+                                iterations_without_improvement += 1
+                                logger.debug(f"No improvement: {iterations_without_improvement}/{self.config.early_stopping_patience}")
+
+                            # Check if we should stop
+                            if iterations_without_improvement >= self.config.early_stopping_patience:
+                                logger.info(
+                                    f"Early stopping triggered at iteration {completed_iteration}: "
+                                    f"No improvement for {iterations_without_improvement} iterations "
+                                    f"(best score: {best_score:.4f})"
+                                )
+                                break
+
             except Exception as e:
                 logger.error(f"Error processing result from iteration {completed_iteration}: {e}")
 

From 79a9e9ef888b4f9dc5fcef1852c97c4462d37cdb Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Tue, 26 Aug 2025 18:00:06 +0800
Subject: [PATCH 2/3] Bump version to 0.2.8

---
 openevolve/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openevolve/_version.py b/openevolve/_version.py
index 6c7d6089c..e0ac131a2 100644
--- a/openevolve/_version.py
+++ b/openevolve/_version.py
@@ -1,3 +1,3 @@
 """Version information for openevolve package."""
 
-__version__ = "0.2.7"
+__version__ = "0.2.8"

From 4a6fb6fc8bff359de80ff0c26864268bc05d57ad Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Tue, 26 Aug 2025 18:11:01 +0800
Subject: [PATCH 3/3] process_parallel: hoist safe_numeric_average import and reuse it for early stopping fallback

---
 openevolve/process_parallel.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py
index daee53424..f508b8816 100644
--- a/openevolve/process_parallel.py
+++ b/openevolve/process_parallel.py
@@ -15,6 +15,7 @@
 
 from openevolve.config import Config
 from openevolve.database import Program, ProgramDatabase
+from openevolve.utils.metrics_utils import safe_numeric_average
 
 logger = logging.getLogger(__name__)
 
@@ -145,8 +146,6 @@ def _run_iteration_worker(
         ]
 
         # Sort by metrics for top programs
-        from openevolve.utils.metrics_utils import safe_numeric_average
-
         island_programs.sort(
             key=lambda p: p.metrics.get("combined_score", safe_numeric_average(p.metrics)),
             reverse=True,
@@ -530,8 +529,6 @@ async def run_evolution(
                             "combined_score" not in child_program.metrics
                             and not self._warned_about_combined_score
                         ):
-                            from openevolve.utils.metrics_utils import safe_numeric_average
-
                             avg_score = safe_numeric_average(child_program.metrics)
                             logger.warning(
                                 f"⚠️  No 'combined_score' metric found in evaluation results. "
@@ -580,14 +577,13 @@ async def run_evolution(
                         current_score = None
                         if self.config.early_stopping_metric in child_program.metrics:
                             current_score = child_program.metrics[self.config.early_stopping_metric]
+                        elif self.config.early_stopping_metric == "combined_score":
+                            # combined_score absent: fall back to safe_numeric_average, same as the island sort key
+                            current_score = safe_numeric_average(child_program.metrics)
                         else:
-                            # Fall back to average of numeric metrics if specified metric doesn't exist
-                            numeric_metrics = [
-                                v for v in child_program.metrics.values() 
-                                if isinstance(v, (int, float)) and not isinstance(v, bool)
-                            ]
-                            if numeric_metrics:
-                                current_score = sum(numeric_metrics) / len(numeric_metrics)
+                            # User specified a custom metric that doesn't exist
+                            logger.warning(f"Early stopping metric '{self.config.early_stopping_metric}' not found, using safe numeric average")
+                            current_score = safe_numeric_average(child_program.metrics)
 
                         if current_score is not None and isinstance(current_score, (int, float)):
                             # Check for improvement