diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 871411ac7..184b790a9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -29,7 +29,7 @@ black openevolve tests examples
 
 ## Pull Request Process
 
-1. Create a new branch for your feature or bugfix: `git checkout -b feature/your-feature-name`
+1. Create a new branch for your feature or bugfix: `git checkout -b feat-your-feature-name`
 2. Make your changes
 3. Add tests for your changes
 4. Run the tests to make sure everything passes: `python -m unittest discover tests`
diff --git a/configs/README.md b/configs/README.md
new file mode 100644
index 000000000..6ce24383c
--- /dev/null
+++ b/configs/README.md
@@ -0,0 +1,73 @@
+# OpenEvolve Configuration Files
+
+This directory contains configuration files for OpenEvolve, along with examples for different use cases.
+
+## Configuration Files
+
+### `default_config.yaml`
+The main configuration file containing all available options with sensible defaults. This file includes:
+- Complete documentation for all configuration parameters
+- Default values for all settings
+- **Island-based evolution parameters** for proper evolutionary diversity
+
+Use this file as a template for your own configurations.
+
+### `island_config_example.yaml`
+A practical example configuration demonstrating a proper island-based evolution setup. It shows:
+- Recommended island settings for most use cases
+- Balanced migration parameters
+- A complete working configuration
+
+### `island_examples.yaml`
+Multiple example configurations for different scenarios:
+- **Maximum Diversity**: Many islands, frequent migration
+- **Focused Exploration**: Few islands, rare migration
+- **Balanced Approach**: Default recommended settings
+- **Quick Exploration**: Small-scale rapid testing
+- **Large-Scale Evolution**: Complex optimization runs
+
+It also includes guidelines for choosing parameters based on your problem's characteristics.
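+
+Because `island_examples.yaml` bundles several named configurations under top-level keys (`max_diversity`, `balanced`, and so on), extract the section you want before using it. A minimal sketch, assuming PyYAML is installed and that a partial config (containing only the `database` section) is merged with the defaults:
+
+```python
+import yaml
+
+# Load the bundle of named example configurations
+with open("configs/island_examples.yaml") as f:
+    examples = yaml.safe_load(f)
+
+# Pick one named configuration (here "balanced") and write it out
+# as a standalone config file
+with open("my_config.yaml", "w") as f:
+    yaml.safe_dump(examples["balanced"], f)
+```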
+ +## Island-Based Evolution Parameters + +The key new parameters for proper evolutionary diversity are: + +```yaml +database: + num_islands: 5 # Number of separate populations + migration_interval: 50 # Migrate every N generations + migration_rate: 0.1 # Fraction of top programs to migrate +``` + +### Parameter Guidelines + +- **num_islands**: 3-10 for most problems (more = more diversity) +- **migration_interval**: 25-100 generations (higher = more independence) +- **migration_rate**: 0.05-0.2 (5%-20%, higher = faster knowledge sharing) + +### When to Use What + +- **Complex problems** → More islands, less frequent migration +- **Simple problems** → Fewer islands, more frequent migration +- **Long runs** → More islands to maintain diversity +- **Short runs** → Fewer islands for faster convergence + +## Usage + +Copy any of these files as a starting point for your configuration: + +```bash +cp configs/default_config.yaml my_config.yaml +# Edit my_config.yaml for your specific needs +``` + +Then use with OpenEvolve: + +```python +from openevolve import OpenEvolve +evolve = OpenEvolve( + initial_program_path="program.py", + evaluation_file="evaluator.py", + config_path="my_config.yaml" +) +``` diff --git a/configs/default_config.yaml b/configs/default_config.yaml index f9180db8d..4bc7558aa 100644 --- a/configs/default_config.yaml +++ b/configs/default_config.yaml @@ -55,9 +55,7 @@ prompt: - "I suggest the following improvements:" - "We can enhance this code by:" - # Meta-prompting (experimental) - use_meta_prompting: false # Use LLM to generate parts of the prompt - meta_prompt_weight: 0.1 # Weight for meta-prompting influence + # Note: meta-prompting features are not yet implemented # Database configuration database: @@ -68,13 +66,19 @@ database: # Evolutionary parameters population_size: 1000 # Maximum number of programs to keep in memory archive_size: 100 # Size of elite archive - num_islands: 5 # Number of islands for island model + num_islands: 5 # Number of islands for island model (separate populations) + + # Island-based evolution parameters + # Islands provide diversity by maintaining separate populations that evolve independently. + # Migration periodically shares the best solutions between adjacent islands. 
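+  # Example: with num_islands: 5, migration_interval: 50 and migration_rate: 0.1,
+  # every 50 island generations the top 10% of each island's programs are
+  # copied to its two neighbours in the ring topology.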
+ migration_interval: 50 # Migrate between islands every N generations + migration_rate: 0.1 # Fraction of top programs to migrate (0.1 = 10%) # Selection parameters elite_selection_ratio: 0.1 # Ratio of elite programs to select exploration_ratio: 0.2 # Ratio of exploration vs exploitation exploitation_ratio: 0.7 # Ratio of exploitation vs random selection - diversity_metric: "edit_distance" # Diversity metric (edit_distance, feature_based) + # Note: diversity_metric is fixed to "edit_distance" (feature_based not implemented) # Feature map dimensions for MAP-Elites feature_dimensions: # Dimensions for MAP-Elites feature map @@ -88,9 +92,7 @@ evaluator: timeout: 300 # Maximum evaluation time in seconds max_retries: 3 # Maximum number of retries for evaluation - # Resource limits - memory_limit_mb: null # Memory limit for evaluation (null = no limit) - cpu_limit: null # CPU limit for evaluation (null = no limit) + # Note: resource limits (memory_limit_mb, cpu_limit) are not yet implemented # Evaluation strategies cascade_evaluation: true # Use cascade evaluation to filter bad solutions early @@ -101,7 +103,7 @@ evaluator: # Parallel evaluation parallel_evaluations: 4 # Number of parallel evaluations - distributed: false # Use distributed evaluation + # Note: distributed evaluation is not yet implemented # LLM-based feedback (experimental) use_llm_feedback: false # Use LLM to evaluate code quality diff --git a/configs/island_config_example.yaml b/configs/island_config_example.yaml new file mode 100644 index 000000000..7a7ab6db7 --- /dev/null +++ b/configs/island_config_example.yaml @@ -0,0 +1,55 @@ +# OpenEvolve Island-Based Evolution Configuration +# This configuration demonstrates the proper use of island-based evolution + +# General settings +max_iterations: 1000 +checkpoint_interval: 100 +log_level: "INFO" + +# LLM configuration +llm: + primary_model: "gemini-2.0-flash-lite" + primary_model_weight: 0.8 + secondary_model: "gemini-2.0-flash" + secondary_model_weight: 0.2 + temperature: 0.7 + top_p: 0.95 + max_tokens: 4096 + +# Database configuration with proper island settings +database: + population_size: 500 + archive_size: 100 + + # Island-based evolution settings + num_islands: 5 # Number of separate populations + migration_interval: 50 # Migrate every 50 generations + migration_rate: 0.1 # Migrate 10% of top programs + + # Selection parameters + elite_selection_ratio: 0.1 + exploration_ratio: 0.3 + exploitation_ratio: 0.7 + # Note: diversity_metric fixed to "edit_distance" + + # Feature map dimensions for MAP-Elites + feature_dimensions: ["score", "complexity"] + feature_bins: 10 + +# Prompt configuration +prompt: + num_top_programs: 3 + num_diverse_programs: 2 + use_template_stochasticity: true + +# Evaluator configuration +evaluator: + timeout: 300 + max_retries: 3 + cascade_evaluation: true + parallel_evaluations: 4 + +# Evolution settings +diff_based_evolution: true +allow_full_rewrites: false +max_code_length: 10000 diff --git a/configs/island_examples.yaml b/configs/island_examples.yaml new file mode 100644 index 000000000..4c62ac5e0 --- /dev/null +++ b/configs/island_examples.yaml @@ -0,0 +1,80 @@ +# OpenEvolve Island-Based Evolution Configuration Examples +# Different configurations for various use cases + +# Configuration for Maximum Diversity (Many Islands, Frequent Migration) +# Use this when you want to explore the search space thoroughly +# Good for: Complex problems, avoiding local optima, long runs +max_diversity: + database: + num_islands: 10 # More islands = more 
diversity + migration_interval: 25 # More frequent migration + migration_rate: 0.2 # Higher migration rate + population_size: 1000 + archive_size: 200 + +# Configuration for Focused Exploration (Few Islands, Rare Migration) +# Use this when you want deeper exploration within each island +# Good for: Problems with clear structure, shorter runs +focused_exploration: + database: + num_islands: 3 # Fewer islands = deeper exploration + migration_interval: 100 # Less frequent migration + migration_rate: 0.05 # Lower migration rate + population_size: 500 + archive_size: 50 + +# Configuration for Balanced Approach (Default Settings) +# Use this as a starting point for most problems +# Good for: General use, medium-length runs +balanced: + database: + num_islands: 5 # Balanced number of islands + migration_interval: 50 # Moderate migration frequency + migration_rate: 0.1 # Moderate migration rate + population_size: 1000 + archive_size: 100 + +# Configuration for Quick Exploration (Small Scale) +# Use this for rapid prototyping and testing +# Good for: Small problems, quick experiments +quick_exploration: + database: + num_islands: 3 + migration_interval: 20 + migration_rate: 0.15 + population_size: 200 + archive_size: 30 + +# Configuration for Large-Scale Evolution (High Performance) +# Use this for complex problems requiring extensive search +# Good for: Complex optimization, long evolutionary runs +large_scale: + database: + num_islands: 15 # Many islands for parallel exploration + migration_interval: 75 # Balanced migration timing + migration_rate: 0.08 # Conservative migration rate + population_size: 2000 # Large populations + archive_size: 300 + +# Guidelines for choosing parameters: +# +# num_islands: +# - More islands = more diversity, slower convergence +# - Fewer islands = faster convergence, risk of premature convergence +# - Recommended: 3-10 for most problems +# +# migration_interval: +# - Lower values = more frequent knowledge sharing +# - Higher values = more independent evolution +# - Recommended: 25-100 generations +# +# migration_rate: +# - Higher values = faster knowledge propagation +# - Lower values = preserve island diversity longer +# - Recommended: 0.05-0.2 (5%-20%) +# +# Rule of thumb: +# - Complex problems → More islands, less frequent migration +# - Simple problems → Fewer islands, more frequent migration +# - Long runs → More islands to maintain diversity +# - Short runs → Fewer islands for faster convergence diff --git a/openevolve/config.py b/openevolve/config.py index b04dc7c72..460907ba4 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -80,6 +80,13 @@ class DatabaseConfig: feature_dimensions: List[str] = field(default_factory=lambda: ["score", "complexity"]) feature_bins: int = 10 + # Migration parameters for island-based evolution + migration_interval: int = 50 # Migrate every N generations + migration_rate: float = 0.1 # Fraction of population to migrate + + # Random seed for reproducible sampling + random_seed: Optional[int] = None + @dataclass class EvaluatorConfig: @@ -188,8 +195,9 @@ def to_dict(self) -> Dict[str, Any]: "num_diverse_programs": self.prompt.num_diverse_programs, "use_template_stochasticity": self.prompt.use_template_stochasticity, "template_variations": self.prompt.template_variations, - "use_meta_prompting": self.prompt.use_meta_prompting, - "meta_prompt_weight": self.prompt.meta_prompt_weight, + # Note: meta-prompting features not implemented + # "use_meta_prompting": self.prompt.use_meta_prompting, + # "meta_prompt_weight": 
self.prompt.meta_prompt_weight, }, "database": { "db_path": self.database.db_path, @@ -200,19 +208,25 @@ def to_dict(self) -> Dict[str, Any]: "elite_selection_ratio": self.database.elite_selection_ratio, "exploration_ratio": self.database.exploration_ratio, "exploitation_ratio": self.database.exploitation_ratio, - "diversity_metric": self.database.diversity_metric, + # Note: diversity_metric fixed to "edit_distance" + # "diversity_metric": self.database.diversity_metric, "feature_dimensions": self.database.feature_dimensions, "feature_bins": self.database.feature_bins, + "migration_interval": self.database.migration_interval, + "migration_rate": self.database.migration_rate, + "random_seed": self.database.random_seed, }, "evaluator": { "timeout": self.evaluator.timeout, "max_retries": self.evaluator.max_retries, - "memory_limit_mb": self.evaluator.memory_limit_mb, - "cpu_limit": self.evaluator.cpu_limit, + # Note: resource limits not implemented + # "memory_limit_mb": self.evaluator.memory_limit_mb, + # "cpu_limit": self.evaluator.cpu_limit, "cascade_evaluation": self.evaluator.cascade_evaluation, "cascade_thresholds": self.evaluator.cascade_thresholds, "parallel_evaluations": self.evaluator.parallel_evaluations, - "distributed": self.evaluator.distributed, + # Note: distributed evaluation not implemented + # "distributed": self.evaluator.distributed, "use_llm_feedback": self.evaluator.use_llm_feedback, "llm_feedback_weight": self.evaluator.llm_feedback_weight, }, diff --git a/openevolve/controller.py b/openevolve/controller.py index d090efae1..68d3e0c12 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -67,6 +67,15 @@ def __init__( # Set up logging self._setup_logging() + # Set random seed for reproducibility if specified + if self.config.random_seed is not None: + import random + import numpy as np + + random.seed(self.config.random_seed) + np.random.seed(self.config.random_seed) + logger.info(f"Set random seed to {self.config.random_seed} for reproducibility") + # Load initial program self.initial_program_path = initial_program_path self.initial_program_code = self._load_initial_program() @@ -85,6 +94,11 @@ def __init__( # Initialize components self.llm_ensemble = LLMEnsemble(self.config.llm) self.prompt_sampler = PromptSampler(self.config.prompt) + + # Pass random seed to database if specified + if self.config.random_seed is not None: + self.config.database.random_seed = self.config.random_seed + self.database = ProgramDatabase(self.config.database) self.evaluator = Evaluator(self.config.evaluator, evaluation_file, self.llm_ensemble) @@ -179,10 +193,27 @@ async def run( f"Starting evolution from iteration {start_iteration} for {max_iterations} iterations (total: {total_iterations})" ) + # Island-based evolution variables + programs_per_island = max( + 1, max_iterations // (self.config.database.num_islands * 10) + ) # Dynamic allocation + current_island_counter = 0 + + logger.info(f"Using island-based evolution with {self.config.database.num_islands} islands") + self.database.log_island_status() + for i in range(start_iteration, total_iterations): iteration_start = time.time() - # Sample parent and inspirations + # Manage island evolution - switch islands periodically + if i > start_iteration and current_island_counter >= programs_per_island: + self.database.next_island() + current_island_counter = 0 + logger.debug(f"Switched to island {self.database.current_island}") + + current_island_counter += 1 + + # Sample parent and inspirations from current island parent, 
inspirations = self.database.sample() # Build prompt @@ -252,9 +283,18 @@ async def run( }, ) - # Add to database + # Add to database (will be added to current island) self.database.add(child_program, iteration=i + 1) + # Increment generation for current island + self.database.increment_island_generation() + + # Check if migration should occur + if self.database.should_migrate(): + logger.info(f"Performing migration at iteration {i+1}") + self.database.migrate_programs() + self.database.log_island_status() + # Log progress iteration_time = time.time() - iteration_start self._log_iteration(i, parent, child_program, iteration_time) @@ -271,6 +311,9 @@ async def run( # Save checkpoint if (i + 1) % self.config.checkpoint_interval == 0: self._save_checkpoint(i + 1) + # Also log island status at checkpoints + logger.info(f"Island status at checkpoint {i+1}:") + self.database.log_island_status() # Check if target score reached if target_score is not None: diff --git a/openevolve/database.py b/openevolve/database.py index e215ecfbd..772250ec1 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -1,603 +1,918 @@ -""" -Program database for OpenEvolve -""" - -import json -import logging -import os -import random -import time -from dataclasses import asdict, dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple, Union - -import numpy as np - -from openevolve.config import DatabaseConfig -from openevolve.utils.code_utils import calculate_edit_distance - -logger = logging.getLogger(__name__) - - -@dataclass -class Program: - """Represents a program in the database""" - - # Program identification - id: str - code: str - language: str = "python" - - # Evolution information - parent_id: Optional[str] = None - generation: int = 0 - timestamp: float = field(default_factory=time.time) - iteration_found: int = 0 # Track which iteration this program was found - - # Performance metrics - metrics: Dict[str, float] = field(default_factory=dict) - - # Derived features - complexity: float = 0.0 - diversity: float = 0.0 - - # Metadata - metadata: Dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary representation""" - return asdict(self) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "Program": - """Create from dictionary representation""" - return cls(**data) - - -class ProgramDatabase: - """ - Database for storing and sampling programs during evolution - - The database implements a combination of MAP-Elites algorithm and - island-based population model to maintain diversity during evolution. - It also tracks the absolute best program separately to ensure it's never lost. 
- """ - - def __init__(self, config: DatabaseConfig): - self.config = config - - # In-memory program storage - self.programs: Dict[str, Program] = {} - - # Feature grid for MAP-Elites - self.feature_map: Dict[str, str] = {} - self.feature_bins = config.feature_bins - - # Island populations - self.islands: List[Set[str]] = [set() for _ in range(config.num_islands)] - - # Archive of elite programs - self.archive: Set[str] = set() - - # Track the absolute best program separately - self.best_program_id: Optional[str] = None - - # Track the last iteration number (for resuming) - self.last_iteration: int = 0 - - # Load database from disk if path is provided - if config.db_path and os.path.exists(config.db_path): - self.load(config.db_path) - - logger.info(f"Initialized program database with {len(self.programs)} programs") - - def add(self, program: Program, iteration: int = None) -> str: - """ - Add a program to the database - - Args: - program: Program to add - iteration: Current iteration (defaults to last_iteration) - - Returns: - Program ID - """ - # Store the program - # If iteration is provided, update the program's iteration_found - if iteration is not None: - program.iteration_found = iteration - # Update last_iteration if needed - self.last_iteration = max(self.last_iteration, iteration) - - self.programs[program.id] = program - - # Calculate feature coordinates for MAP-Elites - feature_coords = self._calculate_feature_coords(program) - - # Add to feature map (replacing existing if better) - feature_key = self._feature_coords_to_key(feature_coords) - if feature_key not in self.feature_map or self._is_better( - program, self.programs[self.feature_map[feature_key]] - ): - self.feature_map[feature_key] = program.id - - # Add to an island (randomly) - island_idx = random.randint(0, len(self.islands) - 1) - self.islands[island_idx].add(program.id) - - # Update archive - self._update_archive(program) - - # Update the absolute best program tracking - self._update_best_program(program) - - # Save to disk if configured - if self.config.db_path: - self._save_program(program) - - logger.debug(f"Added program {program.id} to database") - return program.id - - def get(self, program_id: str) -> Optional[Program]: - """ - Get a program by ID - - Args: - program_id: Program ID - - Returns: - Program or None if not found - """ - return self.programs.get(program_id) - - def sample(self) -> Tuple[Program, List[Program]]: - """ - Sample a program and inspirations for the next evolution step - - Returns: - Tuple of (parent_program, inspiration_programs) - """ - # Select parent program - parent = self._sample_parent() - - # Select inspirations - inspirations = self._sample_inspirations(parent, n=5) - - logger.debug(f"Sampled parent {parent.id} and {len(inspirations)} inspirations") - return parent, inspirations - - def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]: - """ - Get the best program based on a metric - - Args: - metric: Metric to use for ranking (uses combined_score or average if None) - - Returns: - Best program or None if database is empty - """ - if not self.programs: - return None - - # If no specific metric and we have a tracked best program, return it - if metric is None and self.best_program_id and self.best_program_id in self.programs: - logger.debug(f"Using tracked best program: {self.best_program_id}") - return self.programs[self.best_program_id] - - if metric: - # Sort by specific metric - sorted_programs = sorted( - [p for p in self.programs.values() if 
metric in p.metrics], - key=lambda p: p.metrics[metric], - reverse=True, - ) - if sorted_programs: - logger.debug(f"Found best program by metric '{metric}': {sorted_programs[0].id}") - elif self.programs and all("combined_score" in p.metrics for p in self.programs.values()): - # Sort by combined_score if it exists (preferred method) - sorted_programs = sorted( - self.programs.values(), key=lambda p: p.metrics["combined_score"], reverse=True - ) - if sorted_programs: - logger.debug(f"Found best program by combined_score: {sorted_programs[0].id}") - else: - # Sort by average of all metrics as fallback - sorted_programs = sorted( - self.programs.values(), - key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)), - reverse=True, - ) - if sorted_programs: - logger.debug(f"Found best program by average metrics: {sorted_programs[0].id}") - - # Update the best program tracking if we found a better program - if sorted_programs and ( - self.best_program_id is None or sorted_programs[0].id != self.best_program_id - ): - old_id = self.best_program_id - self.best_program_id = sorted_programs[0].id - logger.info(f"Updated best program tracking from {old_id} to {self.best_program_id}") - - # Also log the scores to help understand the update - if ( - old_id - and old_id in self.programs - and "combined_score" in self.programs[old_id].metrics - and "combined_score" in self.programs[self.best_program_id].metrics - ): - old_score = self.programs[old_id].metrics["combined_score"] - new_score = self.programs[self.best_program_id].metrics["combined_score"] - logger.info( - f"Score change: {old_score:.4f} → {new_score:.4f} ({new_score-old_score:+.4f})" - ) - - return sorted_programs[0] if sorted_programs else None - - def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Program]: - """ - Get the top N programs based on a metric - - Args: - n: Number of programs to return - metric: Metric to use for ranking (uses average if None) - - Returns: - List of top programs - """ - if not self.programs: - return [] - - if metric: - # Sort by specific metric - sorted_programs = sorted( - [p for p in self.programs.values() if metric in p.metrics], - key=lambda p: p.metrics[metric], - reverse=True, - ) - else: - # Sort by average of all metrics - sorted_programs = sorted( - self.programs.values(), - key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)), - reverse=True, - ) - - return sorted_programs[:n] - - def save(self, path: Optional[str] = None, iteration: int = 0) -> None: - """ - Save the database to disk - - Args: - path: Path to save to (uses config.db_path if None) - iteration: Current iteration number - """ - save_path = path or self.config.db_path - if not save_path: - logger.warning("No database path specified, skipping save") - return - - # Create directory if it doesn't exist - os.makedirs(save_path, exist_ok=True) - - # Save each program - for program in self.programs.values(): - self._save_program(program, save_path) - - # Save metadata - metadata = { - "feature_map": self.feature_map, - "islands": [list(island) for island in self.islands], - "archive": list(self.archive), - "best_program_id": self.best_program_id, - "last_iteration": iteration or self.last_iteration, - } - - with open(os.path.join(save_path, "metadata.json"), "w") as f: - json.dump(metadata, f) - - logger.info(f"Saved database with {len(self.programs)} programs to {save_path}") - - def load(self, path: str) -> None: - """ - Load the database from disk - - Args: - path: Path to load from - """ - if 
not os.path.exists(path): - logger.warning(f"Database path {path} does not exist, skipping load") - return - - # Load metadata - metadata_path = os.path.join(path, "metadata.json") - if os.path.exists(metadata_path): - with open(metadata_path, "r") as f: - metadata = json.load(f) - - self.feature_map = metadata.get("feature_map", {}) - self.islands = [set(island) for island in metadata.get("islands", [])] - self.archive = set(metadata.get("archive", [])) - self.best_program_id = metadata.get("best_program_id") - self.last_iteration = metadata.get("last_iteration", 0) - - logger.info(f"Loaded database metadata with last_iteration={self.last_iteration}") - - # Load programs - programs_dir = os.path.join(path, "programs") - if os.path.exists(programs_dir): - for program_file in os.listdir(programs_dir): - if program_file.endswith(".json"): - program_path = os.path.join(programs_dir, program_file) - try: - with open(program_path, "r") as f: - program_data = json.load(f) - - program = Program.from_dict(program_data) - self.programs[program.id] = program - except Exception as e: - logger.warning(f"Error loading program {program_file}: {str(e)}") - - logger.info(f"Loaded database with {len(self.programs)} programs from {path}") - - def _save_program(self, program: Program, base_path: Optional[str] = None) -> None: - """ - Save a program to disk - - Args: - program: Program to save - base_path: Base path to save to (uses config.db_path if None) - """ - save_path = base_path or self.config.db_path - if not save_path: - return - - # Create programs directory if it doesn't exist - programs_dir = os.path.join(save_path, "programs") - os.makedirs(programs_dir, exist_ok=True) - - # Save program - program_path = os.path.join(programs_dir, f"{program.id}.json") - with open(program_path, "w") as f: - json.dump(program.to_dict(), f) - - def _calculate_feature_coords(self, program: Program) -> List[int]: - """ - Calculate feature coordinates for the MAP-Elites grid - - Args: - program: Program to calculate features for - - Returns: - List of feature coordinates - """ - coords = [] - - for dim in self.config.feature_dimensions: - if dim == "complexity": - # Use code length as complexity measure - complexity = len(program.code) - bin_idx = min(int(complexity / 1000 * self.feature_bins), self.feature_bins - 1) - coords.append(bin_idx) - elif dim == "diversity": - # Use average edit distance to other programs - if len(self.programs) < 5: - bin_idx = 0 - else: - sample_programs = random.sample( - list(self.programs.values()), min(5, len(self.programs)) - ) - avg_distance = sum( - calculate_edit_distance(program.code, other.code) - for other in sample_programs - ) / len(sample_programs) - bin_idx = min( - int(avg_distance / 1000 * self.feature_bins), self.feature_bins - 1 - ) - coords.append(bin_idx) - elif dim == "score": - # Use average of metrics - if not program.metrics: - bin_idx = 0 - else: - avg_score = sum(program.metrics.values()) / len(program.metrics) - bin_idx = min(int(avg_score * self.feature_bins), self.feature_bins - 1) - coords.append(bin_idx) - elif dim in program.metrics: - # Use specific metric - score = program.metrics[dim] - bin_idx = min(int(score * self.feature_bins), self.feature_bins - 1) - coords.append(bin_idx) - else: - # Default to middle bin if feature not found - coords.append(self.feature_bins // 2) - - return coords - - def _feature_coords_to_key(self, coords: List[int]) -> str: - """ - Convert feature coordinates to a string key - - Args: - coords: Feature coordinates - - 
Returns: - String key - """ - return "-".join(str(c) for c in coords) - - def _is_better(self, program1: Program, program2: Program) -> bool: - """ - Determine if program1 is better than program2 - - Args: - program1: First program - program2: Second program - - Returns: - True if program1 is better than program2 - """ - # If no metrics, use newest - if not program1.metrics and not program2.metrics: - return program1.timestamp > program2.timestamp - - # If only one has metrics, it's better - if program1.metrics and not program2.metrics: - return True - if not program1.metrics and program2.metrics: - return False - - # Check for combined_score first (this is the preferred metric) - if "combined_score" in program1.metrics and "combined_score" in program2.metrics: - return program1.metrics["combined_score"] > program2.metrics["combined_score"] - - # Fallback to average of all metrics - avg1 = sum(program1.metrics.values()) / len(program1.metrics) - avg2 = sum(program2.metrics.values()) / len(program2.metrics) - - return avg1 > avg2 - - def _update_archive(self, program: Program) -> None: - """ - Update the archive of elite programs - - Args: - program: Program to consider for archive - """ - # If archive not full, add program - if len(self.archive) < self.config.archive_size: - self.archive.add(program.id) - return - - # Otherwise, find worst program in archive - archive_programs = [self.programs[pid] for pid in self.archive] - worst_program = min( - archive_programs, key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) - ) - - # Replace if new program is better - if self._is_better(program, worst_program): - self.archive.remove(worst_program.id) - self.archive.add(program.id) - - def _update_best_program(self, program: Program) -> None: - """ - Update the absolute best program tracking - - Args: - program: Program to consider as the new best - """ - # If we don't have a best program yet, this becomes the best - if self.best_program_id is None: - self.best_program_id = program.id - logger.debug(f"Set initial best program to {program.id}") - return - - # Compare with current best program - current_best = self.programs[self.best_program_id] - - # Update if the new program is better - if self._is_better(program, current_best): - old_id = self.best_program_id - self.best_program_id = program.id - - # Log the change - if "combined_score" in program.metrics and "combined_score" in current_best.metrics: - old_score = current_best.metrics["combined_score"] - new_score = program.metrics["combined_score"] - score_diff = new_score - old_score - logger.info( - f"New best program {program.id} replaces {old_id} (combined_score: {old_score:.4f} → {new_score:.4f}, +{score_diff:.4f})" - ) - else: - logger.info(f"New best program {program.id} replaces {old_id}") - - def _sample_parent(self) -> Program: - """ - Sample a parent program for the next evolution step - - Returns: - Parent program - """ - # Decide between exploitation and exploration - if random.random() < self.config.exploitation_ratio and self.archive: - # Exploitation: Use elite program from archive - parent_id = random.choice(list(self.archive)) - return self.programs[parent_id] - - # Exploration: Sample from an island - island_idx = random.randint(0, len(self.islands) - 1) - - if not self.islands[island_idx]: - # If island is empty, use best program - return self.get_best_program() or next(iter(self.programs.values())) - - parent_id = random.choice(list(self.islands[island_idx])) - return self.programs[parent_id] - - def 
_sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]: - """ - Sample inspiration programs for the next evolution step - - Args: - parent: Parent program - n: Number of inspirations to sample - - Returns: - List of inspiration programs - """ - inspirations = [] - - # Always include the absolute best program if available and different from parent - if self.best_program_id is not None and self.best_program_id != parent.id: - best_program = self.programs[self.best_program_id] - inspirations.append(best_program) - logger.debug(f"Including best program {self.best_program_id} in inspirations") - - # Add top programs as inspirations - top_n = max(1, int(n * self.config.elite_selection_ratio)) - top_programs = self.get_top_programs(n=top_n) - for program in top_programs: - if program.id not in [p.id for p in inspirations] and program.id != parent.id: - inspirations.append(program) - - # Add diverse programs - if len(self.programs) > n and len(inspirations) < n: - # Sample from different feature cells - feature_coords = self._calculate_feature_coords(parent) - - # Get programs from nearby feature cells - nearby_programs = [] - for _ in range(n - len(inspirations)): - # Perturb coordinates - perturbed_coords = [ - max(0, min(self.feature_bins - 1, c + random.randint(-1, 1))) - for c in feature_coords - ] - - # Try to get program from this cell - cell_key = self._feature_coords_to_key(perturbed_coords) - if cell_key in self.feature_map: - program_id = self.feature_map[cell_key] - if program_id != parent.id and program_id not in [p.id for p in inspirations]: - nearby_programs.append(self.programs[program_id]) - - # If we need more, add random programs - if len(inspirations) + len(nearby_programs) < n: - remaining = n - len(inspirations) - len(nearby_programs) - all_ids = set(self.programs.keys()) - excluded_ids = ( - {parent.id} - .union(p.id for p in inspirations) - .union(p.id for p in nearby_programs) - ) - available_ids = list(all_ids - excluded_ids) - - if available_ids: - random_ids = random.sample(available_ids, min(remaining, len(available_ids))) - random_programs = [self.programs[pid] for pid in random_ids] - nearby_programs.extend(random_programs) - - inspirations.extend(nearby_programs) - - return inspirations[:n] +""" +Program database for OpenEvolve +""" + +import json +import logging +import os +import random +import time +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple, Union + +import numpy as np + +from openevolve.config import DatabaseConfig +from openevolve.utils.code_utils import calculate_edit_distance + +logger = logging.getLogger(__name__) + + +@dataclass +class Program: + """Represents a program in the database""" + + # Program identification + id: str + code: str + language: str = "python" + + # Evolution information + parent_id: Optional[str] = None + generation: int = 0 + timestamp: float = field(default_factory=time.time) + iteration_found: int = 0 # Track which iteration this program was found + + # Performance metrics + metrics: Dict[str, float] = field(default_factory=dict) + + # Derived features + complexity: float = 0.0 + diversity: float = 0.0 + + # Metadata + metadata: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary representation""" + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "Program": + """Create from dictionary representation""" + return cls(**data) + + 
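+# Example round trip (illustrative, not part of the module):
+#   p = Program(id="abc123", code="print('hi')", metrics={"combined_score": 0.5})
+#   assert Program.from_dict(p.to_dict()) == p
+
+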
+class ProgramDatabase: + """ + Database for storing and sampling programs during evolution + + The database implements a combination of MAP-Elites algorithm and + island-based population model to maintain diversity during evolution. + It also tracks the absolute best program separately to ensure it's never lost. + """ + + def __init__(self, config: DatabaseConfig): + self.config = config + + # In-memory program storage + self.programs: Dict[str, Program] = {} + + # Feature grid for MAP-Elites + self.feature_map: Dict[str, str] = {} + self.feature_bins = config.feature_bins + + # Island populations + self.islands: List[Set[str]] = [set() for _ in range(config.num_islands)] + + # Island-based evolution tracking + self.current_island: int = 0 # Track which island we're currently evolving + self.island_generations: List[int] = [0] * config.num_islands + + # Migration parameters + self.migration_interval: int = getattr(config, "migration_interval", 50) + self.migration_rate: float = getattr(config, "migration_rate", 0.1) + self.last_migration_generation: int = 0 + + # Archive of elite programs + self.archive: Set[str] = set() + + # Track the absolute best program separately + self.best_program_id: Optional[str] = None + + # Track the last iteration number (for resuming) + self.last_iteration: int = 0 + + # Load database from disk if path is provided + if config.db_path and os.path.exists(config.db_path): + self.load(config.db_path) + + # Set random seed for reproducible sampling if specified + if config.random_seed is not None: + import random + + random.seed(config.random_seed) + logger.debug(f"Database: Set random seed to {config.random_seed}") + + logger.info(f"Initialized program database with {len(self.programs)} programs") + + def add( + self, program: Program, iteration: int = None, target_island: Optional[int] = None + ) -> str: + """ + Add a program to the database + + Args: + program: Program to add + iteration: Current iteration (defaults to last_iteration) + target_island: Specific island to add to (uses current_island if None) + + Returns: + Program ID + """ + # Store the program + # If iteration is provided, update the program's iteration_found + if iteration is not None: + program.iteration_found = iteration + # Update last_iteration if needed + self.last_iteration = max(self.last_iteration, iteration) + + self.programs[program.id] = program + + # Enforce population size limit + self._enforce_population_limit() + + # Calculate feature coordinates for MAP-Elites + feature_coords = self._calculate_feature_coords(program) + + # Add to feature map (replacing existing if better) + feature_key = self._feature_coords_to_key(feature_coords) + if feature_key not in self.feature_map or self._is_better( + program, self.programs[self.feature_map[feature_key]] + ): + self.feature_map[feature_key] = program.id + + # Add to specific island (not random!) 
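+        # Unlike the previous implementation, which assigned each new program to a
+        # randomly chosen island, programs now join the island currently being
+        # evolved, unless an explicit target_island is requested.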
+ island_idx = target_island if target_island is not None else self.current_island + island_idx = island_idx % len(self.islands) # Ensure valid island + self.islands[island_idx].add(program.id) + + # Track which island this program belongs to + program.metadata["island"] = island_idx + + # Update archive + self._update_archive(program) + + # Update the absolute best program tracking + self._update_best_program(program) + + # Save to disk if configured + if self.config.db_path: + self._save_program(program) + + logger.debug(f"Added program {program.id} to island {island_idx}") + return program.id + + def get(self, program_id: str) -> Optional[Program]: + """ + Get a program by ID + + Args: + program_id: Program ID + + Returns: + Program or None if not found + """ + return self.programs.get(program_id) + + def sample(self) -> Tuple[Program, List[Program]]: + """ + Sample a program and inspirations for the next evolution step + + Returns: + Tuple of (parent_program, inspiration_programs) + """ + # Select parent program + parent = self._sample_parent() + + # Select inspirations + inspirations = self._sample_inspirations(parent, n=5) + + logger.debug(f"Sampled parent {parent.id} and {len(inspirations)} inspirations") + return parent, inspirations + + def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]: + """ + Get the best program based on a metric + + Args: + metric: Metric to use for ranking (uses combined_score or average if None) + + Returns: + Best program or None if database is empty + """ + if not self.programs: + return None + + # If no specific metric and we have a tracked best program, return it + if metric is None and self.best_program_id and self.best_program_id in self.programs: + logger.debug(f"Using tracked best program: {self.best_program_id}") + return self.programs[self.best_program_id] + + if metric: + # Sort by specific metric + sorted_programs = sorted( + [p for p in self.programs.values() if metric in p.metrics], + key=lambda p: p.metrics[metric], + reverse=True, + ) + if sorted_programs: + logger.debug(f"Found best program by metric '{metric}': {sorted_programs[0].id}") + elif self.programs and all("combined_score" in p.metrics for p in self.programs.values()): + # Sort by combined_score if it exists (preferred method) + sorted_programs = sorted( + self.programs.values(), key=lambda p: p.metrics["combined_score"], reverse=True + ) + if sorted_programs: + logger.debug(f"Found best program by combined_score: {sorted_programs[0].id}") + else: + # Sort by average of all metrics as fallback + sorted_programs = sorted( + self.programs.values(), + key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)), + reverse=True, + ) + if sorted_programs: + logger.debug(f"Found best program by average metrics: {sorted_programs[0].id}") + + # Update the best program tracking if we found a better program + if sorted_programs and ( + self.best_program_id is None or sorted_programs[0].id != self.best_program_id + ): + old_id = self.best_program_id + self.best_program_id = sorted_programs[0].id + logger.info(f"Updated best program tracking from {old_id} to {self.best_program_id}") + + # Also log the scores to help understand the update + if ( + old_id + and old_id in self.programs + and "combined_score" in self.programs[old_id].metrics + and "combined_score" in self.programs[self.best_program_id].metrics + ): + old_score = self.programs[old_id].metrics["combined_score"] + new_score = self.programs[self.best_program_id].metrics["combined_score"] + logger.info( + 
f"Score change: {old_score:.4f} → {new_score:.4f} ({new_score-old_score:+.4f})" + ) + + return sorted_programs[0] if sorted_programs else None + + def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Program]: + """ + Get the top N programs based on a metric + + Args: + n: Number of programs to return + metric: Metric to use for ranking (uses average if None) + + Returns: + List of top programs + """ + if not self.programs: + return [] + + if metric: + # Sort by specific metric + sorted_programs = sorted( + [p for p in self.programs.values() if metric in p.metrics], + key=lambda p: p.metrics[metric], + reverse=True, + ) + else: + # Sort by average of all metrics + sorted_programs = sorted( + self.programs.values(), + key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)), + reverse=True, + ) + + return sorted_programs[:n] + + def save(self, path: Optional[str] = None, iteration: int = 0) -> None: + """ + Save the database to disk + + Args: + path: Path to save to (uses config.db_path if None) + iteration: Current iteration number + """ + save_path = path or self.config.db_path + if not save_path: + logger.warning("No database path specified, skipping save") + return + + # Create directory if it doesn't exist + os.makedirs(save_path, exist_ok=True) + + # Save each program + for program in self.programs.values(): + self._save_program(program, save_path) + + # Save metadata + metadata = { + "feature_map": self.feature_map, + "islands": [list(island) for island in self.islands], + "archive": list(self.archive), + "best_program_id": self.best_program_id, + "last_iteration": iteration or self.last_iteration, + "current_island": self.current_island, + "island_generations": self.island_generations, + "last_migration_generation": self.last_migration_generation, + } + + with open(os.path.join(save_path, "metadata.json"), "w") as f: + json.dump(metadata, f) + + logger.info(f"Saved database with {len(self.programs)} programs to {save_path}") + + def load(self, path: str) -> None: + """ + Load the database from disk + + Args: + path: Path to load from + """ + if not os.path.exists(path): + logger.warning(f"Database path {path} does not exist, skipping load") + return + + # Load metadata + metadata_path = os.path.join(path, "metadata.json") + if os.path.exists(metadata_path): + with open(metadata_path, "r") as f: + metadata = json.load(f) + + self.feature_map = metadata.get("feature_map", {}) + self.islands = [set(island) for island in metadata.get("islands", [])] + self.archive = set(metadata.get("archive", [])) + self.best_program_id = metadata.get("best_program_id") + self.last_iteration = metadata.get("last_iteration", 0) + self.current_island = metadata.get("current_island", 0) + self.island_generations = metadata.get("island_generations", [0] * len(self.islands)) + self.last_migration_generation = metadata.get("last_migration_generation", 0) + + # Ensure island_generations list has correct length + if len(self.island_generations) != len(self.islands): + self.island_generations = [0] * len(self.islands) + + logger.info(f"Loaded database metadata with last_iteration={self.last_iteration}") + + # Load programs + programs_dir = os.path.join(path, "programs") + if os.path.exists(programs_dir): + for program_file in os.listdir(programs_dir): + if program_file.endswith(".json"): + program_path = os.path.join(programs_dir, program_file) + try: + with open(program_path, "r") as f: + program_data = json.load(f) + + program = Program.from_dict(program_data) + 
self.programs[program.id] = program + except Exception as e: + logger.warning(f"Error loading program {program_file}: {str(e)}") + + logger.info(f"Loaded database with {len(self.programs)} programs from {path}") + + def _save_program(self, program: Program, base_path: Optional[str] = None) -> None: + """ + Save a program to disk + + Args: + program: Program to save + base_path: Base path to save to (uses config.db_path if None) + """ + save_path = base_path or self.config.db_path + if not save_path: + return + + # Create programs directory if it doesn't exist + programs_dir = os.path.join(save_path, "programs") + os.makedirs(programs_dir, exist_ok=True) + + # Save program + program_path = os.path.join(programs_dir, f"{program.id}.json") + with open(program_path, "w") as f: + json.dump(program.to_dict(), f) + + def _calculate_feature_coords(self, program: Program) -> List[int]: + """ + Calculate feature coordinates for the MAP-Elites grid + + Args: + program: Program to calculate features for + + Returns: + List of feature coordinates + """ + coords = [] + + for dim in self.config.feature_dimensions: + if dim == "complexity": + # Use code length as complexity measure + complexity = len(program.code) + bin_idx = min(int(complexity / 1000 * self.feature_bins), self.feature_bins - 1) + coords.append(bin_idx) + elif dim == "diversity": + # Use average edit distance to other programs + if len(self.programs) < 5: + bin_idx = 0 + else: + sample_programs = random.sample( + list(self.programs.values()), min(5, len(self.programs)) + ) + avg_distance = sum( + calculate_edit_distance(program.code, other.code) + for other in sample_programs + ) / len(sample_programs) + bin_idx = min( + int(avg_distance / 1000 * self.feature_bins), self.feature_bins - 1 + ) + coords.append(bin_idx) + elif dim == "score": + # Use average of metrics + if not program.metrics: + bin_idx = 0 + else: + avg_score = sum(program.metrics.values()) / len(program.metrics) + bin_idx = min(int(avg_score * self.feature_bins), self.feature_bins - 1) + coords.append(bin_idx) + elif dim in program.metrics: + # Use specific metric + score = program.metrics[dim] + bin_idx = min(int(score * self.feature_bins), self.feature_bins - 1) + coords.append(bin_idx) + else: + # Default to middle bin if feature not found + coords.append(self.feature_bins // 2) + + return coords + + def _feature_coords_to_key(self, coords: List[int]) -> str: + """ + Convert feature coordinates to a string key + + Args: + coords: Feature coordinates + + Returns: + String key + """ + return "-".join(str(c) for c in coords) + + def _is_better(self, program1: Program, program2: Program) -> bool: + """ + Determine if program1 is better than program2 + + Args: + program1: First program + program2: Second program + + Returns: + True if program1 is better than program2 + """ + # If no metrics, use newest + if not program1.metrics and not program2.metrics: + return program1.timestamp > program2.timestamp + + # If only one has metrics, it's better + if program1.metrics and not program2.metrics: + return True + if not program1.metrics and program2.metrics: + return False + + # Check for combined_score first (this is the preferred metric) + if "combined_score" in program1.metrics and "combined_score" in program2.metrics: + return program1.metrics["combined_score"] > program2.metrics["combined_score"] + + # Fallback to average of all metrics + avg1 = sum(program1.metrics.values()) / len(program1.metrics) + avg2 = sum(program2.metrics.values()) / len(program2.metrics) + + return 
avg1 > avg2 + + def _update_archive(self, program: Program) -> None: + """ + Update the archive of elite programs + + Args: + program: Program to consider for archive + """ + # If archive not full, add program + if len(self.archive) < self.config.archive_size: + self.archive.add(program.id) + return + + # Otherwise, find worst program in archive + archive_programs = [self.programs[pid] for pid in self.archive] + worst_program = min( + archive_programs, key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) + ) + + # Replace if new program is better + if self._is_better(program, worst_program): + self.archive.remove(worst_program.id) + self.archive.add(program.id) + + def _update_best_program(self, program: Program) -> None: + """ + Update the absolute best program tracking + + Args: + program: Program to consider as the new best + """ + # If we don't have a best program yet, this becomes the best + if self.best_program_id is None: + self.best_program_id = program.id + logger.debug(f"Set initial best program to {program.id}") + return + + # Compare with current best program + current_best = self.programs[self.best_program_id] + + # Update if the new program is better + if self._is_better(program, current_best): + old_id = self.best_program_id + self.best_program_id = program.id + + # Log the change + if "combined_score" in program.metrics and "combined_score" in current_best.metrics: + old_score = current_best.metrics["combined_score"] + new_score = program.metrics["combined_score"] + score_diff = new_score - old_score + logger.info( + f"New best program {program.id} replaces {old_id} (combined_score: {old_score:.4f} → {new_score:.4f}, +{score_diff:.4f})" + ) + else: + logger.info(f"New best program {program.id} replaces {old_id}") + + def _sample_parent(self) -> Program: + """ + Sample a parent program from the current island for the next evolution step + + Returns: + Parent program from current island + """ + # Use exploration_ratio and exploitation_ratio to decide sampling strategy + rand_val = random.random() + + if rand_val < self.config.exploration_ratio: + # EXPLORATION: Sample from current island (diverse sampling) + return self._sample_exploration_parent() + elif rand_val < self.config.exploration_ratio + self.config.exploitation_ratio: + # EXPLOITATION: Sample from archive (elite programs) + return self._sample_exploitation_parent() + else: + # RANDOM: Sample from any program (remaining probability) + return self._sample_random_parent() + + def _sample_exploration_parent(self) -> Program: + """ + Sample a parent for exploration (from current island) + """ + current_island_programs = self.islands[self.current_island] + + if not current_island_programs: + # If current island is empty, initialize with best program or random program + if self.best_program_id and self.best_program_id in self.programs: + # Clone best program to current island + best_program = self.programs[self.best_program_id] + self.islands[self.current_island].add(self.best_program_id) + best_program.metadata["island"] = self.current_island + logger.debug(f"Initialized empty island {self.current_island} with best program") + return best_program + else: + # Use any available program + return next(iter(self.programs.values())) + + # Sample from current island + parent_id = random.choice(list(current_island_programs)) + return self.programs[parent_id] + + def _sample_exploitation_parent(self) -> Program: + """ + Sample a parent for exploitation (from archive/elite programs) + """ + if not self.archive: + # 
Fallback to exploration if no archive + return self._sample_exploration_parent() + + # Prefer programs from current island in archive + archive_programs_in_island = [ + pid + for pid in self.archive + if pid in self.programs + and self.programs[pid].metadata.get("island") == self.current_island + ] + + if archive_programs_in_island: + parent_id = random.choice(archive_programs_in_island) + return self.programs[parent_id] + else: + # Fall back to any archive program if current island has none + parent_id = random.choice(list(self.archive)) + return self.programs[parent_id] + + def _sample_random_parent(self) -> Program: + """ + Sample a completely random parent from all programs + """ + if not self.programs: + raise ValueError("No programs available for sampling") + + # Sample randomly from all programs + program_id = random.choice(list(self.programs.keys())) + return self.programs[program_id] + + def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]: + """ + Sample inspiration programs for the next evolution step + + Args: + parent: Parent program + n: Number of inspirations to sample + + Returns: + List of inspiration programs + """ + inspirations = [] + + # Always include the absolute best program if available and different from parent + if self.best_program_id is not None and self.best_program_id != parent.id: + best_program = self.programs[self.best_program_id] + inspirations.append(best_program) + logger.debug(f"Including best program {self.best_program_id} in inspirations") + + # Add top programs as inspirations + top_n = max(1, int(n * self.config.elite_selection_ratio)) + top_programs = self.get_top_programs(n=top_n) + for program in top_programs: + if program.id not in [p.id for p in inspirations] and program.id != parent.id: + inspirations.append(program) + + # Add diverse programs using config.num_diverse_programs + if len(self.programs) > n and len(inspirations) < n: + # Calculate how many diverse programs to add (up to remaining slots) + remaining_slots = n - len(inspirations) + + # Sample from different feature cells for diversity + feature_coords = self._calculate_feature_coords(parent) + + # Get programs from nearby feature cells + nearby_programs = [] + for _ in range(remaining_slots): + # Perturb coordinates + perturbed_coords = [ + max(0, min(self.feature_bins - 1, c + random.randint(-1, 1))) + for c in feature_coords + ] + + # Try to get program from this cell + cell_key = self._feature_coords_to_key(perturbed_coords) + if cell_key in self.feature_map: + program_id = self.feature_map[cell_key] + if program_id != parent.id and program_id not in [p.id for p in inspirations]: + nearby_programs.append(self.programs[program_id]) + + # If we need more, add random programs + if len(inspirations) + len(nearby_programs) < n: + remaining = n - len(inspirations) - len(nearby_programs) + all_ids = set(self.programs.keys()) + excluded_ids = ( + {parent.id} + .union(p.id for p in inspirations) + .union(p.id for p in nearby_programs) + ) + available_ids = list(all_ids - excluded_ids) + + if available_ids: + random_ids = random.sample(available_ids, min(remaining, len(available_ids))) + random_programs = [self.programs[pid] for pid in random_ids] + nearby_programs.extend(random_programs) + + inspirations.extend(nearby_programs) + + return inspirations[:n] + + def _enforce_population_limit(self) -> None: + """ + Enforce the population size limit by removing worst programs if needed + """ + if len(self.programs) <= self.config.population_size: + return + + # 
Calculate how many programs to remove + num_to_remove = len(self.programs) - self.config.population_size + + logger.info( + f"Population size ({len(self.programs)}) exceeds limit ({self.config.population_size}), removing {num_to_remove} programs" + ) + + # Get programs sorted by fitness (worst first) + all_programs = list(self.programs.values()) + + # Sort by average metric (worst first) + sorted_programs = sorted( + all_programs, + key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) if p.metrics else 0.0, + ) + + # Remove worst programs, but never remove the best program + programs_to_remove = [] + for program in sorted_programs: + if len(programs_to_remove) >= num_to_remove: + break + # Don't remove the best program + if program.id != self.best_program_id: + programs_to_remove.append(program) + + # If we still need to remove more and only have the best program protected, + # remove from the remaining programs anyway (but keep the absolute best) + if len(programs_to_remove) < num_to_remove: + remaining_programs = [ + p + for p in sorted_programs + if p not in programs_to_remove and p.id != self.best_program_id + ] + additional_removals = remaining_programs[: num_to_remove - len(programs_to_remove)] + programs_to_remove.extend(additional_removals) + + # Remove the selected programs + for program in programs_to_remove: + program_id = program.id + + # Remove from main programs dict + if program_id in self.programs: + del self.programs[program_id] + + # Remove from feature map + keys_to_remove = [] + for key, pid in self.feature_map.items(): + if pid == program_id: + keys_to_remove.append(key) + for key in keys_to_remove: + del self.feature_map[key] + + # Remove from islands + for island in self.islands: + island.discard(program_id) + + # Remove from archive + self.archive.discard(program_id) + + logger.debug(f"Removed program {program_id} due to population limit") + + logger.info(f"Population size after cleanup: {len(self.programs)}") + + # Island management methods + def set_current_island(self, island_idx: int) -> None: + """Set which island is currently being evolved""" + self.current_island = island_idx % len(self.islands) + logger.debug(f"Switched to evolving island {self.current_island}") + + def next_island(self) -> int: + """Move to the next island in round-robin fashion""" + self.current_island = (self.current_island + 1) % len(self.islands) + logger.debug(f"Advanced to island {self.current_island}") + return self.current_island + + def increment_island_generation(self, island_idx: Optional[int] = None) -> None: + """Increment generation counter for an island""" + idx = island_idx if island_idx is not None else self.current_island + self.island_generations[idx] += 1 + logger.debug(f"Island {idx} generation incremented to {self.island_generations[idx]}") + + def should_migrate(self) -> bool: + """Check if migration should occur based on generation counters""" + max_generation = max(self.island_generations) + return (max_generation - self.last_migration_generation) >= self.migration_interval + + def migrate_programs(self) -> None: + """ + Perform migration between islands + + This should be called periodically to share good solutions between islands + """ + if len(self.islands) < 2: + return + + logger.info("Performing migration between islands") + + for i, island in enumerate(self.islands): + if len(island) == 0: + continue + + # Select top programs from this island for migration + island_programs = [self.programs[pid] for pid in island if pid in self.programs] + if not 
+    def migrate_programs(self) -> None:
+        """
+        Perform migration between islands
+
+        This should be called periodically to share good solutions between islands
+        """
+        if len(self.islands) < 2:
+            return
+
+        logger.info("Performing migration between islands")
+
+        for i, island in enumerate(self.islands):
+            if len(island) == 0:
+                continue
+
+            # Select top programs from this island for migration
+            island_programs = [self.programs[pid] for pid in island if pid in self.programs]
+            if not island_programs:
+                continue
+
+            # Sort by fitness (using combined_score or average metrics)
+            island_programs.sort(
+                key=lambda p: p.metrics.get(
+                    "combined_score", sum(p.metrics.values()) / max(1, len(p.metrics))
+                ),
+                reverse=True,
+            )
+
+            # Select top programs for migration
+            num_to_migrate = max(1, int(len(island_programs) * self.migration_rate))
+            migrants = island_programs[:num_to_migrate]
+
+            # Migrate to adjacent islands (ring topology)
+            target_islands = [(i + 1) % len(self.islands), (i - 1) % len(self.islands)]
+
+            for migrant in migrants:
+                for target_island in target_islands:
+                    # Create a copy for migration (to avoid removing from source)
+                    migrant_copy = Program(
+                        id=f"{migrant.id}_migrant_{target_island}",
+                        code=migrant.code,
+                        language=migrant.language,
+                        parent_id=migrant.id,
+                        generation=migrant.generation,
+                        metrics=migrant.metrics.copy(),
+                        metadata={**migrant.metadata, "island": target_island, "migrant": True},
+                    )
+
+                    # Add to target island
+                    self.islands[target_island].add(migrant_copy.id)
+                    self.programs[migrant_copy.id] = migrant_copy
+
+                    logger.debug(
+                        f"Migrated program {migrant.id} from island {i} to island {target_island}"
+                    )
+
+        # Update last migration generation
+        self.last_migration_generation = max(self.island_generations)
+        logger.info(f"Migration completed at generation {self.last_migration_generation}")
+
+    def get_island_stats(self) -> List[dict]:
+        """Get statistics for each island"""
+        stats = []
+
+        for i, island in enumerate(self.islands):
+            island_programs = [self.programs[pid] for pid in island if pid in self.programs]
+
+            if island_programs:
+                scores = [
+                    p.metrics.get(
+                        "combined_score", sum(p.metrics.values()) / max(1, len(p.metrics))
+                    )
+                    for p in island_programs
+                ]
+
+                best_score = max(scores) if scores else 0.0
+                avg_score = sum(scores) / len(scores) if scores else 0.0
+                diversity = self._calculate_island_diversity(island_programs)
+            else:
+                best_score = avg_score = diversity = 0.0
+
+            stats.append(
+                {
+                    "island": i,
+                    "population_size": len(island_programs),
+                    "best_score": best_score,
+                    "average_score": avg_score,
+                    "diversity": diversity,
+                    "generation": self.island_generations[i],
+                    "is_current": i == self.current_island,
+                }
+            )
+
+        return stats
+
+    def _calculate_island_diversity(self, programs: List[Program]) -> float:
+        """Calculate diversity within an island"""
+        if len(programs) < 2:
+            return 0.0
+
+        total_distance = 0
+        comparisons = 0
+
+        # Sample up to 10 programs for efficiency
+        sample_size = min(10, len(programs))
+        sample_programs = (
+            random.sample(programs, sample_size) if len(programs) > sample_size else programs
+        )
+
+        for i, prog1 in enumerate(sample_programs):
+            for prog2 in sample_programs[i + 1 :]:
+                total_distance += calculate_edit_distance(prog1.code, prog2.code)
+                comparisons += 1
+
+        return total_distance / max(1, comparisons)
+
+    def log_island_status(self) -> None:
+        """Log current status of all islands"""
+        stats = self.get_island_stats()
+        logger.info("Island Status:")
+        for stat in stats:
+            current_marker = " *" if stat["is_current"] else "  "
+            logger.info(
+                f"{current_marker} Island {stat['island']}: {stat['population_size']} programs, "
+                f"best={stat['best_score']:.4f}, avg={stat['average_score']:.4f}, "
+                f"diversity={stat['diversity']:.2f}, gen={stat['generation']}"
+            )
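Taken together, the island methods above define a simple lifecycle: evolve the current island, advance that island's generation counter, migrate once the configured interval has elapsed, then rotate round-robin. The following is a minimal driver-loop sketch built on this API; the `db` handle and the `evolve_one_generation` stub are illustrative assumptions, not part of this diff:

```python
def evolve_one_generation(db) -> None:
    """Stand-in for the real sample -> mutate -> evaluate -> add step."""
    ...

def run_islands(db, total_generations: int) -> None:
    for _ in range(total_generations):
        evolve_one_generation(db)         # work on db.current_island
        db.increment_island_generation()  # bump that island's counter

        # should_migrate() fires once (max island generation minus
        # last_migration_generation) reaches migration_interval
        if db.should_migrate():
            db.migrate_programs()         # copy top programs to both ring neighbors
            db.log_island_status()

        db.next_island()                  # round-robin to the next island
```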
diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py
index 4b111f326..d7f4ed654 100644
--- a/openevolve/evaluator.py
+++ b/openevolve/evaluator.py
@@ -89,46 +89,61 @@ async def evaluate_program(
             Dictionary of metric name to score
         """
         start_time = time.time()
+        program_id_str = f" {program_id}" if program_id else ""
 
-        # Create a temporary file for the program
-        with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
-            temp_file.write(program_code.encode("utf-8"))
-            temp_file_path = temp_file.name
+        # Retry logic for evaluation
+        last_exception = None
+        for attempt in range(self.config.max_retries + 1):
+            # Create a temporary file for the program
+            with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
+                temp_file.write(program_code.encode("utf-8"))
+                temp_file_path = temp_file.name
 
-        try:
-            # Run evaluation
-            if self.config.cascade_evaluation:
-                # Run cascade evaluation
-                metrics = await self._cascade_evaluate(temp_file_path)
-            else:
-                # Run direct evaluation
-                metrics = await self._direct_evaluate(temp_file_path)
-
-            # Add LLM feedback if configured
-            if self.config.use_llm_feedback and self.llm_ensemble:
-                feedback_metrics = await self._llm_evaluate(program_code)
-
-                # Combine metrics
-                for name, value in feedback_metrics.items():
-                    metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
-
-            elapsed = time.time() - start_time
-            program_id_str = f" {program_id}" if program_id else ""
-            logger.info(
-                f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
-                f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
-            )
-
-            return metrics
+            try:
+                # Run evaluation
+                if self.config.cascade_evaluation:
+                    # Run cascade evaluation
+                    metrics = await self._cascade_evaluate(temp_file_path)
+                else:
+                    # Run direct evaluation
+                    metrics = await self._direct_evaluate(temp_file_path)
 
-        except Exception as e:
-            logger.error(f"Error evaluating program: {str(e)}")
-            return {"error": 0.0}
+                # Add LLM feedback if configured
+                if self.config.use_llm_feedback and self.llm_ensemble:
+                    feedback_metrics = await self._llm_evaluate(program_code)
+
+                    # Combine metrics
+                    for name, value in feedback_metrics.items():
+                        metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
+
+                elapsed = time.time() - start_time
+                logger.info(
+                    f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
+                    f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
+                )
+
+                return metrics
+
+            except Exception as e:
+                last_exception = e
+                logger.warning(
+                    f"Evaluation attempt {attempt + 1}/{self.config.max_retries + 1} failed for program{program_id_str}: {str(e)}"
+                )
+
+                # If this is not the last attempt, wait a bit before retrying
+                if attempt < self.config.max_retries:
+                    await asyncio.sleep(1.0)  # Wait 1 second before retry
+
+            finally:
+                # Clean up temporary file
+                if os.path.exists(temp_file_path):
+                    os.unlink(temp_file_path)
 
-        finally:
-            # Clean up temporary file
-            if os.path.exists(temp_file_path):
-                os.unlink(temp_file_path)
+        # All retries failed
+        logger.error(
+            f"All evaluation attempts failed for program{program_id_str}. Last error: {str(last_exception)}"
+        )
+        return {"error": 0.0}
 
     @run_in_executor
     def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
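The retry wrapper added to `evaluate_program` follows a common pattern: `max_retries + 1` total attempts, a fixed one-second pause between failures, and per-attempt temp-file cleanup in `finally`. The same pattern in isolation (a generic sketch, assuming a config-style `max_retries`; not the evaluator's actual API):

```python
import asyncio
from typing import Awaitable, Callable, Optional, TypeVar

T = TypeVar("T")

async def with_retries(make_attempt: Callable[[], Awaitable[T]], max_retries: int) -> T:
    """Run make_attempt up to max_retries + 1 times, sleeping 1s between failures."""
    last_exception: Optional[Exception] = None
    for attempt in range(max_retries + 1):
        try:
            return await make_attempt()
        except Exception as e:
            last_exception = e
            if attempt < max_retries:
                await asyncio.sleep(1.0)  # fixed pause, mirroring the diff above
    raise last_exception
```

Note one deliberate difference: rather than re-raising as this sketch does, the evaluator swallows the final failure and returns `{"error": 0.0}` so the evolution loop can continue.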
diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py
index 0cb9cc461..9268b5703 100644
--- a/openevolve/llm/openai.py
+++ b/openevolve/llm/openai.py
@@ -36,8 +36,10 @@ def __init__(
 
     async def generate(self, prompt: str, **kwargs) -> str:
         """Generate text from a prompt"""
+        # Use default system message if not provided in kwargs
+        system_message = kwargs.pop("system_message", "You are a helpful assistant.")
         return await self.generate_with_context(
-            system_message=self.config.system_message,
+            system_message=system_message,
             messages=[{"role": "user", "content": prompt}],
             **kwargs,
         )
diff --git a/openevolve/prompt/sampler.py b/openevolve/prompt/sampler.py
index 8d59220c7..ad7a6be38 100644
--- a/openevolve/prompt/sampler.py
+++ b/openevolve/prompt/sampler.py
@@ -276,10 +276,65 @@ def _format_evolution_history(
                 + "\n\n"
             )
 
+        # Format diverse programs using num_diverse_programs config
+        diverse_programs_str = ""
+        if (
+            self.config.num_diverse_programs > 0
+            and len(top_programs) > self.config.num_top_programs
+        ):
+            # Skip the top programs we already included
+            remaining_programs = top_programs[self.config.num_top_programs :]
+
+            # Sample diverse programs from the remaining
+            num_diverse = min(self.config.num_diverse_programs, len(remaining_programs))
+            if num_diverse > 0:
+                # Use random sampling to get diverse programs
+                diverse_programs = random.sample(remaining_programs, num_diverse)
+
+                diverse_programs_str += "\n\n## Diverse Programs\n\n"
+
+                for i, program in enumerate(diverse_programs):
+                    # Extract a snippet (first 5 lines for diversity)
+                    program_code = program.get("code", "")
+                    program_snippet = "\n".join(program_code.split("\n")[:5])
+                    if len(program_code.split("\n")) > 5:
+                        program_snippet += "\n# ... (truncated)"
+
+                    # Calculate a composite score
+                    score = sum(program.get("metrics", {}).values()) / max(
+                        1, len(program.get("metrics", {}))
+                    )
+
+                    # Extract key features
+                    key_features = program.get("key_features", [])
+                    if not key_features:
+                        key_features = [
+                            f"Alternative approach to {name}"
+                            for name in list(program.get("metrics", {}).keys())[
+                                :2
+                            ]  # Just first 2 metrics
+                        ]
+
+                    key_features_str = ", ".join(key_features)
+
+                    diverse_programs_str += (
+                        top_program_template.format(
+                            program_number=f"D{i + 1}",
+                            score=f"{score:.4f}",
+                            language=language,
+                            program_snippet=program_snippet,
+                            key_features=key_features_str,
+                        )
+                        + "\n\n"
+                    )
+
+        # Combine top and diverse programs
+        combined_programs_str = top_programs_str + diverse_programs_str
+
         # Combine into full history
         return history_template.format(
             previous_attempts=previous_attempts_str.strip(),
-            top_programs=top_programs_str.strip(),
+            top_programs=combined_programs_str.strip(),
         )
 
     def _apply_template_variations(self, template: str) -> str:
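Two behavior notes on the last two hunks. First, `generate()` now honors a per-call `system_message` kwarg, falling back to a generic assistant message instead of `self.config.system_message`, e.g. `await llm.generate(prompt, system_message="You are a code reviewer.")`. Second, the prompt history gains a "Diverse Programs" section (entries labeled `D1`, `D2`, ..., truncated to five lines each) drawn at random from candidates beyond the first `num_top_programs`. The selection boils down to the following standalone sketch; the function and parameter names here are illustrative, not the sampler's API:

```python
import random
from typing import Dict, List, Tuple

def split_top_and_diverse(
    programs: List[Dict], num_top: int, num_diverse: int
) -> Tuple[List[Dict], List[Dict]]:
    """First num_top entries are shown verbatim; num_diverse are sampled from the rest."""
    top = programs[:num_top]
    remainder = programs[num_top:]
    diverse = random.sample(remainder, min(num_diverse, len(remainder)))
    return top, diverse
```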