diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 871411ac7..184b790a9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -29,7 +29,7 @@ black openevolve tests examples
 
 ## Pull Request Process
 
-1. Create a new branch for your feature or bugfix: `git checkout -b feature/your-feature-name`
+1. Create a new branch for your feature or bugfix: `git checkout -b feat-your-feature-name`
 2. Make your changes
 3. Add tests for your changes
 4. Run the tests to make sure everything passes: `python -m unittest discover tests`
diff --git a/configs/README.md b/configs/README.md
new file mode 100644
index 000000000..6ce24383c
--- /dev/null
+++ b/configs/README.md
@@ -0,0 +1,73 @@
+# OpenEvolve Configuration Files
+
+This directory contains configuration files for OpenEvolve, along with examples for different use cases.
+
+## Configuration Files
+
+### `default_config.yaml`
+The main configuration file containing all available options with sensible defaults. This file includes:
+- Complete documentation for all configuration parameters
+- Default values for all settings
+- **Island-based evolution parameters** for proper evolutionary diversity
+
+Use this file as a template for your own configurations.
+
+### `island_config_example.yaml`
+A practical example configuration demonstrating a proper island-based evolution setup. It shows:
+- Recommended island settings for most use cases
+- Balanced migration parameters
+- A complete working configuration
+
+### `island_examples.yaml`
+Multiple example configurations for different scenarios:
+- **Maximum Diversity**: Many islands, frequent migration
+- **Focused Exploration**: Few islands, rare migration
+- **Balanced Approach**: Default recommended settings
+- **Quick Exploration**: Small-scale rapid testing
+- **Large-Scale Evolution**: Complex optimization runs
+
+It also includes guidelines for choosing parameters based on your problem's characteristics.
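+
+Because `island_examples.yaml` bundles several named configurations under top-level keys (`max_diversity`, `balanced`, and so on), extract the section you want before using it. A minimal sketch, assuming PyYAML is installed and that a partial config (containing only the `database` section) is merged with the defaults:
+
+```python
+import yaml
+
+# Load the bundle of named example configurations
+with open("configs/island_examples.yaml") as f:
+    examples = yaml.safe_load(f)
+
+# Pick one named configuration (here "balanced") and write it out
+# as a standalone config file
+with open("my_config.yaml", "w") as f:
+    yaml.safe_dump(examples["balanced"], f)
+```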
+ +## Island-Based Evolution Parameters + +The key new parameters for proper evolutionary diversity are: + +```yaml +database: + num_islands: 5 # Number of separate populations + migration_interval: 50 # Migrate every N generations + migration_rate: 0.1 # Fraction of top programs to migrate +``` + +### Parameter Guidelines + +- **num_islands**: 3-10 for most problems (more = more diversity) +- **migration_interval**: 25-100 generations (higher = more independence) +- **migration_rate**: 0.05-0.2 (5%-20%, higher = faster knowledge sharing) + +### When to Use What + +- **Complex problems** → More islands, less frequent migration +- **Simple problems** → Fewer islands, more frequent migration +- **Long runs** → More islands to maintain diversity +- **Short runs** → Fewer islands for faster convergence + +## Usage + +Copy any of these files as a starting point for your configuration: + +```bash +cp configs/default_config.yaml my_config.yaml +# Edit my_config.yaml for your specific needs +``` + +Then use with OpenEvolve: + +```python +from openevolve import OpenEvolve +evolve = OpenEvolve( + initial_program_path="program.py", + evaluation_file="evaluator.py", + config_path="my_config.yaml" +) +``` diff --git a/configs/default_config.yaml b/configs/default_config.yaml index f9180db8d..4bc7558aa 100644 --- a/configs/default_config.yaml +++ b/configs/default_config.yaml @@ -55,9 +55,7 @@ prompt: - "I suggest the following improvements:" - "We can enhance this code by:" - # Meta-prompting (experimental) - use_meta_prompting: false # Use LLM to generate parts of the prompt - meta_prompt_weight: 0.1 # Weight for meta-prompting influence + # Note: meta-prompting features are not yet implemented # Database configuration database: @@ -68,13 +66,19 @@ database: # Evolutionary parameters population_size: 1000 # Maximum number of programs to keep in memory archive_size: 100 # Size of elite archive - num_islands: 5 # Number of islands for island model + num_islands: 5 # Number of islands for island model (separate populations) + + # Island-based evolution parameters + # Islands provide diversity by maintaining separate populations that evolve independently. + # Migration periodically shares the best solutions between adjacent islands. 
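+  # Example: with num_islands: 5, migration_interval: 50 and migration_rate: 0.1,
+  # every 50 island generations the top 10% of each island's programs are
+  # copied to its two neighbours in the ring topology.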
+ migration_interval: 50 # Migrate between islands every N generations + migration_rate: 0.1 # Fraction of top programs to migrate (0.1 = 10%) # Selection parameters elite_selection_ratio: 0.1 # Ratio of elite programs to select exploration_ratio: 0.2 # Ratio of exploration vs exploitation exploitation_ratio: 0.7 # Ratio of exploitation vs random selection - diversity_metric: "edit_distance" # Diversity metric (edit_distance, feature_based) + # Note: diversity_metric is fixed to "edit_distance" (feature_based not implemented) # Feature map dimensions for MAP-Elites feature_dimensions: # Dimensions for MAP-Elites feature map @@ -88,9 +92,7 @@ evaluator: timeout: 300 # Maximum evaluation time in seconds max_retries: 3 # Maximum number of retries for evaluation - # Resource limits - memory_limit_mb: null # Memory limit for evaluation (null = no limit) - cpu_limit: null # CPU limit for evaluation (null = no limit) + # Note: resource limits (memory_limit_mb, cpu_limit) are not yet implemented # Evaluation strategies cascade_evaluation: true # Use cascade evaluation to filter bad solutions early @@ -101,7 +103,7 @@ evaluator: # Parallel evaluation parallel_evaluations: 4 # Number of parallel evaluations - distributed: false # Use distributed evaluation + # Note: distributed evaluation is not yet implemented # LLM-based feedback (experimental) use_llm_feedback: false # Use LLM to evaluate code quality diff --git a/configs/island_config_example.yaml b/configs/island_config_example.yaml new file mode 100644 index 000000000..7a7ab6db7 --- /dev/null +++ b/configs/island_config_example.yaml @@ -0,0 +1,55 @@ +# OpenEvolve Island-Based Evolution Configuration +# This configuration demonstrates the proper use of island-based evolution + +# General settings +max_iterations: 1000 +checkpoint_interval: 100 +log_level: "INFO" + +# LLM configuration +llm: + primary_model: "gemini-2.0-flash-lite" + primary_model_weight: 0.8 + secondary_model: "gemini-2.0-flash" + secondary_model_weight: 0.2 + temperature: 0.7 + top_p: 0.95 + max_tokens: 4096 + +# Database configuration with proper island settings +database: + population_size: 500 + archive_size: 100 + + # Island-based evolution settings + num_islands: 5 # Number of separate populations + migration_interval: 50 # Migrate every 50 generations + migration_rate: 0.1 # Migrate 10% of top programs + + # Selection parameters + elite_selection_ratio: 0.1 + exploration_ratio: 0.3 + exploitation_ratio: 0.7 + # Note: diversity_metric fixed to "edit_distance" + + # Feature map dimensions for MAP-Elites + feature_dimensions: ["score", "complexity"] + feature_bins: 10 + +# Prompt configuration +prompt: + num_top_programs: 3 + num_diverse_programs: 2 + use_template_stochasticity: true + +# Evaluator configuration +evaluator: + timeout: 300 + max_retries: 3 + cascade_evaluation: true + parallel_evaluations: 4 + +# Evolution settings +diff_based_evolution: true +allow_full_rewrites: false +max_code_length: 10000 diff --git a/configs/island_examples.yaml b/configs/island_examples.yaml new file mode 100644 index 000000000..4c62ac5e0 --- /dev/null +++ b/configs/island_examples.yaml @@ -0,0 +1,80 @@ +# OpenEvolve Island-Based Evolution Configuration Examples +# Different configurations for various use cases + +# Configuration for Maximum Diversity (Many Islands, Frequent Migration) +# Use this when you want to explore the search space thoroughly +# Good for: Complex problems, avoiding local optima, long runs +max_diversity: + database: + num_islands: 10 # More islands = more 
diversity + migration_interval: 25 # More frequent migration + migration_rate: 0.2 # Higher migration rate + population_size: 1000 + archive_size: 200 + +# Configuration for Focused Exploration (Few Islands, Rare Migration) +# Use this when you want deeper exploration within each island +# Good for: Problems with clear structure, shorter runs +focused_exploration: + database: + num_islands: 3 # Fewer islands = deeper exploration + migration_interval: 100 # Less frequent migration + migration_rate: 0.05 # Lower migration rate + population_size: 500 + archive_size: 50 + +# Configuration for Balanced Approach (Default Settings) +# Use this as a starting point for most problems +# Good for: General use, medium-length runs +balanced: + database: + num_islands: 5 # Balanced number of islands + migration_interval: 50 # Moderate migration frequency + migration_rate: 0.1 # Moderate migration rate + population_size: 1000 + archive_size: 100 + +# Configuration for Quick Exploration (Small Scale) +# Use this for rapid prototyping and testing +# Good for: Small problems, quick experiments +quick_exploration: + database: + num_islands: 3 + migration_interval: 20 + migration_rate: 0.15 + population_size: 200 + archive_size: 30 + +# Configuration for Large-Scale Evolution (High Performance) +# Use this for complex problems requiring extensive search +# Good for: Complex optimization, long evolutionary runs +large_scale: + database: + num_islands: 15 # Many islands for parallel exploration + migration_interval: 75 # Balanced migration timing + migration_rate: 0.08 # Conservative migration rate + population_size: 2000 # Large populations + archive_size: 300 + +# Guidelines for choosing parameters: +# +# num_islands: +# - More islands = more diversity, slower convergence +# - Fewer islands = faster convergence, risk of premature convergence +# - Recommended: 3-10 for most problems +# +# migration_interval: +# - Lower values = more frequent knowledge sharing +# - Higher values = more independent evolution +# - Recommended: 25-100 generations +# +# migration_rate: +# - Higher values = faster knowledge propagation +# - Lower values = preserve island diversity longer +# - Recommended: 0.05-0.2 (5%-20%) +# +# Rule of thumb: +# - Complex problems → More islands, less frequent migration +# - Simple problems → Fewer islands, more frequent migration +# - Long runs → More islands to maintain diversity +# - Short runs → Fewer islands for faster convergence diff --git a/openevolve/config.py b/openevolve/config.py index b04dc7c72..460907ba4 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -80,6 +80,13 @@ class DatabaseConfig: feature_dimensions: List[str] = field(default_factory=lambda: ["score", "complexity"]) feature_bins: int = 10 + # Migration parameters for island-based evolution + migration_interval: int = 50 # Migrate every N generations + migration_rate: float = 0.1 # Fraction of population to migrate + + # Random seed for reproducible sampling + random_seed: Optional[int] = None + @dataclass class EvaluatorConfig: @@ -188,8 +195,9 @@ def to_dict(self) -> Dict[str, Any]: "num_diverse_programs": self.prompt.num_diverse_programs, "use_template_stochasticity": self.prompt.use_template_stochasticity, "template_variations": self.prompt.template_variations, - "use_meta_prompting": self.prompt.use_meta_prompting, - "meta_prompt_weight": self.prompt.meta_prompt_weight, + # Note: meta-prompting features not implemented + # "use_meta_prompting": self.prompt.use_meta_prompting, + # "meta_prompt_weight": 
self.prompt.meta_prompt_weight, }, "database": { "db_path": self.database.db_path, @@ -200,19 +208,25 @@ def to_dict(self) -> Dict[str, Any]: "elite_selection_ratio": self.database.elite_selection_ratio, "exploration_ratio": self.database.exploration_ratio, "exploitation_ratio": self.database.exploitation_ratio, - "diversity_metric": self.database.diversity_metric, + # Note: diversity_metric fixed to "edit_distance" + # "diversity_metric": self.database.diversity_metric, "feature_dimensions": self.database.feature_dimensions, "feature_bins": self.database.feature_bins, + "migration_interval": self.database.migration_interval, + "migration_rate": self.database.migration_rate, + "random_seed": self.database.random_seed, }, "evaluator": { "timeout": self.evaluator.timeout, "max_retries": self.evaluator.max_retries, - "memory_limit_mb": self.evaluator.memory_limit_mb, - "cpu_limit": self.evaluator.cpu_limit, + # Note: resource limits not implemented + # "memory_limit_mb": self.evaluator.memory_limit_mb, + # "cpu_limit": self.evaluator.cpu_limit, "cascade_evaluation": self.evaluator.cascade_evaluation, "cascade_thresholds": self.evaluator.cascade_thresholds, "parallel_evaluations": self.evaluator.parallel_evaluations, - "distributed": self.evaluator.distributed, + # Note: distributed evaluation not implemented + # "distributed": self.evaluator.distributed, "use_llm_feedback": self.evaluator.use_llm_feedback, "llm_feedback_weight": self.evaluator.llm_feedback_weight, }, diff --git a/openevolve/controller.py b/openevolve/controller.py index d090efae1..68d3e0c12 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -67,6 +67,15 @@ def __init__( # Set up logging self._setup_logging() + # Set random seed for reproducibility if specified + if self.config.random_seed is not None: + import random + import numpy as np + + random.seed(self.config.random_seed) + np.random.seed(self.config.random_seed) + logger.info(f"Set random seed to {self.config.random_seed} for reproducibility") + # Load initial program self.initial_program_path = initial_program_path self.initial_program_code = self._load_initial_program() @@ -85,6 +94,11 @@ def __init__( # Initialize components self.llm_ensemble = LLMEnsemble(self.config.llm) self.prompt_sampler = PromptSampler(self.config.prompt) + + # Pass random seed to database if specified + if self.config.random_seed is not None: + self.config.database.random_seed = self.config.random_seed + self.database = ProgramDatabase(self.config.database) self.evaluator = Evaluator(self.config.evaluator, evaluation_file, self.llm_ensemble) @@ -179,10 +193,27 @@ async def run( f"Starting evolution from iteration {start_iteration} for {max_iterations} iterations (total: {total_iterations})" ) + # Island-based evolution variables + programs_per_island = max( + 1, max_iterations // (self.config.database.num_islands * 10) + ) # Dynamic allocation + current_island_counter = 0 + + logger.info(f"Using island-based evolution with {self.config.database.num_islands} islands") + self.database.log_island_status() + for i in range(start_iteration, total_iterations): iteration_start = time.time() - # Sample parent and inspirations + # Manage island evolution - switch islands periodically + if i > start_iteration and current_island_counter >= programs_per_island: + self.database.next_island() + current_island_counter = 0 + logger.debug(f"Switched to island {self.database.current_island}") + + current_island_counter += 1 + + # Sample parent and inspirations from current island parent, 
inspirations = self.database.sample() # Build prompt @@ -252,9 +283,18 @@ async def run( }, ) - # Add to database + # Add to database (will be added to current island) self.database.add(child_program, iteration=i + 1) + # Increment generation for current island + self.database.increment_island_generation() + + # Check if migration should occur + if self.database.should_migrate(): + logger.info(f"Performing migration at iteration {i+1}") + self.database.migrate_programs() + self.database.log_island_status() + # Log progress iteration_time = time.time() - iteration_start self._log_iteration(i, parent, child_program, iteration_time) @@ -271,6 +311,9 @@ async def run( # Save checkpoint if (i + 1) % self.config.checkpoint_interval == 0: self._save_checkpoint(i + 1) + # Also log island status at checkpoints + logger.info(f"Island status at checkpoint {i+1}:") + self.database.log_island_status() # Check if target score reached if target_score is not None: diff --git a/openevolve/database.py b/openevolve/database.py index e215ecfbd..772250ec1 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -1,603 +1,918 @@ -""" -Program database for OpenEvolve -""" - -import json -import logging -import os -import random -import time -from dataclasses import asdict, dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple, Union - -import numpy as np - -from openevolve.config import DatabaseConfig -from openevolve.utils.code_utils import calculate_edit_distance - -logger = logging.getLogger(__name__) - - -@dataclass -class Program: - """Represents a program in the database""" - - # Program identification - id: str - code: str - language: str = "python" - - # Evolution information - parent_id: Optional[str] = None - generation: int = 0 - timestamp: float = field(default_factory=time.time) - iteration_found: int = 0 # Track which iteration this program was found - - # Performance metrics - metrics: Dict[str, float] = field(default_factory=dict) - - # Derived features - complexity: float = 0.0 - diversity: float = 0.0 - - # Metadata - metadata: Dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary representation""" - return asdict(self) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "Program": - """Create from dictionary representation""" - return cls(**data) - - -class ProgramDatabase: - """ - Database for storing and sampling programs during evolution - - The database implements a combination of MAP-Elites algorithm and - island-based population model to maintain diversity during evolution. - It also tracks the absolute best program separately to ensure it's never lost. 
- """ - - def __init__(self, config: DatabaseConfig): - self.config = config - - # In-memory program storage - self.programs: Dict[str, Program] = {} - - # Feature grid for MAP-Elites - self.feature_map: Dict[str, str] = {} - self.feature_bins = config.feature_bins - - # Island populations - self.islands: List[Set[str]] = [set() for _ in range(config.num_islands)] - - # Archive of elite programs - self.archive: Set[str] = set() - - # Track the absolute best program separately - self.best_program_id: Optional[str] = None - - # Track the last iteration number (for resuming) - self.last_iteration: int = 0 - - # Load database from disk if path is provided - if config.db_path and os.path.exists(config.db_path): - self.load(config.db_path) - - logger.info(f"Initialized program database with {len(self.programs)} programs") - - def add(self, program: Program, iteration: int = None) -> str: - """ - Add a program to the database - - Args: - program: Program to add - iteration: Current iteration (defaults to last_iteration) - - Returns: - Program ID - """ - # Store the program - # If iteration is provided, update the program's iteration_found - if iteration is not None: - program.iteration_found = iteration - # Update last_iteration if needed - self.last_iteration = max(self.last_iteration, iteration) - - self.programs[program.id] = program - - # Calculate feature coordinates for MAP-Elites - feature_coords = self._calculate_feature_coords(program) - - # Add to feature map (replacing existing if better) - feature_key = self._feature_coords_to_key(feature_coords) - if feature_key not in self.feature_map or self._is_better( - program, self.programs[self.feature_map[feature_key]] - ): - self.feature_map[feature_key] = program.id - - # Add to an island (randomly) - island_idx = random.randint(0, len(self.islands) - 1) - self.islands[island_idx].add(program.id) - - # Update archive - self._update_archive(program) - - # Update the absolute best program tracking - self._update_best_program(program) - - # Save to disk if configured - if self.config.db_path: - self._save_program(program) - - logger.debug(f"Added program {program.id} to database") - return program.id - - def get(self, program_id: str) -> Optional[Program]: - """ - Get a program by ID - - Args: - program_id: Program ID - - Returns: - Program or None if not found - """ - return self.programs.get(program_id) - - def sample(self) -> Tuple[Program, List[Program]]: - """ - Sample a program and inspirations for the next evolution step - - Returns: - Tuple of (parent_program, inspiration_programs) - """ - # Select parent program - parent = self._sample_parent() - - # Select inspirations - inspirations = self._sample_inspirations(parent, n=5) - - logger.debug(f"Sampled parent {parent.id} and {len(inspirations)} inspirations") - return parent, inspirations - - def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]: - """ - Get the best program based on a metric - - Args: - metric: Metric to use for ranking (uses combined_score or average if None) - - Returns: - Best program or None if database is empty - """ - if not self.programs: - return None - - # If no specific metric and we have a tracked best program, return it - if metric is None and self.best_program_id and self.best_program_id in self.programs: - logger.debug(f"Using tracked best program: {self.best_program_id}") - return self.programs[self.best_program_id] - - if metric: - # Sort by specific metric - sorted_programs = sorted( - [p for p in self.programs.values() if 
metric in p.metrics], - key=lambda p: p.metrics[metric], - reverse=True, - ) - if sorted_programs: - logger.debug(f"Found best program by metric '{metric}': {sorted_programs[0].id}") - elif self.programs and all("combined_score" in p.metrics for p in self.programs.values()): - # Sort by combined_score if it exists (preferred method) - sorted_programs = sorted( - self.programs.values(), key=lambda p: p.metrics["combined_score"], reverse=True - ) - if sorted_programs: - logger.debug(f"Found best program by combined_score: {sorted_programs[0].id}") - else: - # Sort by average of all metrics as fallback - sorted_programs = sorted( - self.programs.values(), - key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)), - reverse=True, - ) - if sorted_programs: - logger.debug(f"Found best program by average metrics: {sorted_programs[0].id}") - - # Update the best program tracking if we found a better program - if sorted_programs and ( - self.best_program_id is None or sorted_programs[0].id != self.best_program_id - ): - old_id = self.best_program_id - self.best_program_id = sorted_programs[0].id - logger.info(f"Updated best program tracking from {old_id} to {self.best_program_id}") - - # Also log the scores to help understand the update - if ( - old_id - and old_id in self.programs - and "combined_score" in self.programs[old_id].metrics - and "combined_score" in self.programs[self.best_program_id].metrics - ): - old_score = self.programs[old_id].metrics["combined_score"] - new_score = self.programs[self.best_program_id].metrics["combined_score"] - logger.info( - f"Score change: {old_score:.4f} → {new_score:.4f} ({new_score-old_score:+.4f})" - ) - - return sorted_programs[0] if sorted_programs else None - - def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Program]: - """ - Get the top N programs based on a metric - - Args: - n: Number of programs to return - metric: Metric to use for ranking (uses average if None) - - Returns: - List of top programs - """ - if not self.programs: - return [] - - if metric: - # Sort by specific metric - sorted_programs = sorted( - [p for p in self.programs.values() if metric in p.metrics], - key=lambda p: p.metrics[metric], - reverse=True, - ) - else: - # Sort by average of all metrics - sorted_programs = sorted( - self.programs.values(), - key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)), - reverse=True, - ) - - return sorted_programs[:n] - - def save(self, path: Optional[str] = None, iteration: int = 0) -> None: - """ - Save the database to disk - - Args: - path: Path to save to (uses config.db_path if None) - iteration: Current iteration number - """ - save_path = path or self.config.db_path - if not save_path: - logger.warning("No database path specified, skipping save") - return - - # Create directory if it doesn't exist - os.makedirs(save_path, exist_ok=True) - - # Save each program - for program in self.programs.values(): - self._save_program(program, save_path) - - # Save metadata - metadata = { - "feature_map": self.feature_map, - "islands": [list(island) for island in self.islands], - "archive": list(self.archive), - "best_program_id": self.best_program_id, - "last_iteration": iteration or self.last_iteration, - } - - with open(os.path.join(save_path, "metadata.json"), "w") as f: - json.dump(metadata, f) - - logger.info(f"Saved database with {len(self.programs)} programs to {save_path}") - - def load(self, path: str) -> None: - """ - Load the database from disk - - Args: - path: Path to load from - """ - if 
not os.path.exists(path): - logger.warning(f"Database path {path} does not exist, skipping load") - return - - # Load metadata - metadata_path = os.path.join(path, "metadata.json") - if os.path.exists(metadata_path): - with open(metadata_path, "r") as f: - metadata = json.load(f) - - self.feature_map = metadata.get("feature_map", {}) - self.islands = [set(island) for island in metadata.get("islands", [])] - self.archive = set(metadata.get("archive", [])) - self.best_program_id = metadata.get("best_program_id") - self.last_iteration = metadata.get("last_iteration", 0) - - logger.info(f"Loaded database metadata with last_iteration={self.last_iteration}") - - # Load programs - programs_dir = os.path.join(path, "programs") - if os.path.exists(programs_dir): - for program_file in os.listdir(programs_dir): - if program_file.endswith(".json"): - program_path = os.path.join(programs_dir, program_file) - try: - with open(program_path, "r") as f: - program_data = json.load(f) - - program = Program.from_dict(program_data) - self.programs[program.id] = program - except Exception as e: - logger.warning(f"Error loading program {program_file}: {str(e)}") - - logger.info(f"Loaded database with {len(self.programs)} programs from {path}") - - def _save_program(self, program: Program, base_path: Optional[str] = None) -> None: - """ - Save a program to disk - - Args: - program: Program to save - base_path: Base path to save to (uses config.db_path if None) - """ - save_path = base_path or self.config.db_path - if not save_path: - return - - # Create programs directory if it doesn't exist - programs_dir = os.path.join(save_path, "programs") - os.makedirs(programs_dir, exist_ok=True) - - # Save program - program_path = os.path.join(programs_dir, f"{program.id}.json") - with open(program_path, "w") as f: - json.dump(program.to_dict(), f) - - def _calculate_feature_coords(self, program: Program) -> List[int]: - """ - Calculate feature coordinates for the MAP-Elites grid - - Args: - program: Program to calculate features for - - Returns: - List of feature coordinates - """ - coords = [] - - for dim in self.config.feature_dimensions: - if dim == "complexity": - # Use code length as complexity measure - complexity = len(program.code) - bin_idx = min(int(complexity / 1000 * self.feature_bins), self.feature_bins - 1) - coords.append(bin_idx) - elif dim == "diversity": - # Use average edit distance to other programs - if len(self.programs) < 5: - bin_idx = 0 - else: - sample_programs = random.sample( - list(self.programs.values()), min(5, len(self.programs)) - ) - avg_distance = sum( - calculate_edit_distance(program.code, other.code) - for other in sample_programs - ) / len(sample_programs) - bin_idx = min( - int(avg_distance / 1000 * self.feature_bins), self.feature_bins - 1 - ) - coords.append(bin_idx) - elif dim == "score": - # Use average of metrics - if not program.metrics: - bin_idx = 0 - else: - avg_score = sum(program.metrics.values()) / len(program.metrics) - bin_idx = min(int(avg_score * self.feature_bins), self.feature_bins - 1) - coords.append(bin_idx) - elif dim in program.metrics: - # Use specific metric - score = program.metrics[dim] - bin_idx = min(int(score * self.feature_bins), self.feature_bins - 1) - coords.append(bin_idx) - else: - # Default to middle bin if feature not found - coords.append(self.feature_bins // 2) - - return coords - - def _feature_coords_to_key(self, coords: List[int]) -> str: - """ - Convert feature coordinates to a string key - - Args: - coords: Feature coordinates - - 
Returns: - String key - """ - return "-".join(str(c) for c in coords) - - def _is_better(self, program1: Program, program2: Program) -> bool: - """ - Determine if program1 is better than program2 - - Args: - program1: First program - program2: Second program - - Returns: - True if program1 is better than program2 - """ - # If no metrics, use newest - if not program1.metrics and not program2.metrics: - return program1.timestamp > program2.timestamp - - # If only one has metrics, it's better - if program1.metrics and not program2.metrics: - return True - if not program1.metrics and program2.metrics: - return False - - # Check for combined_score first (this is the preferred metric) - if "combined_score" in program1.metrics and "combined_score" in program2.metrics: - return program1.metrics["combined_score"] > program2.metrics["combined_score"] - - # Fallback to average of all metrics - avg1 = sum(program1.metrics.values()) / len(program1.metrics) - avg2 = sum(program2.metrics.values()) / len(program2.metrics) - - return avg1 > avg2 - - def _update_archive(self, program: Program) -> None: - """ - Update the archive of elite programs - - Args: - program: Program to consider for archive - """ - # If archive not full, add program - if len(self.archive) < self.config.archive_size: - self.archive.add(program.id) - return - - # Otherwise, find worst program in archive - archive_programs = [self.programs[pid] for pid in self.archive] - worst_program = min( - archive_programs, key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) - ) - - # Replace if new program is better - if self._is_better(program, worst_program): - self.archive.remove(worst_program.id) - self.archive.add(program.id) - - def _update_best_program(self, program: Program) -> None: - """ - Update the absolute best program tracking - - Args: - program: Program to consider as the new best - """ - # If we don't have a best program yet, this becomes the best - if self.best_program_id is None: - self.best_program_id = program.id - logger.debug(f"Set initial best program to {program.id}") - return - - # Compare with current best program - current_best = self.programs[self.best_program_id] - - # Update if the new program is better - if self._is_better(program, current_best): - old_id = self.best_program_id - self.best_program_id = program.id - - # Log the change - if "combined_score" in program.metrics and "combined_score" in current_best.metrics: - old_score = current_best.metrics["combined_score"] - new_score = program.metrics["combined_score"] - score_diff = new_score - old_score - logger.info( - f"New best program {program.id} replaces {old_id} (combined_score: {old_score:.4f} → {new_score:.4f}, +{score_diff:.4f})" - ) - else: - logger.info(f"New best program {program.id} replaces {old_id}") - - def _sample_parent(self) -> Program: - """ - Sample a parent program for the next evolution step - - Returns: - Parent program - """ - # Decide between exploitation and exploration - if random.random() < self.config.exploitation_ratio and self.archive: - # Exploitation: Use elite program from archive - parent_id = random.choice(list(self.archive)) - return self.programs[parent_id] - - # Exploration: Sample from an island - island_idx = random.randint(0, len(self.islands) - 1) - - if not self.islands[island_idx]: - # If island is empty, use best program - return self.get_best_program() or next(iter(self.programs.values())) - - parent_id = random.choice(list(self.islands[island_idx])) - return self.programs[parent_id] - - def 
_sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]: - """ - Sample inspiration programs for the next evolution step - - Args: - parent: Parent program - n: Number of inspirations to sample - - Returns: - List of inspiration programs - """ - inspirations = [] - - # Always include the absolute best program if available and different from parent - if self.best_program_id is not None and self.best_program_id != parent.id: - best_program = self.programs[self.best_program_id] - inspirations.append(best_program) - logger.debug(f"Including best program {self.best_program_id} in inspirations") - - # Add top programs as inspirations - top_n = max(1, int(n * self.config.elite_selection_ratio)) - top_programs = self.get_top_programs(n=top_n) - for program in top_programs: - if program.id not in [p.id for p in inspirations] and program.id != parent.id: - inspirations.append(program) - - # Add diverse programs - if len(self.programs) > n and len(inspirations) < n: - # Sample from different feature cells - feature_coords = self._calculate_feature_coords(parent) - - # Get programs from nearby feature cells - nearby_programs = [] - for _ in range(n - len(inspirations)): - # Perturb coordinates - perturbed_coords = [ - max(0, min(self.feature_bins - 1, c + random.randint(-1, 1))) - for c in feature_coords - ] - - # Try to get program from this cell - cell_key = self._feature_coords_to_key(perturbed_coords) - if cell_key in self.feature_map: - program_id = self.feature_map[cell_key] - if program_id != parent.id and program_id not in [p.id for p in inspirations]: - nearby_programs.append(self.programs[program_id]) - - # If we need more, add random programs - if len(inspirations) + len(nearby_programs) < n: - remaining = n - len(inspirations) - len(nearby_programs) - all_ids = set(self.programs.keys()) - excluded_ids = ( - {parent.id} - .union(p.id for p in inspirations) - .union(p.id for p in nearby_programs) - ) - available_ids = list(all_ids - excluded_ids) - - if available_ids: - random_ids = random.sample(available_ids, min(remaining, len(available_ids))) - random_programs = [self.programs[pid] for pid in random_ids] - nearby_programs.extend(random_programs) - - inspirations.extend(nearby_programs) - - return inspirations[:n] +""" +Program database for OpenEvolve +""" + +import json +import logging +import os +import random +import time +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple, Union + +import numpy as np + +from openevolve.config import DatabaseConfig +from openevolve.utils.code_utils import calculate_edit_distance + +logger = logging.getLogger(__name__) + + +@dataclass +class Program: + """Represents a program in the database""" + + # Program identification + id: str + code: str + language: str = "python" + + # Evolution information + parent_id: Optional[str] = None + generation: int = 0 + timestamp: float = field(default_factory=time.time) + iteration_found: int = 0 # Track which iteration this program was found + + # Performance metrics + metrics: Dict[str, float] = field(default_factory=dict) + + # Derived features + complexity: float = 0.0 + diversity: float = 0.0 + + # Metadata + metadata: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary representation""" + return asdict(self) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "Program": + """Create from dictionary representation""" + return cls(**data) + + 
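+# Example round trip (illustrative, not part of the module):
+#   p = Program(id="abc123", code="print('hi')", metrics={"combined_score": 0.5})
+#   assert Program.from_dict(p.to_dict()) == p
+
+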
+class ProgramDatabase: + """ + Database for storing and sampling programs during evolution + + The database implements a combination of MAP-Elites algorithm and + island-based population model to maintain diversity during evolution. + It also tracks the absolute best program separately to ensure it's never lost. + """ + + def __init__(self, config: DatabaseConfig): + self.config = config + + # In-memory program storage + self.programs: Dict[str, Program] = {} + + # Feature grid for MAP-Elites + self.feature_map: Dict[str, str] = {} + self.feature_bins = config.feature_bins + + # Island populations + self.islands: List[Set[str]] = [set() for _ in range(config.num_islands)] + + # Island-based evolution tracking + self.current_island: int = 0 # Track which island we're currently evolving + self.island_generations: List[int] = [0] * config.num_islands + + # Migration parameters + self.migration_interval: int = getattr(config, "migration_interval", 50) + self.migration_rate: float = getattr(config, "migration_rate", 0.1) + self.last_migration_generation: int = 0 + + # Archive of elite programs + self.archive: Set[str] = set() + + # Track the absolute best program separately + self.best_program_id: Optional[str] = None + + # Track the last iteration number (for resuming) + self.last_iteration: int = 0 + + # Load database from disk if path is provided + if config.db_path and os.path.exists(config.db_path): + self.load(config.db_path) + + # Set random seed for reproducible sampling if specified + if config.random_seed is not None: + import random + + random.seed(config.random_seed) + logger.debug(f"Database: Set random seed to {config.random_seed}") + + logger.info(f"Initialized program database with {len(self.programs)} programs") + + def add( + self, program: Program, iteration: int = None, target_island: Optional[int] = None + ) -> str: + """ + Add a program to the database + + Args: + program: Program to add + iteration: Current iteration (defaults to last_iteration) + target_island: Specific island to add to (uses current_island if None) + + Returns: + Program ID + """ + # Store the program + # If iteration is provided, update the program's iteration_found + if iteration is not None: + program.iteration_found = iteration + # Update last_iteration if needed + self.last_iteration = max(self.last_iteration, iteration) + + self.programs[program.id] = program + + # Enforce population size limit + self._enforce_population_limit() + + # Calculate feature coordinates for MAP-Elites + feature_coords = self._calculate_feature_coords(program) + + # Add to feature map (replacing existing if better) + feature_key = self._feature_coords_to_key(feature_coords) + if feature_key not in self.feature_map or self._is_better( + program, self.programs[self.feature_map[feature_key]] + ): + self.feature_map[feature_key] = program.id + + # Add to specific island (not random!) 
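+        # Unlike the previous implementation, which assigned each new program to a
+        # randomly chosen island, programs now join the island currently being
+        # evolved, unless an explicit target_island is requested.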
+ island_idx = target_island if target_island is not None else self.current_island + island_idx = island_idx % len(self.islands) # Ensure valid island + self.islands[island_idx].add(program.id) + + # Track which island this program belongs to + program.metadata["island"] = island_idx + + # Update archive + self._update_archive(program) + + # Update the absolute best program tracking + self._update_best_program(program) + + # Save to disk if configured + if self.config.db_path: + self._save_program(program) + + logger.debug(f"Added program {program.id} to island {island_idx}") + return program.id + + def get(self, program_id: str) -> Optional[Program]: + """ + Get a program by ID + + Args: + program_id: Program ID + + Returns: + Program or None if not found + """ + return self.programs.get(program_id) + + def sample(self) -> Tuple[Program, List[Program]]: + """ + Sample a program and inspirations for the next evolution step + + Returns: + Tuple of (parent_program, inspiration_programs) + """ + # Select parent program + parent = self._sample_parent() + + # Select inspirations + inspirations = self._sample_inspirations(parent, n=5) + + logger.debug(f"Sampled parent {parent.id} and {len(inspirations)} inspirations") + return parent, inspirations + + def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]: + """ + Get the best program based on a metric + + Args: + metric: Metric to use for ranking (uses combined_score or average if None) + + Returns: + Best program or None if database is empty + """ + if not self.programs: + return None + + # If no specific metric and we have a tracked best program, return it + if metric is None and self.best_program_id and self.best_program_id in self.programs: + logger.debug(f"Using tracked best program: {self.best_program_id}") + return self.programs[self.best_program_id] + + if metric: + # Sort by specific metric + sorted_programs = sorted( + [p for p in self.programs.values() if metric in p.metrics], + key=lambda p: p.metrics[metric], + reverse=True, + ) + if sorted_programs: + logger.debug(f"Found best program by metric '{metric}': {sorted_programs[0].id}") + elif self.programs and all("combined_score" in p.metrics for p in self.programs.values()): + # Sort by combined_score if it exists (preferred method) + sorted_programs = sorted( + self.programs.values(), key=lambda p: p.metrics["combined_score"], reverse=True + ) + if sorted_programs: + logger.debug(f"Found best program by combined_score: {sorted_programs[0].id}") + else: + # Sort by average of all metrics as fallback + sorted_programs = sorted( + self.programs.values(), + key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)), + reverse=True, + ) + if sorted_programs: + logger.debug(f"Found best program by average metrics: {sorted_programs[0].id}") + + # Update the best program tracking if we found a better program + if sorted_programs and ( + self.best_program_id is None or sorted_programs[0].id != self.best_program_id + ): + old_id = self.best_program_id + self.best_program_id = sorted_programs[0].id + logger.info(f"Updated best program tracking from {old_id} to {self.best_program_id}") + + # Also log the scores to help understand the update + if ( + old_id + and old_id in self.programs + and "combined_score" in self.programs[old_id].metrics + and "combined_score" in self.programs[self.best_program_id].metrics + ): + old_score = self.programs[old_id].metrics["combined_score"] + new_score = self.programs[self.best_program_id].metrics["combined_score"] + logger.info( + 
f"Score change: {old_score:.4f} → {new_score:.4f} ({new_score-old_score:+.4f})" + ) + + return sorted_programs[0] if sorted_programs else None + + def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Program]: + """ + Get the top N programs based on a metric + + Args: + n: Number of programs to return + metric: Metric to use for ranking (uses average if None) + + Returns: + List of top programs + """ + if not self.programs: + return [] + + if metric: + # Sort by specific metric + sorted_programs = sorted( + [p for p in self.programs.values() if metric in p.metrics], + key=lambda p: p.metrics[metric], + reverse=True, + ) + else: + # Sort by average of all metrics + sorted_programs = sorted( + self.programs.values(), + key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)), + reverse=True, + ) + + return sorted_programs[:n] + + def save(self, path: Optional[str] = None, iteration: int = 0) -> None: + """ + Save the database to disk + + Args: + path: Path to save to (uses config.db_path if None) + iteration: Current iteration number + """ + save_path = path or self.config.db_path + if not save_path: + logger.warning("No database path specified, skipping save") + return + + # Create directory if it doesn't exist + os.makedirs(save_path, exist_ok=True) + + # Save each program + for program in self.programs.values(): + self._save_program(program, save_path) + + # Save metadata + metadata = { + "feature_map": self.feature_map, + "islands": [list(island) for island in self.islands], + "archive": list(self.archive), + "best_program_id": self.best_program_id, + "last_iteration": iteration or self.last_iteration, + "current_island": self.current_island, + "island_generations": self.island_generations, + "last_migration_generation": self.last_migration_generation, + } + + with open(os.path.join(save_path, "metadata.json"), "w") as f: + json.dump(metadata, f) + + logger.info(f"Saved database with {len(self.programs)} programs to {save_path}") + + def load(self, path: str) -> None: + """ + Load the database from disk + + Args: + path: Path to load from + """ + if not os.path.exists(path): + logger.warning(f"Database path {path} does not exist, skipping load") + return + + # Load metadata + metadata_path = os.path.join(path, "metadata.json") + if os.path.exists(metadata_path): + with open(metadata_path, "r") as f: + metadata = json.load(f) + + self.feature_map = metadata.get("feature_map", {}) + self.islands = [set(island) for island in metadata.get("islands", [])] + self.archive = set(metadata.get("archive", [])) + self.best_program_id = metadata.get("best_program_id") + self.last_iteration = metadata.get("last_iteration", 0) + self.current_island = metadata.get("current_island", 0) + self.island_generations = metadata.get("island_generations", [0] * len(self.islands)) + self.last_migration_generation = metadata.get("last_migration_generation", 0) + + # Ensure island_generations list has correct length + if len(self.island_generations) != len(self.islands): + self.island_generations = [0] * len(self.islands) + + logger.info(f"Loaded database metadata with last_iteration={self.last_iteration}") + + # Load programs + programs_dir = os.path.join(path, "programs") + if os.path.exists(programs_dir): + for program_file in os.listdir(programs_dir): + if program_file.endswith(".json"): + program_path = os.path.join(programs_dir, program_file) + try: + with open(program_path, "r") as f: + program_data = json.load(f) + + program = Program.from_dict(program_data) + 
self.programs[program.id] = program + except Exception as e: + logger.warning(f"Error loading program {program_file}: {str(e)}") + + logger.info(f"Loaded database with {len(self.programs)} programs from {path}") + + def _save_program(self, program: Program, base_path: Optional[str] = None) -> None: + """ + Save a program to disk + + Args: + program: Program to save + base_path: Base path to save to (uses config.db_path if None) + """ + save_path = base_path or self.config.db_path + if not save_path: + return + + # Create programs directory if it doesn't exist + programs_dir = os.path.join(save_path, "programs") + os.makedirs(programs_dir, exist_ok=True) + + # Save program + program_path = os.path.join(programs_dir, f"{program.id}.json") + with open(program_path, "w") as f: + json.dump(program.to_dict(), f) + + def _calculate_feature_coords(self, program: Program) -> List[int]: + """ + Calculate feature coordinates for the MAP-Elites grid + + Args: + program: Program to calculate features for + + Returns: + List of feature coordinates + """ + coords = [] + + for dim in self.config.feature_dimensions: + if dim == "complexity": + # Use code length as complexity measure + complexity = len(program.code) + bin_idx = min(int(complexity / 1000 * self.feature_bins), self.feature_bins - 1) + coords.append(bin_idx) + elif dim == "diversity": + # Use average edit distance to other programs + if len(self.programs) < 5: + bin_idx = 0 + else: + sample_programs = random.sample( + list(self.programs.values()), min(5, len(self.programs)) + ) + avg_distance = sum( + calculate_edit_distance(program.code, other.code) + for other in sample_programs + ) / len(sample_programs) + bin_idx = min( + int(avg_distance / 1000 * self.feature_bins), self.feature_bins - 1 + ) + coords.append(bin_idx) + elif dim == "score": + # Use average of metrics + if not program.metrics: + bin_idx = 0 + else: + avg_score = sum(program.metrics.values()) / len(program.metrics) + bin_idx = min(int(avg_score * self.feature_bins), self.feature_bins - 1) + coords.append(bin_idx) + elif dim in program.metrics: + # Use specific metric + score = program.metrics[dim] + bin_idx = min(int(score * self.feature_bins), self.feature_bins - 1) + coords.append(bin_idx) + else: + # Default to middle bin if feature not found + coords.append(self.feature_bins // 2) + + return coords + + def _feature_coords_to_key(self, coords: List[int]) -> str: + """ + Convert feature coordinates to a string key + + Args: + coords: Feature coordinates + + Returns: + String key + """ + return "-".join(str(c) for c in coords) + + def _is_better(self, program1: Program, program2: Program) -> bool: + """ + Determine if program1 is better than program2 + + Args: + program1: First program + program2: Second program + + Returns: + True if program1 is better than program2 + """ + # If no metrics, use newest + if not program1.metrics and not program2.metrics: + return program1.timestamp > program2.timestamp + + # If only one has metrics, it's better + if program1.metrics and not program2.metrics: + return True + if not program1.metrics and program2.metrics: + return False + + # Check for combined_score first (this is the preferred metric) + if "combined_score" in program1.metrics and "combined_score" in program2.metrics: + return program1.metrics["combined_score"] > program2.metrics["combined_score"] + + # Fallback to average of all metrics + avg1 = sum(program1.metrics.values()) / len(program1.metrics) + avg2 = sum(program2.metrics.values()) / len(program2.metrics) + + return 
avg1 > avg2 + + def _update_archive(self, program: Program) -> None: + """ + Update the archive of elite programs + + Args: + program: Program to consider for archive + """ + # If archive not full, add program + if len(self.archive) < self.config.archive_size: + self.archive.add(program.id) + return + + # Otherwise, find worst program in archive + archive_programs = [self.programs[pid] for pid in self.archive] + worst_program = min( + archive_programs, key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) + ) + + # Replace if new program is better + if self._is_better(program, worst_program): + self.archive.remove(worst_program.id) + self.archive.add(program.id) + + def _update_best_program(self, program: Program) -> None: + """ + Update the absolute best program tracking + + Args: + program: Program to consider as the new best + """ + # If we don't have a best program yet, this becomes the best + if self.best_program_id is None: + self.best_program_id = program.id + logger.debug(f"Set initial best program to {program.id}") + return + + # Compare with current best program + current_best = self.programs[self.best_program_id] + + # Update if the new program is better + if self._is_better(program, current_best): + old_id = self.best_program_id + self.best_program_id = program.id + + # Log the change + if "combined_score" in program.metrics and "combined_score" in current_best.metrics: + old_score = current_best.metrics["combined_score"] + new_score = program.metrics["combined_score"] + score_diff = new_score - old_score + logger.info( + f"New best program {program.id} replaces {old_id} (combined_score: {old_score:.4f} → {new_score:.4f}, +{score_diff:.4f})" + ) + else: + logger.info(f"New best program {program.id} replaces {old_id}") + + def _sample_parent(self) -> Program: + """ + Sample a parent program from the current island for the next evolution step + + Returns: + Parent program from current island + """ + # Use exploration_ratio and exploitation_ratio to decide sampling strategy + rand_val = random.random() + + if rand_val < self.config.exploration_ratio: + # EXPLORATION: Sample from current island (diverse sampling) + return self._sample_exploration_parent() + elif rand_val < self.config.exploration_ratio + self.config.exploitation_ratio: + # EXPLOITATION: Sample from archive (elite programs) + return self._sample_exploitation_parent() + else: + # RANDOM: Sample from any program (remaining probability) + return self._sample_random_parent() + + def _sample_exploration_parent(self) -> Program: + """ + Sample a parent for exploration (from current island) + """ + current_island_programs = self.islands[self.current_island] + + if not current_island_programs: + # If current island is empty, initialize with best program or random program + if self.best_program_id and self.best_program_id in self.programs: + # Clone best program to current island + best_program = self.programs[self.best_program_id] + self.islands[self.current_island].add(self.best_program_id) + best_program.metadata["island"] = self.current_island + logger.debug(f"Initialized empty island {self.current_island} with best program") + return best_program + else: + # Use any available program + return next(iter(self.programs.values())) + + # Sample from current island + parent_id = random.choice(list(current_island_programs)) + return self.programs[parent_id] + + def _sample_exploitation_parent(self) -> Program: + """ + Sample a parent for exploitation (from archive/elite programs) + """ + if not self.archive: + # 
Fallback to exploration if no archive + return self._sample_exploration_parent() + + # Prefer programs from current island in archive + archive_programs_in_island = [ + pid + for pid in self.archive + if pid in self.programs + and self.programs[pid].metadata.get("island") == self.current_island + ] + + if archive_programs_in_island: + parent_id = random.choice(archive_programs_in_island) + return self.programs[parent_id] + else: + # Fall back to any archive program if current island has none + parent_id = random.choice(list(self.archive)) + return self.programs[parent_id] + + def _sample_random_parent(self) -> Program: + """ + Sample a completely random parent from all programs + """ + if not self.programs: + raise ValueError("No programs available for sampling") + + # Sample randomly from all programs + program_id = random.choice(list(self.programs.keys())) + return self.programs[program_id] + + def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]: + """ + Sample inspiration programs for the next evolution step + + Args: + parent: Parent program + n: Number of inspirations to sample + + Returns: + List of inspiration programs + """ + inspirations = [] + + # Always include the absolute best program if available and different from parent + if self.best_program_id is not None and self.best_program_id != parent.id: + best_program = self.programs[self.best_program_id] + inspirations.append(best_program) + logger.debug(f"Including best program {self.best_program_id} in inspirations") + + # Add top programs as inspirations + top_n = max(1, int(n * self.config.elite_selection_ratio)) + top_programs = self.get_top_programs(n=top_n) + for program in top_programs: + if program.id not in [p.id for p in inspirations] and program.id != parent.id: + inspirations.append(program) + + # Add diverse programs using config.num_diverse_programs + if len(self.programs) > n and len(inspirations) < n: + # Calculate how many diverse programs to add (up to remaining slots) + remaining_slots = n - len(inspirations) + + # Sample from different feature cells for diversity + feature_coords = self._calculate_feature_coords(parent) + + # Get programs from nearby feature cells + nearby_programs = [] + for _ in range(remaining_slots): + # Perturb coordinates + perturbed_coords = [ + max(0, min(self.feature_bins - 1, c + random.randint(-1, 1))) + for c in feature_coords + ] + + # Try to get program from this cell + cell_key = self._feature_coords_to_key(perturbed_coords) + if cell_key in self.feature_map: + program_id = self.feature_map[cell_key] + if program_id != parent.id and program_id not in [p.id for p in inspirations]: + nearby_programs.append(self.programs[program_id]) + + # If we need more, add random programs + if len(inspirations) + len(nearby_programs) < n: + remaining = n - len(inspirations) - len(nearby_programs) + all_ids = set(self.programs.keys()) + excluded_ids = ( + {parent.id} + .union(p.id for p in inspirations) + .union(p.id for p in nearby_programs) + ) + available_ids = list(all_ids - excluded_ids) + + if available_ids: + random_ids = random.sample(available_ids, min(remaining, len(available_ids))) + random_programs = [self.programs[pid] for pid in random_ids] + nearby_programs.extend(random_programs) + + inspirations.extend(nearby_programs) + + return inspirations[:n] + + def _enforce_population_limit(self) -> None: + """ + Enforce the population size limit by removing worst programs if needed + """ + if len(self.programs) <= self.config.population_size: + return + + # 
Calculate how many programs to remove + num_to_remove = len(self.programs) - self.config.population_size + + logger.info( + f"Population size ({len(self.programs)}) exceeds limit ({self.config.population_size}), removing {num_to_remove} programs" + ) + + # Get programs sorted by fitness (worst first) + all_programs = list(self.programs.values()) + + # Sort by average metric (worst first) + sorted_programs = sorted( + all_programs, + key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) if p.metrics else 0.0, + ) + + # Remove worst programs, but never remove the best program + programs_to_remove = [] + for program in sorted_programs: + if len(programs_to_remove) >= num_to_remove: + break + # Don't remove the best program + if program.id != self.best_program_id: + programs_to_remove.append(program) + + # If we still need to remove more and only have the best program protected, + # remove from the remaining programs anyway (but keep the absolute best) + if len(programs_to_remove) < num_to_remove: + remaining_programs = [ + p + for p in sorted_programs + if p not in programs_to_remove and p.id != self.best_program_id + ] + additional_removals = remaining_programs[: num_to_remove - len(programs_to_remove)] + programs_to_remove.extend(additional_removals) + + # Remove the selected programs + for program in programs_to_remove: + program_id = program.id + + # Remove from main programs dict + if program_id in self.programs: + del self.programs[program_id] + + # Remove from feature map + keys_to_remove = [] + for key, pid in self.feature_map.items(): + if pid == program_id: + keys_to_remove.append(key) + for key in keys_to_remove: + del self.feature_map[key] + + # Remove from islands + for island in self.islands: + island.discard(program_id) + + # Remove from archive + self.archive.discard(program_id) + + logger.debug(f"Removed program {program_id} due to population limit") + + logger.info(f"Population size after cleanup: {len(self.programs)}") + + # Island management methods + def set_current_island(self, island_idx: int) -> None: + """Set which island is currently being evolved""" + self.current_island = island_idx % len(self.islands) + logger.debug(f"Switched to evolving island {self.current_island}") + + def next_island(self) -> int: + """Move to the next island in round-robin fashion""" + self.current_island = (self.current_island + 1) % len(self.islands) + logger.debug(f"Advanced to island {self.current_island}") + return self.current_island + + def increment_island_generation(self, island_idx: Optional[int] = None) -> None: + """Increment generation counter for an island""" + idx = island_idx if island_idx is not None else self.current_island + self.island_generations[idx] += 1 + logger.debug(f"Island {idx} generation incremented to {self.island_generations[idx]}") + + def should_migrate(self) -> bool: + """Check if migration should occur based on generation counters""" + max_generation = max(self.island_generations) + return (max_generation - self.last_migration_generation) >= self.migration_interval + + def migrate_programs(self) -> None: + """ + Perform migration between islands + + This should be called periodically to share good solutions between islands + """ + if len(self.islands) < 2: + return + + logger.info("Performing migration between islands") + + for i, island in enumerate(self.islands): + if len(island) == 0: + continue + + # Select top programs from this island for migration + island_programs = [self.programs[pid] for pid in island if pid in self.programs] + if not 
+    def migrate_programs(self) -> None:
+        """
+        Perform migration between islands
+
+        This should be called periodically to share good solutions between islands
+        """
+        if len(self.islands) < 2:
+            return
+
+        logger.info("Performing migration between islands")
+
+        for i, island in enumerate(self.islands):
+            if len(island) == 0:
+                continue
+
+            # Select top programs from this island for migration
+            island_programs = [self.programs[pid] for pid in island if pid in self.programs]
+            if not island_programs:
+                continue
+
+            # Sort by fitness (using combined_score or average metrics)
+            island_programs.sort(
+                key=lambda p: p.metrics.get(
+                    "combined_score", sum(p.metrics.values()) / max(1, len(p.metrics))
+                ),
+                reverse=True,
+            )
+
+            # Select top programs for migration
+            num_to_migrate = max(1, int(len(island_programs) * self.migration_rate))
+            migrants = island_programs[:num_to_migrate]
+
+            # Migrate to adjacent islands (ring topology)
+            target_islands = [(i + 1) % len(self.islands), (i - 1) % len(self.islands)]
+
+            for migrant in migrants:
+                for target_island in target_islands:
+                    # Create a copy for migration (to avoid removing from source)
+                    migrant_copy = Program(
+                        id=f"{migrant.id}_migrant_{target_island}",
+                        code=migrant.code,
+                        language=migrant.language,
+                        parent_id=migrant.id,
+                        generation=migrant.generation,
+                        metrics=migrant.metrics.copy(),
+                        metadata={**migrant.metadata, "island": target_island, "migrant": True},
+                    )
+
+                    # Add to target island
+                    self.islands[target_island].add(migrant_copy.id)
+                    self.programs[migrant_copy.id] = migrant_copy
+
+                    logger.debug(
+                        f"Migrated program {migrant.id} from island {i} to island {target_island}"
+                    )
+
+        # Update last migration generation
+        self.last_migration_generation = max(self.island_generations)
+        logger.info(f"Migration completed at generation {self.last_migration_generation}")
+
+    def get_island_stats(self) -> List[dict]:
+        """Get statistics for each island"""
+        stats = []
+
+        for i, island in enumerate(self.islands):
+            island_programs = [self.programs[pid] for pid in island if pid in self.programs]
+
+            if island_programs:
+                scores = [
+                    p.metrics.get(
+                        "combined_score", sum(p.metrics.values()) / max(1, len(p.metrics))
+                    )
+                    for p in island_programs
+                ]
+
+                best_score = max(scores) if scores else 0.0
+                avg_score = sum(scores) / len(scores) if scores else 0.0
+                diversity = self._calculate_island_diversity(island_programs)
+            else:
+                best_score = avg_score = diversity = 0.0
+
+            stats.append(
+                {
+                    "island": i,
+                    "population_size": len(island_programs),
+                    "best_score": best_score,
+                    "average_score": avg_score,
+                    "diversity": diversity,
+                    "generation": self.island_generations[i],
+                    "is_current": i == self.current_island,
+                }
+            )
+
+        return stats
+
+    def _calculate_island_diversity(self, programs: List[Program]) -> float:
+        """Calculate diversity within an island"""
+        if len(programs) < 2:
+            return 0.0
+
+        total_distance = 0
+        comparisons = 0
+
+        # Sample up to 10 programs for efficiency
+        sample_size = min(10, len(programs))
+        sample_programs = (
+            random.sample(programs, sample_size) if len(programs) > sample_size else programs
+        )
+
+        for i, prog1 in enumerate(sample_programs):
+            for prog2 in sample_programs[i + 1 :]:
+                total_distance += calculate_edit_distance(prog1.code, prog2.code)
+                comparisons += 1
+
+        return total_distance / max(1, comparisons)
+
+    def log_island_status(self) -> None:
+        """Log current status of all islands"""
+        stats = self.get_island_stats()
+        logger.info("Island Status:")
+        for stat in stats:
+            current_marker = " *" if stat["is_current"] else "  "
+            logger.info(
+                f"{current_marker} Island {stat['island']}: {stat['population_size']} programs, "
+                f"best={stat['best_score']:.4f}, avg={stat['average_score']:.4f}, "
+                f"diversity={stat['diversity']:.2f}, gen={stat['generation']}"
+            )
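Taken together, the island methods above define a simple lifecycle: evolve the current island, advance that island's generation counter, migrate once the configured interval has elapsed, then rotate round-robin. The following is a minimal driver-loop sketch built on this API; the `db` handle and the `evolve_one_generation` stub are illustrative assumptions, not part of this diff:

```python
def evolve_one_generation(db) -> None:
    """Stand-in for the real sample -> mutate -> evaluate -> add step."""
    ...

def run_islands(db, total_generations: int) -> None:
    for _ in range(total_generations):
        evolve_one_generation(db)         # work on db.current_island
        db.increment_island_generation()  # bump that island's counter

        # should_migrate() fires once (max island generation minus
        # last_migration_generation) reaches migration_interval
        if db.should_migrate():
            db.migrate_programs()         # copy top programs to both ring neighbors
            db.log_island_status()

        db.next_island()                  # round-robin to the next island
```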
diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py
index 4b111f326..d7f4ed654 100644
--- a/openevolve/evaluator.py
+++ b/openevolve/evaluator.py
@@ -89,46 +89,61 @@ async def evaluate_program(
             Dictionary of metric name to score
         """
         start_time = time.time()
+        program_id_str = f" {program_id}" if program_id else ""
 
-        # Create a temporary file for the program
-        with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
-            temp_file.write(program_code.encode("utf-8"))
-            temp_file_path = temp_file.name
+        # Retry logic for evaluation
+        last_exception = None
+        for attempt in range(self.config.max_retries + 1):
+            # Create a temporary file for the program
+            with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
+                temp_file.write(program_code.encode("utf-8"))
+                temp_file_path = temp_file.name
 
-        try:
-            # Run evaluation
-            if self.config.cascade_evaluation:
-                # Run cascade evaluation
-                metrics = await self._cascade_evaluate(temp_file_path)
-            else:
-                # Run direct evaluation
-                metrics = await self._direct_evaluate(temp_file_path)
-
-            # Add LLM feedback if configured
-            if self.config.use_llm_feedback and self.llm_ensemble:
-                feedback_metrics = await self._llm_evaluate(program_code)
-
-                # Combine metrics
-                for name, value in feedback_metrics.items():
-                    metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
-
-            elapsed = time.time() - start_time
-            program_id_str = f" {program_id}" if program_id else ""
-            logger.info(
-                f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
-                f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
-            )
-
-            return metrics
+            try:
+                # Run evaluation
+                if self.config.cascade_evaluation:
+                    # Run cascade evaluation
+                    metrics = await self._cascade_evaluate(temp_file_path)
+                else:
+                    # Run direct evaluation
+                    metrics = await self._direct_evaluate(temp_file_path)
 
-        except Exception as e:
-            logger.error(f"Error evaluating program: {str(e)}")
-            return {"error": 0.0}
+                # Add LLM feedback if configured
+                if self.config.use_llm_feedback and self.llm_ensemble:
+                    feedback_metrics = await self._llm_evaluate(program_code)
+
+                    # Combine metrics
+                    for name, value in feedback_metrics.items():
+                        metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
+
+                elapsed = time.time() - start_time
+                logger.info(
+                    f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
+                    f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
+                )
+
+                return metrics
+
+            except Exception as e:
+                last_exception = e
+                logger.warning(
+                    f"Evaluation attempt {attempt + 1}/{self.config.max_retries + 1} failed for program{program_id_str}: {str(e)}"
+                )
+
+                # If this is not the last attempt, wait a bit before retrying
+                if attempt < self.config.max_retries:
+                    await asyncio.sleep(1.0)  # Wait 1 second before retry
+
+            finally:
+                # Clean up temporary file
+                if os.path.exists(temp_file_path):
+                    os.unlink(temp_file_path)
 
-        finally:
-            # Clean up temporary file
-            if os.path.exists(temp_file_path):
-                os.unlink(temp_file_path)
+        # All retries failed
+        logger.error(
+            f"All evaluation attempts failed for program{program_id_str}. Last error: {str(last_exception)}"
+        )
+        return {"error": 0.0}
 
     @run_in_executor
     def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
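The retry wrapper added to `evaluate_program` follows a common pattern: `max_retries + 1` total attempts, a fixed one-second pause between failures, and per-attempt temp-file cleanup in `finally`. The same pattern in isolation (a generic sketch, assuming a config-style `max_retries`; not the evaluator's actual API):

```python
import asyncio
from typing import Awaitable, Callable, Optional, TypeVar

T = TypeVar("T")

async def with_retries(make_attempt: Callable[[], Awaitable[T]], max_retries: int) -> T:
    """Run make_attempt up to max_retries + 1 times, sleeping 1s between failures."""
    last_exception: Optional[Exception] = None
    for attempt in range(max_retries + 1):
        try:
            return await make_attempt()
        except Exception as e:
            last_exception = e
            if attempt < max_retries:
                await asyncio.sleep(1.0)  # fixed pause, mirroring the diff above
    raise last_exception
```

Note one deliberate difference: rather than re-raising as this sketch does, the evaluator swallows the final failure and returns `{"error": 0.0}` so the evolution loop can continue.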
diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py
index 0cb9cc461..9268b5703 100644
--- a/openevolve/llm/openai.py
+++ b/openevolve/llm/openai.py
@@ -36,8 +36,10 @@ def __init__(
 
     async def generate(self, prompt: str, **kwargs) -> str:
         """Generate text from a prompt"""
+        # Use default system message if not provided in kwargs
+        system_message = kwargs.pop("system_message", "You are a helpful assistant.")
         return await self.generate_with_context(
-            system_message=self.config.system_message,
+            system_message=system_message,
             messages=[{"role": "user", "content": prompt}],
             **kwargs,
         )
diff --git a/openevolve/prompt/sampler.py b/openevolve/prompt/sampler.py
index 8d59220c7..ad7a6be38 100644
--- a/openevolve/prompt/sampler.py
+++ b/openevolve/prompt/sampler.py
@@ -276,10 +276,65 @@ def _format_evolution_history(
                 + "\n\n"
             )
 
+        # Format diverse programs using num_diverse_programs config
+        diverse_programs_str = ""
+        if (
+            self.config.num_diverse_programs > 0
+            and len(top_programs) > self.config.num_top_programs
+        ):
+            # Skip the top programs we already included
+            remaining_programs = top_programs[self.config.num_top_programs :]
+
+            # Sample diverse programs from the remaining
+            num_diverse = min(self.config.num_diverse_programs, len(remaining_programs))
+            if num_diverse > 0:
+                # Use random sampling to get diverse programs
+                diverse_programs = random.sample(remaining_programs, num_diverse)
+
+                diverse_programs_str += "\n\n## Diverse Programs\n\n"
+
+                for i, program in enumerate(diverse_programs):
+                    # Extract a snippet (first 5 lines for diversity)
+                    program_code = program.get("code", "")
+                    program_snippet = "\n".join(program_code.split("\n")[:5])
+                    if len(program_code.split("\n")) > 5:
+                        program_snippet += "\n# ... (truncated)"
+
+                    # Calculate a composite score
+                    score = sum(program.get("metrics", {}).values()) / max(
+                        1, len(program.get("metrics", {}))
+                    )
+
+                    # Extract key features
+                    key_features = program.get("key_features", [])
+                    if not key_features:
+                        key_features = [
+                            f"Alternative approach to {name}"
+                            for name in list(program.get("metrics", {}).keys())[
+                                :2
+                            ]  # Just first 2 metrics
+                        ]
+
+                    key_features_str = ", ".join(key_features)
+
+                    diverse_programs_str += (
+                        top_program_template.format(
+                            program_number=f"D{i + 1}",
+                            score=f"{score:.4f}",
+                            language=language,
+                            program_snippet=program_snippet,
+                            key_features=key_features_str,
+                        )
+                        + "\n\n"
+                    )
+
+        # Combine top and diverse programs
+        combined_programs_str = top_programs_str + diverse_programs_str
+
         # Combine into full history
         return history_template.format(
             previous_attempts=previous_attempts_str.strip(),
-            top_programs=top_programs_str.strip(),
+            top_programs=combined_programs_str.strip(),
         )
 
     def _apply_template_variations(self, template: str) -> str:
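Two behavior notes on the last two hunks. First, `generate()` now honors a per-call `system_message` kwarg, falling back to a generic assistant message instead of `self.config.system_message`, e.g. `await llm.generate(prompt, system_message="You are a code reviewer.")`. Second, the prompt history gains a "Diverse Programs" section (entries labeled `D1`, `D2`, ..., truncated to five lines each) drawn at random from candidates beyond the first `num_top_programs`. The selection boils down to the following standalone sketch; the function and parameter names here are illustrative, not the sampler's API:

```python
import random
from typing import Dict, List, Tuple

def split_top_and_diverse(
    programs: List[Dict], num_top: int, num_diverse: int
) -> Tuple[List[Dict], List[Dict]]:
    """First num_top entries are shown verbatim; num_diverse are sampled from the rest."""
    top = programs[:num_top]
    remainder = programs[num_top:]
    diverse = random.sample(remainder, min(num_diverse, len(remainder)))
    return top, diverse
```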