Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,40 @@ Sample configuration files are available in the `configs/` directory:
- `default_config.yaml`: Comprehensive configuration with all available options
- `island_config_example.yaml`: Advanced island-based evolution setup

### Prompt Engineering Design

OpenEvolve uses a sophisticated prompt engineering approach that separates different types of program examples to optimize LLM learning:

#### Program Selection Strategy

The system distinguishes between three types of program examples shown to the LLM:

1. **Previous Attempts** (`num_top_programs`): Shows only the best performing programs to demonstrate high-quality approaches
- Used for the "Previous Attempts" section in prompts
- Focused on proven successful patterns
   - Helps the LLM understand what constitutes good performance

2. **Top Programs** (`num_top_programs + num_diverse_programs`): Broader selection including both top performers and diverse approaches
- Used for the "Top Performing Programs" section
- Includes diverse programs to prevent local optima
- Balances exploitation of known good solutions with exploration of novel approaches

3. **Inspirations** (`num_top_programs`): Cross-island program samples for creative inspiration
- Derived from other evolution islands to maintain diversity
   - The count is automatically configured based on the `num_top_programs` setting
   - Prevents convergence by exposing the LLM to different evolutionary trajectories

#### Design Rationale

This separation is intentional and serves multiple purposes:

- **Focused Learning**: Previous attempts show only the best patterns, helping the LLM understand quality standards
- **Diversity Maintenance**: Top programs include diverse solutions to encourage exploration beyond local optima
- **Cross-Pollination**: Inspirations from other islands introduce novel approaches and prevent stagnation
- **Configurable Balance**: Adjust `num_top_programs` and `num_diverse_programs` to control exploration vs exploitation

The inspiration count automatically scales with `num_top_programs` to maintain consistency across different configuration sizes, eliminating the need for a separate configuration parameter.

### Template Customization

OpenEvolve supports advanced prompt template customization to increase diversity in code evolution:
Expand Down
2 changes: 1 addition & 1 deletion openevolve/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Version information for openevolve package."""

__version__ = "0.1.2"
__version__ = "0.1.3"
19 changes: 12 additions & 7 deletions openevolve/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,18 +304,23 @@ def get(self, program_id: str) -> Optional[Program]:
"""
return self.programs.get(program_id)

def sample(self) -> Tuple[Program, List[Program]]:
def sample(self, num_inspirations: Optional[int] = None) -> Tuple[Program, List[Program]]:
"""
Sample a program and inspirations for the next evolution step

Args:
num_inspirations: Number of inspiration programs to sample (defaults to 5 for backward compatibility)

Returns:
Tuple of (parent_program, inspiration_programs)
"""
# Select parent program
parent = self._sample_parent()

# Select inspirations
inspirations = self._sample_inspirations(parent, n=5)
if num_inspirations is None:
num_inspirations = 5 # Default for backward compatibility
inspirations = self._sample_inspirations(parent, n=num_inspirations)

logger.debug(f"Sampled parent {parent.id} and {len(inspirations)} inspirations")
return parent, inspirations
Expand Down Expand Up @@ -436,10 +441,10 @@ def get_top_programs(
reverse=True,
)
else:
# Sort by average of all numeric metrics
# Sort by combined_score if available, otherwise by average of all numeric metrics
sorted_programs = sorted(
candidates,
key=lambda p: safe_numeric_average(p.metrics),
key=lambda p: p.metrics.get("combined_score", safe_numeric_average(p.metrics)),
reverse=True,
)

Expand Down Expand Up @@ -877,7 +882,7 @@ def _update_archive(self, program: Program) -> None:
# Find worst program among valid programs
if valid_archive_programs:
worst_program = min(
valid_archive_programs, key=lambda p: safe_numeric_average(p.metrics)
valid_archive_programs, key=lambda p: p.metrics.get("combined_score", safe_numeric_average(p.metrics))
)

# Replace if new program is better
Expand Down Expand Up @@ -1279,10 +1284,10 @@ def _enforce_population_limit(self, exclude_program_id: Optional[str] = None) ->
# Get programs sorted by fitness (worst first)
all_programs = list(self.programs.values())

# Sort by average metric (worst first)
# Sort by combined_score if available, otherwise by average metric (worst first)
sorted_programs = sorted(
all_programs,
key=lambda p: safe_numeric_average(p.metrics),
key=lambda p: p.metrics.get("combined_score", safe_numeric_average(p.metrics)),
)

# Remove worst programs, but never remove the best program or excluded program
Expand Down
2 changes: 1 addition & 1 deletion openevolve/iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ async def run_iteration_with_shared_db(

try:
# Sample parent and inspirations from database
parent, inspirations = database.sample()
parent, inspirations = database.sample(num_inspirations=config.prompt.num_top_programs)

# Get artifacts for the parent program if available
parent_artifacts = database.get_artifacts(parent.id)
Expand Down
12 changes: 7 additions & 5 deletions openevolve/process_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,18 +153,20 @@ def _run_iteration_worker(
)

# Use config values for limits instead of hardcoding
island_top_programs = island_programs[
# Programs for LLM display (includes both top and diverse for inspiration)
programs_for_prompt = island_programs[
: _worker_config.prompt.num_top_programs + _worker_config.prompt.num_diverse_programs
]
island_previous_programs = island_programs[: _worker_config.prompt.num_top_programs]
# Best programs only (for previous attempts section, focused on top performers)
best_programs_only = island_programs[: _worker_config.prompt.num_top_programs]

# Build prompt
prompt = _worker_prompt_sampler.build_prompt(
current_program=parent.code,
parent_program=parent.code,
program_metrics=parent.metrics,
previous_programs=[p.to_dict() for p in island_previous_programs],
top_programs=[p.to_dict() for p in island_top_programs],
previous_programs=[p.to_dict() for p in best_programs_only],
top_programs=[p.to_dict() for p in programs_for_prompt],
inspirations=[p.to_dict() for p in inspirations],
language=_worker_config.language,
evolution_round=iteration,
Expand Down Expand Up @@ -589,7 +591,7 @@ def _submit_iteration(self, iteration: int, island_id: Optional[int] = None) ->

try:
# Sample parent and inspirations from the target island
parent, inspirations = self.database.sample()
parent, inspirations = self.database.sample(num_inspirations=self.config.prompt.num_top_programs)
finally:
# Always restore original island state
self.database.current_island = original_island
Expand Down
2 changes: 1 addition & 1 deletion tests/test_island_isolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def test_island_isolation_during_evolution(self):
# Track which islands were sampled
sampled_islands = []

def mock_sample():
def mock_sample(num_inspirations=None):
# Record which island was sampled
sampled_islands.append(self.database.current_island)
# Return mock parent and inspirations
Expand Down