8 changes: 2 additions & 6 deletions examples/rust_adaptive_sort/config.yaml
@@ -49,9 +49,5 @@ evaluator:
timeout: 60 # Rust compilation can take time
parallel_evaluations: 3

# Use cascade evaluation for performance testing
cascade_evaluation: true
cascade_thresholds:
- 0.5 # Compilation success and basic correctness
- 0.7 # Good performance
- 0.85 # Excellent adaptability
# Direct evaluation - evaluator doesn't implement cascade functions
cascade_evaluation: false
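
For context, the direct-evaluation setting above only requires the example's evaluator module to expose a single evaluate function; cascade mode would additionally need the staged functions that the new validator in evaluator.py checks for. A minimal sketch, assuming each function takes the program path and returns a dict of metric names to float scores (the exact signatures are not shown in this diff):

    def evaluate(program_path: str) -> dict:
        # Direct evaluation: the only entry point needed when cascade_evaluation is false.
        # (Compile and benchmark the program here; the scores below are placeholders.)
        return {"compile_success": 1.0, "performance": 0.8}

    def evaluate_stage1(program_path: str) -> dict:
        # Only needed if cascade_evaluation were turned back on: a cheap first gate
        # (e.g. compilation / basic correctness) run before more expensive stages.
        return {"stage1_passed": 1.0}
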
326 changes: 279 additions & 47 deletions openevolve/database.py

Large diffs are not rendered by default.

76 changes: 65 additions & 11 deletions openevolve/evaluator.py
@@ -89,10 +89,42 @@ def _load_evaluation_function(self) -> None:

self.evaluate_function = module.evaluate
logger.info(f"Successfully loaded evaluation function from {self.evaluation_file}")

# Validate cascade configuration
self._validate_cascade_configuration(module)
except Exception as e:
logger.error(f"Error loading evaluation function: {str(e)}")
raise

def _validate_cascade_configuration(self, module) -> None:
"""
Validate cascade evaluation configuration and warn about potential issues
Args:
module: The loaded evaluation module
"""
if self.config.cascade_evaluation:
# Check if cascade functions exist
has_stage1 = hasattr(module, "evaluate_stage1")
has_stage2 = hasattr(module, "evaluate_stage2")
has_stage3 = hasattr(module, "evaluate_stage3")

if not has_stage1:
logger.warning(
f"Configuration has 'cascade_evaluation: true' but evaluator "
f"'{self.evaluation_file}' does not define 'evaluate_stage1' function. "
f"This will fall back to direct evaluation, making the cascade setting useless. "
f"Consider setting 'cascade_evaluation: false' or implementing cascade functions."
)
elif not (has_stage2 or has_stage3):
logger.warning(
f"Evaluator '{self.evaluation_file}' defines 'evaluate_stage1' but no additional "
f"cascade stages (evaluate_stage2, evaluate_stage3). Consider implementing "
f"multi-stage evaluation for better cascade benefits."
)
else:
logger.debug("Cascade evaluation properly configured with available stage functions")
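
A quick way to see what the hasattr checks above react to, using types.SimpleNamespace as a stand-in for the loaded evaluator module (illustrative only, not part of the PR):

    from types import SimpleNamespace

    direct_only = SimpleNamespace(evaluate=lambda path: {"score": 1.0})
    cascade_ready = SimpleNamespace(
        evaluate=lambda path: {"score": 1.0},
        evaluate_stage1=lambda path: {"stage1_passed": 1.0},
        evaluate_stage2=lambda path: {"score": 1.0},
    )

    print(hasattr(direct_only, "evaluate_stage1"))    # False -> warning, falls back to direct evaluation
    print(hasattr(cascade_ready, "evaluate_stage1"))  # True  -> cascade stages will run
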

async def evaluate_program(
self,
program_code: str,
@@ -273,15 +305,15 @@ def get_pending_artifacts(self, program_id: str) -> Optional[Dict[str, Union[str
"""
return self._pending_artifacts.pop(program_id, None)

async def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
async def _direct_evaluate(self, program_path: str) -> Union[Dict[str, float], EvaluationResult]:
"""
Directly evaluate a program using the evaluation function with timeout
Args:
program_path: Path to the program file
Returns:
Dictionary of metric name to score
Dictionary of metrics or EvaluationResult with metrics and artifacts
Raises:
asyncio.TimeoutError: If evaluation exceeds timeout
@@ -296,11 +328,8 @@ async def run_evaluation():
# Run the evaluation with timeout - let exceptions bubble up for retry handling
result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout)

# Validate result
if not isinstance(result, dict):
logger.warning(f"Evaluation returned non-dictionary result: {result}")
return {"error": 0.0}

# Return result as-is to be processed by _process_evaluation_result
# This supports both dict and EvaluationResult returns, just like _cascade_evaluate
return result
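
Dropping the isinstance check means user evaluators may now return either a plain metrics dict or an EvaluationResult carrying artifacts, and _direct_evaluate forwards both to _process_evaluation_result. A hedged sketch of the two return styles (the EvaluationResult import path is assumed, not confirmed by this diff):

    from openevolve.evaluation_result import EvaluationResult  # assumed import path

    def evaluate(program_path: str):
        metrics = {"correctness": 1.0, "speed": 0.72}
        # A plain dict still works:
        #     return metrics
        # ...but artifacts can now flow through _direct_evaluate as well:
        return EvaluationResult(
            metrics=metrics,
            artifacts={"stdout": "sorted 10_000 elements in 3.1 ms"},
        )
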

async def _cascade_evaluate(
@@ -354,13 +383,14 @@ async def run_stage1():
)
except Exception as e:
logger.error(f"Error in stage 1 evaluation: {str(e)}")
# Capture stage 1 failure as artifacts
# Capture stage 1 failure with enhanced context
error_context = self._create_cascade_error_context("stage1", e)
return EvaluationResult(
metrics={"stage1_passed": 0.0, "error": 0.0},
artifacts={
"stderr": str(e),
"traceback": traceback.format_exc(),
"failure_stage": "stage1",
**error_context,
},
)

@@ -481,13 +511,14 @@ async def run_stage3():

except Exception as e:
logger.error(f"Error in cascade evaluation: {str(e)}")
# Return proper cascade failure result instead of re-raising
# Return proper cascade failure result with enhanced context
error_context = self._create_cascade_error_context("cascade_setup", e)
return EvaluationResult(
metrics={"stage1_passed": 0.0, "error": 0.0},
artifacts={
"stderr": str(e),
"traceback": traceback.format_exc(),
"failure_stage": "cascade_setup",
**error_context,
},
)

@@ -582,6 +613,29 @@ async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[s
traceback.print_exc()
return {}

def _create_cascade_error_context(self, stage: str, error: Exception) -> dict:
"""
Create rich error context for cascade failures
Args:
stage: The stage where the error occurred
error: The exception that was raised
Returns:
Dictionary with enhanced error context
"""
import time
return {
"failure_stage": stage,
"error_type": type(error).__name__,
"error_message": str(error),
"timestamp": time.time(),
"cascade_config": self.config.cascade_evaluation,
"cascade_thresholds": getattr(self.config, 'cascade_thresholds', []),
"timeout_config": self.config.timeout,
"evaluation_file": self.evaluation_file,
}

def _passes_threshold(self, metrics: Dict[str, float], threshold: float) -> bool:
"""
Check if metrics pass a threshold
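For reference, the artifacts attached on a stage-1 failure now combine the stderr/traceback entries with the fields produced by _create_cascade_error_context. Roughly what that merged dict looks like (every value below is made up purely for illustration):

    example_stage1_failure_artifacts = {
        "stderr": "division by zero",                    # str(e)
        "traceback": "Traceback (most recent call last): ...",
        "failure_stage": "stage1",
        "error_type": "ZeroDivisionError",
        "error_message": "division by zero",
        "timestamp": 1718000000.0,                       # time.time() at failure
        "cascade_config": True,                          # config.cascade_evaluation
        "cascade_thresholds": [0.5, 0.75, 0.9],
        "timeout_config": 60,
        "evaluation_file": "examples/my_task/evaluator.py",
    }
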
10 changes: 6 additions & 4 deletions openevolve/iteration.py
@@ -53,16 +53,18 @@ async def run_iteration_with_shared_db(
# Get artifacts for the parent program if available
parent_artifacts = database.get_artifacts(parent.id)

# Get actual top programs for prompt context (separate from inspirations)
actual_top_programs = database.get_top_programs(5)
# Get island-specific top programs for prompt context (maintain island isolation)
parent_island = parent.metadata.get("island", database.current_island)
island_top_programs = database.get_top_programs(5, island_idx=parent_island)
island_previous_programs = database.get_top_programs(3, island_idx=parent_island)

# Build prompt
prompt = prompt_sampler.build_prompt(
current_program=parent.code,
parent_program=parent.code,
program_metrics=parent.metrics,
previous_programs=[p.to_dict() for p in database.get_top_programs(3)],
top_programs=[p.to_dict() for p in actual_top_programs],
previous_programs=[p.to_dict() for p in island_previous_programs],
top_programs=[p.to_dict() for p in island_top_programs],
inspirations=[p.to_dict() for p in inspirations],
language=config.language,
evolution_round=iteration,
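The island_idx parameter used above comes from the database.py changes, whose diff is not rendered here. A hedged sketch of what island-scoped selection presumably does (names such as combined_score and the metadata layout are illustrative assumptions, not the actual ProgramDatabase implementation):

    from dataclasses import dataclass, field

    @dataclass
    class _Program:
        id: str
        metrics: dict
        metadata: dict = field(default_factory=dict)

    def top_programs(programs, n, island_idx=None):
        # Restrict candidates to a single island so prompt context never mixes
        # programs from different island lineages.
        if island_idx is not None:
            programs = [p for p in programs if p.metadata.get("island") == island_idx]
        return sorted(
            programs,
            key=lambda p: p.metrics.get("combined_score", 0.0),
            reverse=True,
        )[:n]
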
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "openevolve"
version = "0.0.14"
version = "0.0.15"
description = "Open-source implementation of AlphaEvolve"
readme = "README.md"
requires-python = ">=3.9"
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

setup(
name="openevolve",
version="0.0.14",
version="0.0.15",
packages=find_packages(),
include_package_data=True,
)