16 changes: 9 additions & 7 deletions openevolve/config.py
@@ -32,7 +32,7 @@ class LLMModelConfig:
timeout: int = None
retries: int = None
retry_delay: int = None

# Reproducibility
random_seed: Optional[int] = None

@@ -56,10 +56,12 @@ class LLMConfig(LLMModelConfig):
retry_delay: int = 5

# n-model configuration for evolution LLM ensemble
models: List[LLMModelConfig] = field(default_factory=lambda: [
LLMModelConfig(name="gpt-4o-mini", weight=0.8),
LLMModelConfig(name="gpt-4o", weight=0.2)
])
models: List[LLMModelConfig] = field(
default_factory=lambda: [
LLMModelConfig(name="gpt-4o-mini", weight=0.8),
LLMModelConfig(name="gpt-4o", weight=0.2),
]
)

# n-model configuration for evaluator LLM ensemble
evaluator_models: List[LLMModelConfig] = field(default_factory=lambda: [])
@@ -264,7 +266,7 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
config.prompt = PromptConfig(**config_dict["prompt"])
if "database" in config_dict:
config.database = DatabaseConfig(**config_dict["database"])

# Ensure database inherits the random seed if not explicitly set
if config.database.random_seed is None and config.random_seed is not None:
config.database.random_seed = config.random_seed
@@ -365,4 +367,4 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> Config:
# Make the system message available to the individual models, in case it is not provided from the prompt sampler
config.llm.update_model_params({"system_message": config.prompt.system_message})

return config
return config
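
A minimal sketch of the seed-inheritance behavior in the from_dict hunk above: if the database config does not set its own seed, it inherits the top-level one. The dataclass shapes here are reduced from the diff for illustration, not the full classes.

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class DatabaseConfig:
    random_seed: Optional[int] = None

@dataclass
class Config:
    random_seed: Optional[int] = 42
    database: DatabaseConfig = field(default_factory=DatabaseConfig)

config = Config(random_seed=123)
# Mirrors the inheritance check in the hunk above
if config.database.random_seed is None and config.random_seed is not None:
    config.database.random_seed = config.random_seed
assert config.database.random_seed == 123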
50 changes: 24 additions & 26 deletions openevolve/controller.py
@@ -104,20 +104,20 @@ def __init__(
# Set global random seeds
random.seed(self.config.random_seed)
np.random.seed(self.config.random_seed)

# Create hash-based seeds for different components
base_seed = str(self.config.random_seed).encode('utf-8')
llm_seed = int(hashlib.md5(base_seed + b'llm').hexdigest()[:8], 16) % (2**31)
base_seed = str(self.config.random_seed).encode("utf-8")
llm_seed = int(hashlib.md5(base_seed + b"llm").hexdigest()[:8], 16) % (2**31)

# Propagate seed to LLM configurations
self.config.llm.random_seed = llm_seed
for model_cfg in self.config.llm.models:
if not hasattr(model_cfg, 'random_seed') or model_cfg.random_seed is None:
if not hasattr(model_cfg, "random_seed") or model_cfg.random_seed is None:
model_cfg.random_seed = llm_seed
for model_cfg in self.config.llm.evaluator_models:
if not hasattr(model_cfg, 'random_seed') or model_cfg.random_seed is None:
if not hasattr(model_cfg, "random_seed") or model_cfg.random_seed is None:
model_cfg.random_seed = llm_seed

logger.info(f"Set random seed to {self.config.random_seed} for reproducibility")
logger.debug(f"Generated LLM seed: {llm_seed}")

@@ -161,7 +161,7 @@ def __init__(
self.evaluation_file = evaluation_file

logger.info(f"Initialized OpenEvolve with {initial_program_path}")

# Initialize improved parallel processing components
self.parallel_controller = None

@@ -212,7 +212,7 @@ async def run(
Best program found
"""
max_iterations = iterations or self.config.max_iterations

# Determine starting iteration
start_iteration = 0
if checkpoint_path and os.path.exists(checkpoint_path):
@@ -260,30 +260,31 @@ async def run(
self.parallel_controller = ImprovedParallelController(
self.config, self.evaluation_file, self.database
)

# Set up signal handlers for graceful shutdown
def signal_handler(signum, frame):
logger.info(f"Received signal {signum}, initiating graceful shutdown...")
self.parallel_controller.request_shutdown()

# Set up a secondary handler for immediate exit if user presses Ctrl+C again
def force_exit_handler(signum, frame):
logger.info("Force exit requested - terminating immediately")
import sys

sys.exit(0)

signal.signal(signal.SIGINT, force_exit_handler)

signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

self.parallel_controller.start()

# Run evolution with improved parallel processing and checkpoint callback
await self._run_evolution_with_checkpoints(
start_iteration, max_iterations, target_score
)

finally:
# Clean up parallel processing resources
if self.parallel_controller:
@@ -420,31 +421,28 @@ def _load_checkpoint(self, checkpoint_path: str) -> None:
"""Load state from a checkpoint directory"""
if not os.path.exists(checkpoint_path):
raise FileNotFoundError(f"Checkpoint directory {checkpoint_path} not found")

logger.info(f"Loading checkpoint from {checkpoint_path}")
self.database.load(checkpoint_path)
logger.info(
f"Checkpoint loaded successfully (iteration {self.database.last_iteration})"
)
logger.info(f"Checkpoint loaded successfully (iteration {self.database.last_iteration})")

async def _run_evolution_with_checkpoints(
self, start_iteration: int, max_iterations: int, target_score: Optional[float]
) -> None:
"""Run evolution with checkpoint saving support"""
logger.info(f"Using island-based evolution with {self.config.database.num_islands} islands")
self.database.log_island_status()

# Run the evolution process with checkpoint callback
await self.parallel_controller.run_evolution(
start_iteration, max_iterations, target_score,
checkpoint_callback=self._save_checkpoint
start_iteration, max_iterations, target_score, checkpoint_callback=self._save_checkpoint
)

# Check if shutdown was requested
if self.parallel_controller.shutdown_flag.is_set():
logger.info("Evolution stopped due to shutdown request")
return

# Save final checkpoint if needed
final_iteration = start_iteration + max_iterations - 1
if final_iteration > 0 and final_iteration % self.config.checkpoint_interval == 0:
@@ -499,4 +497,4 @@ def _save_best_program(self, program: Optional[Program] = None) -> None:
indent=2,
)

logger.info(f"Saved best program to {code_path} with program info to {info_path}")
logger.info(f"Saved best program to {code_path} with program info to {info_path}")
51 changes: 36 additions & 15 deletions openevolve/database.py
@@ -9,6 +9,7 @@
import random
import time
from dataclasses import asdict, dataclass, field, fields

# FileLock removed - no longer needed with threaded parallel processing
from typing import Any, Dict, List, Optional, Set, Tuple, Union

@@ -198,7 +199,11 @@ def add(
# Update archive
self._update_archive(program)

# Update the absolute best program tracking
# Enforce population size limit BEFORE updating best program tracking
# This ensures newly added programs aren't immediately removed
self._enforce_population_limit(exclude_program_id=program.id)

# Update the absolute best program tracking (after population enforcement)
self._update_best_program(program)

# Save to disk if configured
@@ -207,9 +212,6 @@

logger.debug(f"Added program {program.id} to island {island_idx}")

# Enforce population size limit
self._enforce_population_limit()

return program.id

def get(self, program_id: str) -> Optional[Program]:
@@ -254,9 +256,15 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]:
return None

# If no specific metric and we have a tracked best program, return it
if metric is None and self.best_program_id and self.best_program_id in self.programs:
logger.debug(f"Using tracked best program: {self.best_program_id}")
return self.programs[self.best_program_id]
if metric is None and self.best_program_id:
if self.best_program_id in self.programs:
logger.debug(f"Using tracked best program: {self.best_program_id}")
return self.programs[self.best_program_id]
else:
logger.warning(
f"Tracked best program {self.best_program_id} no longer exists, will recalculate"
)
self.best_program_id = None

if metric:
# Sort by specific metric
@@ -713,7 +721,15 @@ def _update_best_program(self, program: Program) -> None:
logger.debug(f"Set initial best program to {program.id}")
return

# Compare with current best program
# Compare with current best program (if it still exists)
if self.best_program_id not in self.programs:
logger.warning(
f"Best program {self.best_program_id} no longer exists, clearing reference"
)
self.best_program_id = program.id
logger.info(f"Set new best program to {program.id}")
return

current_best = self.programs[self.best_program_id]

# Update if the new program is better
@@ -940,9 +956,12 @@ def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:

return inspirations[:n]

def _enforce_population_limit(self) -> None:
def _enforce_population_limit(self, exclude_program_id: Optional[str] = None) -> None:
"""
Enforce the population size limit by removing worst programs if needed

Args:
exclude_program_id: Program ID to never remove (e.g., newly added program)
"""
if len(self.programs) <= self.config.population_size:
return
@@ -963,22 +982,24 @@ def _enforce_population_limit(self) -> None:
key=lambda p: safe_numeric_average(p.metrics),
)

# Remove worst programs, but never remove the best program
# Remove worst programs, but never remove the best program or excluded program
programs_to_remove = []
protected_ids = {self.best_program_id, exclude_program_id} - {None}

for program in sorted_programs:
if len(programs_to_remove) >= num_to_remove:
break
# Don't remove the best program
if program.id != self.best_program_id:
# Don't remove the best program or excluded program
if program.id not in protected_ids:
programs_to_remove.append(program)

# If we still need to remove more and only have the best program protected,
# remove from the remaining programs anyway (but keep the absolute best)
# If we still need to remove more and only have protected programs,
# remove from the remaining programs anyway (but keep the protected ones)
if len(programs_to_remove) < num_to_remove:
remaining_programs = [
p
for p in sorted_programs
if p not in programs_to_remove and p.id != self.best_program_id
if p not in programs_to_remove and p.id not in protected_ids
]
additional_removals = remaining_programs[: num_to_remove - len(programs_to_remove)]
programs_to_remove.extend(additional_removals)
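
To make the new pruning behavior concrete, here is a compact sketch of the protected-ID selection in _enforce_population_limit: the worst programs are removed first, but the tracked best program and the just-added program are never dropped. Names are simplified, and the diff's two-pass selection is collapsed into one filter.

from collections import namedtuple

Prog = namedtuple("Prog", ["id", "score"])

def select_removals(sorted_programs, num_to_remove, best_id, exclude_id=None):
    # sorted_programs is ordered worst-first; the tracked best and the
    # newly added program (exclude_id) are never eligible for removal
    protected = {best_id, exclude_id} - {None}
    return [p for p in sorted_programs if p.id not in protected][:num_to_remove]

worst_first = [Prog("a", 0.1), Prog("b", 0.2), Prog("c", 0.9)]
removals = select_removals(worst_first, num_to_remove=1, best_id="c", exclude_id="b")
# removals -> [Prog(id='a', score=0.1)]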
21 changes: 9 additions & 12 deletions openevolve/iteration.py
@@ -31,24 +31,21 @@ class Result:
artifacts: dict = None





async def run_iteration_with_shared_db(
iteration: int,
config: Config,
iteration: int,
config: Config,
database: ProgramDatabase,
evaluator: Evaluator,
llm_ensemble: LLMEnsemble,
prompt_sampler: PromptSampler
prompt_sampler: PromptSampler,
):
"""
Run a single iteration using shared memory database

This is optimized for use with persistent worker processes.
"""
logger = logging.getLogger(__name__)

try:
# Sample parent and inspirations from database
parent, inspirations = database.sample()
@@ -115,10 +112,10 @@ async def run_iteration_with_shared_db(
# Evaluate the child program
child_id = str(uuid.uuid4())
result.child_metrics = await evaluator.evaluate_program(child_code, child_id)

# Handle artifacts if they exist
artifacts = evaluator.get_pending_artifacts(child_id)

# Create a child program
result.child_program = Program(
id=child_id,
@@ -133,13 +130,13 @@
"parent_metrics": parent.metrics,
},
)

result.prompt = prompt
result.llm_response = llm_response
result.artifacts = artifacts
result.iteration_time = time.time() - iteration_start
result.iteration = iteration

return result

except Exception as e:
14 changes: 10 additions & 4 deletions openevolve/llm/ensemble.py
@@ -27,16 +27,22 @@ def __init__(self, models_cfg: List[LLMModelConfig]):
self.weights = [model.weight for model in models_cfg]
total = sum(self.weights)
self.weights = [w / total for w in self.weights]

# Set up random state for deterministic model selection
self.random_state = random.Random()
# Initialize with seed from first model's config if available
if models_cfg and hasattr(models_cfg[0], 'random_seed') and models_cfg[0].random_seed is not None:
if (
models_cfg
and hasattr(models_cfg[0], "random_seed")
and models_cfg[0].random_seed is not None
):
self.random_state.seed(models_cfg[0].random_seed)
logger.debug(f"LLMEnsemble: Set random seed to {models_cfg[0].random_seed} for deterministic model selection")
logger.debug(
f"LLMEnsemble: Set random seed to {models_cfg[0].random_seed} for deterministic model selection"
)

# Only log if we have multiple models or this is the first ensemble
if len(models_cfg) > 1 or not hasattr(logger, '_ensemble_logged'):
if len(models_cfg) > 1 or not hasattr(logger, "_ensemble_logged"):
logger.info(
f"Initialized LLM ensemble with models: "
+ ", ".join(
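
The ensemble change above seeds a dedicated random.Random instance so that weighted model selection is reproducible without touching the global random state. A minimal sketch of that pattern, with illustrative names (the ensemble's own sampling method is not shown in this hunk):

import random

names = ["gpt-4o-mini", "gpt-4o"]
weights = [0.8, 0.2]
total = sum(weights)
weights = [w / total for w in weights]  # normalize, as in the diff

rng = random.Random(42)                 # per-ensemble state; global random untouched
picks = [rng.choices(names, weights=weights, k=1)[0] for _ in range(5)]
# Re-running with the same seed yields the same sequence of picks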
8 changes: 4 additions & 4 deletions openevolve/llm/openai.py
@@ -32,7 +32,7 @@ def __init__(
self.retry_delay = model_cfg.retry_delay
self.api_base = model_cfg.api_base
self.api_key = model_cfg.api_key
self.random_seed = getattr(model_cfg, 'random_seed', None)
self.random_seed = getattr(model_cfg, "random_seed", None)

# Set up API client
self.client = openai.OpenAI(
@@ -41,9 +41,9 @@
)

# Only log unique models to reduce duplication
if not hasattr(logger, '_initialized_models'):
if not hasattr(logger, "_initialized_models"):
logger._initialized_models = set()

if self.model not in logger._initialized_models:
logger.info(f"Initialized OpenAI LLM with model: {self.model}")
logger._initialized_models.add(self.model)
@@ -80,7 +80,7 @@ async def generate_with_context(
"top_p": kwargs.get("top_p", self.top_p),
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
}

# Add seed parameter for reproducibility if configured
# Skip seed parameter for Google AI Studio endpoint as it doesn't support it
seed = kwargs.get("seed", self.random_seed)
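
The hunk above ends just as the seed is resolved; the rest of the request construction is collapsed. A sketch of the intended behavior, under the assumption that the Google AI Studio check keys off the API base URL (the exact condition is not visible in the diff and the helper name is hypothetical):

def add_seed_param(params: dict, api_base: str, random_seed, requested_seed=None) -> dict:
    """Attach a `seed` for reproducible sampling unless the endpoint rejects it (sketch)."""
    seed = requested_seed if requested_seed is not None else random_seed
    # Assumed check: Google AI Studio's OpenAI-compatible endpoint does not accept `seed`
    is_google_ai_studio = "generativelanguage.googleapis.com" in (api_base or "")
    if seed is not None and not is_google_ai_studio:
        params = {**params, "seed": seed}
    return params

params = add_seed_param({"model": "gpt-4o-mini", "temperature": 0.7}, api_base=None, random_seed=42)
# params -> {"model": "gpt-4o-mini", "temperature": 0.7, "seed": 42}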