diff --git a/configs/default_config.yaml b/configs/default_config.yaml index d0a69c658..bbcd74ff4 100644 --- a/configs/default_config.yaml +++ b/configs/default_config.yaml @@ -7,7 +7,7 @@ max_iterations: 1000 # Maximum number of evolution iterations checkpoint_interval: 50 # Save checkpoints every N iterations log_level: "INFO" # Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) log_dir: null # Custom directory for logs (default: output_dir/logs) -random_seed: null # Random seed for reproducibility (null = random) +random_seed: 42 # Random seed for reproducibility (null = random, 42 = default) # Evolution settings diff_based_evolution: true # Use diff-based evolution (true) or full rewrites (false) diff --git a/openevolve/config.py b/openevolve/config.py index 1f9d03b14..1b0dc6e80 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -32,6 +32,9 @@ class LLMModelConfig: timeout: int = None retries: int = None retry_delay: int = None + + # Reproducibility + random_seed: Optional[int] = None @dataclass @@ -97,6 +100,7 @@ def __post_init__(self): "timeout": self.timeout, "retries": self.retries, "retry_delay": self.retry_delay, + "random_seed": self.random_seed, } self.update_model_params(shared_config) @@ -165,7 +169,7 @@ class DatabaseConfig: migration_rate: float = 0.1 # Fraction of population to migrate # Random seed for reproducible sampling - random_seed: Optional[int] = None + random_seed: Optional[int] = 42 # Artifact storage artifacts_base_path: Optional[str] = None # Defaults to db_path/artifacts @@ -212,7 +216,7 @@ class Config: checkpoint_interval: int = 100 log_level: str = "INFO" log_dir: Optional[str] = None - random_seed: Optional[int] = None + random_seed: Optional[int] = 42 # Component configurations llm: LLMConfig = field(default_factory=LLMConfig) @@ -256,6 +260,10 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "Config": config.prompt = PromptConfig(**config_dict["prompt"]) if "database" in config_dict: config.database = DatabaseConfig(**config_dict["database"]) + + # Ensure database inherits the random seed if not explicitly set + if config.database.random_seed is None and config.random_seed is not None: + config.database.random_seed = config.random_seed if "evaluator" in config_dict: config.evaluator = EvaluatorConfig(**config_dict["evaluator"]) diff --git a/openevolve/controller.py b/openevolve/controller.py index 2b3b9c8c3..5205561ca 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -104,10 +104,27 @@ def __init__( if self.config.random_seed is not None: import random import numpy as np + import hashlib + # Set global random seeds random.seed(self.config.random_seed) np.random.seed(self.config.random_seed) + + # Create hash-based seeds for different components + base_seed = str(self.config.random_seed).encode('utf-8') + llm_seed = int(hashlib.md5(base_seed + b'llm').hexdigest()[:8], 16) % (2**31) + + # Propagate seed to LLM configurations + self.config.llm.random_seed = llm_seed + for model_cfg in self.config.llm.models: + if not hasattr(model_cfg, 'random_seed') or model_cfg.random_seed is None: + model_cfg.random_seed = llm_seed + for model_cfg in self.config.llm.evaluator_models: + if not hasattr(model_cfg, 'random_seed') or model_cfg.random_seed is None: + model_cfg.random_seed = llm_seed + logger.info(f"Set random seed to {self.config.random_seed} for reproducibility") + logger.debug(f"Generated LLM seed: {llm_seed}") # Load initial program self.initial_program_path = initial_program_path diff --git a/openevolve/llm/ensemble.py b/openevolve/llm/ensemble.py index b286ff68e..354f11a1b 100644 --- a/openevolve/llm/ensemble.py +++ b/openevolve/llm/ensemble.py @@ -27,6 +27,13 @@ def __init__(self, models_cfg: List[LLMModelConfig]): self.weights = [model.weight for model in models_cfg] total = sum(self.weights) self.weights = [w / total for w in self.weights] + + # Set up random state for deterministic model selection + self.random_state = random.Random() + # Initialize with seed from first model's config if available + if models_cfg and hasattr(models_cfg[0], 'random_seed') and models_cfg[0].random_seed is not None: + self.random_state.seed(models_cfg[0].random_seed) + logger.debug(f"LLMEnsemble: Set random seed to {models_cfg[0].random_seed} for deterministic model selection") logger.info( f"Initialized LLM ensemble with models: " @@ -50,7 +57,7 @@ async def generate_with_context( def _sample_model(self) -> LLMInterface: """Sample a model from the ensemble based on weights""" - index = random.choices(range(len(self.models)), weights=self.weights, k=1)[0] + index = self.random_state.choices(range(len(self.models)), weights=self.weights, k=1)[0] return self.models[index] async def generate_multiple(self, prompt: str, n: int, **kwargs) -> List[str]: diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py index 47a6aba2f..2b8eb9a42 100644 --- a/openevolve/llm/openai.py +++ b/openevolve/llm/openai.py @@ -32,6 +32,7 @@ def __init__( self.retry_delay = model_cfg.retry_delay self.api_base = model_cfg.api_base self.api_key = model_cfg.api_key + self.random_seed = getattr(model_cfg, 'random_seed', None) # Set up API client self.client = openai.OpenAI( @@ -73,6 +74,11 @@ async def generate_with_context( "top_p": kwargs.get("top_p", self.top_p), "max_tokens": kwargs.get("max_tokens", self.max_tokens), } + + # Add seed parameter for reproducibility if configured + seed = kwargs.get("seed", self.random_seed) + if seed is not None: + params["seed"] = seed # Attempt the API call with retries retries = kwargs.get("retries", self.retries) diff --git a/pyproject.toml b/pyproject.toml index 2c98893c8..91c36b672 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "openevolve" -version = "0.0.8" +version = "0.0.9" description = "Open-source implementation of AlphaEvolve" readme = "README.md" requires-python = ">=3.9" diff --git a/setup.py b/setup.py index bfb9e54a8..1632dd3b8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="openevolve", - version="0.0.8", + version="0.0.9", packages=find_packages(), include_package_data=True, )