From 869031acae4caa021066c8b423af49f74d82094b Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 19 May 2025 11:13:53 +0800 Subject: [PATCH 1/2] allow resuming from checkpoints --- README.md | 31 +++++++++++++++++++++++++++++++ openevolve/cli.py | 28 ++++++++++++++++++++++++++++ openevolve/controller.py | 9 +++++++-- openevolve/database.py | 10 +++++++++- 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7b4a97c18..fcd423931 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,37 @@ OpenEvolve can also be run from the command line: python openevolve-run.py path/to/initial_program.py path/to/evaluator.py --config path/to/config.yaml --iterations 1000 ``` +### Resuming from Checkpoints + +OpenEvolve automatically saves checkpoints at intervals specified by the `checkpoint_interval` config parameter (default is 10 iterations). You can resume an evolution run from a saved checkpoint: + +```bash +python openevolve-run.py path/to/initial_program.py path/to/evaluator.py \ + --config path/to/config.yaml \ + --checkpoint path/to/checkpoint_directory \ + --iterations 50 +``` + +When resuming from a checkpoint: +- The system loads all previously evolved programs and their metrics +- Checkpoint numbering continues from where it left off (e.g., if loaded from checkpoint_50, the next checkpoint will be checkpoint_60) +- All evolution state is preserved (best programs, feature maps, archives, etc.) + +Example workflow with checkpoints: + +```bash +# Run for 50 iterations (creates checkpoints at iterations 10, 20, 30, 40, 50) +python openevolve-run.py examples/function_minimization/initial_program.py \ + examples/function_minimization/evaluator.py \ + --iterations 50 + +# Resume from checkpoint 50 for another 50 iterations (creates checkpoints at 60, 70, 80, 90, 100) +python openevolve-run.py examples/function_minimization/initial_program.py \ + examples/function_minimization/evaluator.py \ + --checkpoint examples/function_minimization/openevolve_output/checkpoints/checkpoint_50 \ + --iterations 50 +``` + ## Configuration OpenEvolve is highly configurable. You can specify configuration options in a YAML file: diff --git a/openevolve/cli.py b/openevolve/cli.py index 6ca7bcfe6..2e5f1b9c6 100644 --- a/openevolve/cli.py +++ b/openevolve/cli.py @@ -66,6 +66,12 @@ def parse_args() -> argparse.Namespace: default="INFO" ) + parser.add_argument( + "--checkpoint", + help="Path to checkpoint directory to resume from (e.g., openevolve_output/checkpoints/checkpoint_50)", + default=None + ) + parser.add_argument( "--api-base", help="Base URL for the LLM API", @@ -134,6 +140,15 @@ async def main_async() -> int: output_dir=args.output, ) + # Load from checkpoint if specified + if args.checkpoint: + if not os.path.exists(args.checkpoint): + print(f"Error: Checkpoint directory '{args.checkpoint}' not found") + return 1 + print(f"Loading checkpoint from {args.checkpoint}") + openevolve.database.load(args.checkpoint) + print(f"Checkpoint loaded successfully (iteration {openevolve.database.last_iteration})") + # Override log level if specified if args.log_level: logging.getLogger().setLevel(getattr(logging, args.log_level)) @@ -144,10 +159,23 @@ async def main_async() -> int: target_score=args.target_score, ) + # Get the checkpoint path + checkpoint_dir = os.path.join(openevolve.output_dir, "checkpoints") + latest_checkpoint = None + if os.path.exists(checkpoint_dir): + checkpoints = [os.path.join(checkpoint_dir, d) for d in os.listdir(checkpoint_dir) + if os.path.isdir(os.path.join(checkpoint_dir, d))] + if checkpoints: + latest_checkpoint = sorted(checkpoints, key=lambda x: int(x.split("_")[-1]) if "_" in x else 0)[-1] + print(f"\nEvolution complete!") print(f"Best program metrics:") for name, value in best_program.metrics.items(): print(f" {name}: {value:.4f}") + + if latest_checkpoint: + print(f"\nLatest checkpoint saved at: {latest_checkpoint}") + print(f"To resume, use: --checkpoint {latest_checkpoint}") return 0 diff --git a/openevolve/controller.py b/openevolve/controller.py index 575d2c4f7..08b22d206 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -162,7 +162,12 @@ async def run( self.database.add(initial_program) # Main evolution loop - for i in range(max_iterations): + start_iteration = self.database.last_iteration + total_iterations = start_iteration + max_iterations + + logger.info(f"Starting evolution from iteration {start_iteration} for {max_iterations} iterations (total: {total_iterations})") + + for i in range(start_iteration, total_iterations): iteration_start = time.time() # Sample parent and inspirations @@ -347,7 +352,7 @@ def _save_checkpoint(self, iteration: int) -> None: # Save the database checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_{iteration}") - self.database.save(checkpoint_path) + self.database.save(checkpoint_path, iteration) logger.info(f"Saved checkpoint at iteration {iteration} to {checkpoint_path}") diff --git a/openevolve/database.py b/openevolve/database.py index 008098179..80ae4c21e 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -80,6 +80,9 @@ def __init__(self, config: DatabaseConfig): # Track the absolute best program separately self.best_program_id: Optional[str] = None + # Track the last iteration number (for resuming) + self.last_iteration: int = 0 + # Load database from disk if path is provided if config.db_path and os.path.exists(config.db_path): self.load(config.db_path) @@ -251,12 +254,13 @@ def get_top_programs( return sorted_programs[:n] - def save(self, path: Optional[str] = None) -> None: + def save(self, path: Optional[str] = None, iteration: int = 0) -> None: """ Save the database to disk Args: path: Path to save to (uses config.db_path if None) + iteration: Current iteration number """ save_path = path or self.config.db_path if not save_path: @@ -276,6 +280,7 @@ def save(self, path: Optional[str] = None) -> None: "islands": [list(island) for island in self.islands], "archive": list(self.archive), "best_program_id": self.best_program_id, + "last_iteration": iteration or self.last_iteration, } with open(os.path.join(save_path, "metadata.json"), "w") as f: @@ -304,6 +309,9 @@ def load(self, path: str) -> None: self.islands = [set(island) for island in metadata.get("islands", [])] self.archive = set(metadata.get("archive", [])) self.best_program_id = metadata.get("best_program_id") + self.last_iteration = metadata.get("last_iteration", 0) + + logger.info(f"Loaded database metadata with last_iteration={self.last_iteration}") # Load programs programs_dir = os.path.join(path, "programs") From 456c07a38550eb5a4b5544e4a5a6d56441d0459d Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Mon, 19 May 2025 11:23:43 +0800 Subject: [PATCH 2/2] linter --- openevolve/cli.py | 57 ++++++++++++++++++---------------------- openevolve/controller.py | 10 ++++--- openevolve/database.py | 10 +++---- 3 files changed, 37 insertions(+), 40 deletions(-) diff --git a/openevolve/cli.py b/openevolve/cli.py index 4a1cf303b..ce037e7c4 100644 --- a/openevolve/cli.py +++ b/openevolve/cli.py @@ -44,31 +44,19 @@ def parse_args() -> argparse.Namespace: choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], default="INFO", ) - + parser.add_argument( "--checkpoint", help="Path to checkpoint directory to resume from (e.g., openevolve_output/checkpoints/checkpoint_50)", - default=None - ) - - parser.add_argument( - "--api-base", - help="Base URL for the LLM API", - default=None - ) - - parser.add_argument( - "--primary-model", - help="Primary LLM model name", - default=None + default=None, ) - - parser.add_argument( - "--secondary-model", - help="Secondary LLM model name", - default=None - ) - + + parser.add_argument("--api-base", help="Base URL for the LLM API", default=None) + + parser.add_argument("--primary-model", help="Primary LLM model name", default=None) + + parser.add_argument("--secondary-model", help="Secondary LLM model name", default=None) + return parser.parse_args() @@ -118,7 +106,7 @@ async def main_async() -> int: config_path=args.config if config is None else None, output_dir=args.output, ) - + # Load from checkpoint if specified if args.checkpoint: if not os.path.exists(args.checkpoint): @@ -126,8 +114,10 @@ async def main_async() -> int: return 1 print(f"Loading checkpoint from {args.checkpoint}") openevolve.database.load(args.checkpoint) - print(f"Checkpoint loaded successfully (iteration {openevolve.database.last_iteration})") - + print( + f"Checkpoint loaded successfully (iteration {openevolve.database.last_iteration})" + ) + # Override log level if specified if args.log_level: logging.getLogger().setLevel(getattr(logging, args.log_level)) @@ -137,25 +127,30 @@ async def main_async() -> int: iterations=args.iterations, target_score=args.target_score, ) - + # Get the checkpoint path checkpoint_dir = os.path.join(openevolve.output_dir, "checkpoints") latest_checkpoint = None if os.path.exists(checkpoint_dir): - checkpoints = [os.path.join(checkpoint_dir, d) for d in os.listdir(checkpoint_dir) - if os.path.isdir(os.path.join(checkpoint_dir, d))] + checkpoints = [ + os.path.join(checkpoint_dir, d) + for d in os.listdir(checkpoint_dir) + if os.path.isdir(os.path.join(checkpoint_dir, d)) + ] if checkpoints: - latest_checkpoint = sorted(checkpoints, key=lambda x: int(x.split("_")[-1]) if "_" in x else 0)[-1] - + latest_checkpoint = sorted( + checkpoints, key=lambda x: int(x.split("_")[-1]) if "_" in x else 0 + )[-1] + print(f"\nEvolution complete!") print(f"Best program metrics:") for name, value in best_program.metrics.items(): print(f" {name}: {value:.4f}") - + if latest_checkpoint: print(f"\nLatest checkpoint saved at: {latest_checkpoint}") print(f"To resume, use: --checkpoint {latest_checkpoint}") - + return 0 except Exception as e: diff --git a/openevolve/controller.py b/openevolve/controller.py index af6cc6acc..58666eaea 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -156,9 +156,11 @@ async def run( # Main evolution loop start_iteration = self.database.last_iteration total_iterations = start_iteration + max_iterations - - logger.info(f"Starting evolution from iteration {start_iteration} for {max_iterations} iterations (total: {total_iterations})") - + + logger.info( + f"Starting evolution from iteration {start_iteration} for {max_iterations} iterations (total: {total_iterations})" + ) + for i in range(start_iteration, total_iterations): iteration_start = time.time() @@ -354,7 +356,7 @@ def _save_checkpoint(self, iteration: int) -> None: # Save the database checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_{iteration}") self.database.save(checkpoint_path, iteration) - + logger.info(f"Saved checkpoint at iteration {iteration} to {checkpoint_path}") def _save_best_program(self, program: Optional[Program] = None) -> None: diff --git a/openevolve/database.py b/openevolve/database.py index cc75803a7..7226fce9e 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -80,10 +80,10 @@ def __init__(self, config: DatabaseConfig): # Track the absolute best program separately self.best_program_id: Optional[str] = None - + # Track the last iteration number (for resuming) self.last_iteration: int = 0 - + # Load database from disk if path is provided if config.db_path and os.path.exists(config.db_path): self.load(config.db_path) @@ -255,7 +255,7 @@ def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Pr ) return sorted_programs[:n] - + def save(self, path: Optional[str] = None, iteration: int = 0) -> None: """ Save the database to disk @@ -312,9 +312,9 @@ def load(self, path: str) -> None: self.archive = set(metadata.get("archive", [])) self.best_program_id = metadata.get("best_program_id") self.last_iteration = metadata.get("last_iteration", 0) - + logger.info(f"Loaded database metadata with last_iteration={self.last_iteration}") - + # Load programs programs_dir = os.path.join(path, "programs") if os.path.exists(programs_dir):