diff --git a/README.md b/README.md
index 483178e55..3de993680 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,36 @@ OpenEvolve can also be run from the command line:
 python openevolve-run.py path/to/initial_program.py path/to/evaluator.py --config path/to/config.yaml --iterations 1000
 ```
 
+### Resuming from Checkpoints
+
+OpenEvolve automatically saves checkpoints at intervals specified by the `checkpoint_interval` config parameter (default is 10 iterations). You can resume an evolution run from a saved checkpoint:
+
+```bash
+python openevolve-run.py path/to/initial_program.py path/to/evaluator.py \
+  --config path/to/config.yaml \
+  --checkpoint path/to/checkpoint_directory \
+  --iterations 50
+```
+
+When resuming from a checkpoint:
+- The system loads all previously evolved programs and their metrics
+- Checkpoint numbering continues from where it left off (e.g., if loaded from checkpoint_50, the next checkpoint will be checkpoint_60)
+- All evolution state is preserved (best programs, feature maps, archives, etc.)
+
+Example workflow with checkpoints:
+
+```bash
+# Run for 50 iterations (creates checkpoints at iterations 10, 20, 30, 40, 50)
+python openevolve-run.py examples/function_minimization/initial_program.py \
+  examples/function_minimization/evaluator.py \
+  --iterations 50
+
+# Resume from checkpoint 50 for another 50 iterations (creates checkpoints at 60, 70, 80, 90, 100)
+python openevolve-run.py examples/function_minimization/initial_program.py \
+  examples/function_minimization/evaluator.py \
+  --checkpoint examples/function_minimization/openevolve_output/checkpoints/checkpoint_50 \
+  --iterations 50
+```
+
 ### Docker
 
 You can also install and execute via Docker:
diff --git a/openevolve/cli.py b/openevolve/cli.py
index 4666eb032..ce037e7c4 100644
--- a/openevolve/cli.py
+++ b/openevolve/cli.py
@@ -45,6 +45,12 @@ def parse_args() -> argparse.Namespace:
         default="INFO",
     )
 
+    parser.add_argument(
+        "--checkpoint",
+        help="Path to checkpoint directory to resume from (e.g., openevolve_output/checkpoints/checkpoint_50)",
+        default=None,
+    )
+
     parser.add_argument("--api-base", help="Base URL for the LLM API", default=None)
 
     parser.add_argument("--primary-model", help="Primary LLM model name", default=None)
@@ -101,6 +107,17 @@ async def main_async() -> int:
             output_dir=args.output,
         )
 
+        # Load from checkpoint if specified
+        if args.checkpoint:
+            if not os.path.exists(args.checkpoint):
+                print(f"Error: Checkpoint directory '{args.checkpoint}' not found")
+                return 1
+            print(f"Loading checkpoint from {args.checkpoint}")
+            openevolve.database.load(args.checkpoint)
+            print(
+                f"Checkpoint loaded successfully (iteration {openevolve.database.last_iteration})"
+            )
+
         # Override log level if specified
         if args.log_level:
             logging.getLogger().setLevel(getattr(logging, args.log_level))
@@ -111,11 +128,29 @@ async def main_async() -> int:
             target_score=args.target_score,
         )
 
+        # Get the checkpoint path
+        checkpoint_dir = os.path.join(openevolve.output_dir, "checkpoints")
+        latest_checkpoint = None
+        if os.path.exists(checkpoint_dir):
+            checkpoints = [
+                os.path.join(checkpoint_dir, d)
+                for d in os.listdir(checkpoint_dir)
+                if os.path.isdir(os.path.join(checkpoint_dir, d))
+            ]
+            if checkpoints:
+                latest_checkpoint = sorted(
+                    checkpoints, key=lambda x: int(x.split("_")[-1]) if "_" in x else 0
+                )[-1]
+
         print(f"\nEvolution complete!")
         print(f"Best program metrics:")
         for name, value in best_program.metrics.items():
             print(f"  {name}: {value:.4f}")
 
+        if latest_checkpoint:
+            print(f"\nLatest checkpoint saved at: {latest_checkpoint}")
+            print(f"To resume, use: --checkpoint {latest_checkpoint}")
+
         return 0
 
     except Exception as e:
diff --git a/openevolve/controller.py b/openevolve/controller.py
index 8569bd677..58666eaea 100644
--- a/openevolve/controller.py
+++ b/openevolve/controller.py
@@ -154,7 +154,14 @@ async def run(
         self.database.add(initial_program)
 
         # Main evolution loop
-        for i in range(max_iterations):
+        start_iteration = self.database.last_iteration
+        total_iterations = start_iteration + max_iterations
+
+        logger.info(
+            f"Starting evolution from iteration {start_iteration} for {max_iterations} iterations (total: {total_iterations})"
+        )
+
+        for i in range(start_iteration, total_iterations):
             iteration_start = time.time()
 
             # Sample parent and inspirations
@@ -348,7 +355,7 @@ def _save_checkpoint(self, iteration: int) -> None:
 
         # Save the database
         checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_{iteration}")
-        self.database.save(checkpoint_path)
+        self.database.save(checkpoint_path, iteration)
 
         logger.info(f"Saved checkpoint at iteration {iteration} to {checkpoint_path}")
diff --git a/openevolve/database.py b/openevolve/database.py
index 92aff460a..7226fce9e 100644
--- a/openevolve/database.py
+++ b/openevolve/database.py
@@ -81,6 +81,9 @@ def __init__(self, config: DatabaseConfig):
         # Track the absolute best program separately
         self.best_program_id: Optional[str] = None
 
+        # Track the last iteration number (for resuming)
+        self.last_iteration: int = 0
+
         # Load database from disk if path is provided
         if config.db_path and os.path.exists(config.db_path):
             self.load(config.db_path)
@@ -253,12 +256,13 @@ def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Pr
 
         return sorted_programs[:n]
 
-    def save(self, path: Optional[str] = None) -> None:
+    def save(self, path: Optional[str] = None, iteration: int = 0) -> None:
         """
         Save the database to disk
 
         Args:
             path: Path to save to (uses config.db_path if None)
+            iteration: Current iteration number
         """
         save_path = path or self.config.db_path
         if not save_path:
@@ -278,6 +282,7 @@ def save(self, path: Optional[str] = None) -> None:
             "islands": [list(island) for island in self.islands],
             "archive": list(self.archive),
             "best_program_id": self.best_program_id,
+            "last_iteration": iteration or self.last_iteration,
         }
 
         with open(os.path.join(save_path, "metadata.json"), "w") as f:
@@ -306,6 +311,9 @@ def load(self, path: str) -> None:
         self.islands = [set(island) for island in metadata.get("islands", [])]
         self.archive = set(metadata.get("archive", []))
         self.best_program_id = metadata.get("best_program_id")
+        self.last_iteration = metadata.get("last_iteration", 0)
+
+        logger.info(f"Loaded database metadata with last_iteration={self.last_iteration}")
 
         # Load programs
         programs_dir = os.path.join(path, "programs")
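
For reference, the sketch below pulls the checkpoint-discovery logic from the `cli.py` change into a standalone helper: it finds the newest `checkpoint_<N>` directory under an output directory using the same numeric-suffix ordering the CLI uses, then prints the `--checkpoint` value to pass on the next run. This is a minimal sketch, not part of the patch; `find_latest_checkpoint` and the example output path (taken from the README workflow above) are illustrative assumptions.

```python
import os
from typing import Optional


def find_latest_checkpoint(output_dir: str) -> Optional[str]:
    """Return the newest checkpoint directory under <output_dir>/checkpoints, or None.

    Mirrors the ordering used in cli.py: checkpoint directories are ranked by the
    integer after the last underscore (checkpoint_10, checkpoint_20, ...).
    """
    checkpoint_dir = os.path.join(output_dir, "checkpoints")
    if not os.path.isdir(checkpoint_dir):
        return None

    candidates = [
        os.path.join(checkpoint_dir, d)
        for d in os.listdir(checkpoint_dir)
        if os.path.isdir(os.path.join(checkpoint_dir, d))
    ]
    if not candidates:
        return None

    def _index(path: str) -> int:
        # checkpoint_50 -> 50; directories without a numeric suffix sort first
        suffix = os.path.basename(path).rsplit("_", 1)[-1]
        return int(suffix) if suffix.isdigit() else -1

    return max(candidates, key=_index)


if __name__ == "__main__":
    # Example path from the README workflow above (assumption; adjust to your run)
    latest = find_latest_checkpoint("examples/function_minimization/openevolve_output")
    if latest:
        print(f"Resume with: --checkpoint {latest}")
    else:
        print("No checkpoints found; start a fresh run.")
```

Ranking by the directory basename rather than the full path keeps the numeric sort from tripping over underscores elsewhere in the path (e.g. `openevolve_output`).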