diff --git a/openevolve/__init__.py b/openevolve/__init__.py index 25849c002..39e739ca5 100644 --- a/openevolve/__init__.py +++ b/openevolve/__init__.py @@ -9,15 +9,15 @@ evolve_function, evolve_algorithm, evolve_code, - EvolutionResult + EvolutionResult, ) __all__ = [ - "OpenEvolve", + "OpenEvolve", "__version__", "run_evolution", - "evolve_function", + "evolve_function", "evolve_algorithm", "evolve_code", - "EvolutionResult" + "EvolutionResult", ] diff --git a/openevolve/api.py b/openevolve/api.py index bb2550cb3..b921c2918 100644 --- a/openevolve/api.py +++ b/openevolve/api.py @@ -1,6 +1,7 @@ """ High-level API for using OpenEvolve as a library """ + import asyncio import tempfile import os @@ -18,35 +19,35 @@ @dataclass class EvolutionResult: """Result of an evolution run""" + best_program: Optional[Program] best_score: float best_code: str metrics: Dict[str, Any] output_dir: Optional[str] - + def __repr__(self): return f"EvolutionResult(best_score={self.best_score:.4f})" - def run_evolution( initial_program: Union[str, Path, List[str]], evaluator: Union[str, Path, Callable], config: Union[str, Path, Config, None] = None, iterations: Optional[int] = None, output_dir: Optional[str] = None, - cleanup: bool = True + cleanup: bool = True, ) -> EvolutionResult: """ Run evolution with flexible inputs - the main library API - + Args: initial_program: Can be: - Path to a program file (str or Path) - Program code as a string - List of code lines evaluator: Can be: - - Path to an evaluator file (str or Path) + - Path to an evaluator file (str or Path) - Callable function that takes (program_path) and returns metrics dict config: Can be: - Path to config YAML file (str or Path) @@ -55,17 +56,17 @@ def run_evolution( iterations: Number of iterations (overrides config) output_dir: Output directory (None for temp directory) cleanup: If True, clean up temp files after evolution - + Returns: EvolutionResult with best program and metrics - + Examples: # Using file paths (original way) result = run_evolution( 'program.py', 'evaluator.py' ) - + # Using code strings result = run_evolution( initial_program=''' @@ -77,20 +78,20 @@ def solve(x): evaluator=lambda path: {"score": evaluate_program(path)}, iterations=100 ) - + # Using a custom evaluator function def my_evaluator(program_path): # Run tests, benchmarks, etc. 
return {"score": 0.95, "runtime": 1.2} - + result = run_evolution( initial_program=generate_initial_code(), evaluator=my_evaluator ) """ - return asyncio.run(_run_evolution_async( - initial_program, evaluator, config, iterations, output_dir, cleanup - )) + return asyncio.run( + _run_evolution_async(initial_program, evaluator, config, iterations, output_dir, cleanup) + ) async def _run_evolution_async( @@ -99,13 +100,13 @@ async def _run_evolution_async( config: Union[str, Path, Config, None], iterations: Optional[int], output_dir: Optional[str], - cleanup: bool + cleanup: bool, ) -> EvolutionResult: """Async implementation of run_evolution""" - + temp_dir = None temp_files = [] - + try: # Handle configuration if config is None: @@ -114,7 +115,7 @@ async def _run_evolution_async( config_obj = config else: config_obj = load_config(str(config)) - + # Validate that LLM models are configured if not config_obj.llm.models: raise ValueError( @@ -125,7 +126,7 @@ async def _run_evolution_async( "config.llm.models = [LLMModelConfig(name='gpt-4', api_key='your-key')]\n" "result = run_evolution(program, evaluator, config=config)" ) - + # Set up output directory if output_dir is None and cleanup: temp_dir = tempfile.mkdtemp(prefix="openevolve_") @@ -133,50 +134,47 @@ async def _run_evolution_async( else: actual_output_dir = output_dir or "openevolve_output" os.makedirs(actual_output_dir, exist_ok=True) - + # Process initial program program_path = _prepare_program(initial_program, temp_dir, temp_files) - + # Process evaluator evaluator_path = _prepare_evaluator(evaluator, temp_dir, temp_files) - + # Create and run controller controller = OpenEvolve( initial_program_path=program_path, evaluation_file=evaluator_path, config=config_obj, - output_dir=actual_output_dir + output_dir=actual_output_dir, ) - + best_program = await controller.run(iterations=iterations) - + # Prepare result best_score = 0.0 metrics = {} best_code = "" - + if best_program: best_code = best_program.code metrics = best_program.metrics or {} - + if "combined_score" in metrics: best_score = metrics["combined_score"] elif metrics: - numeric_metrics = [ - v for v in metrics.values() - if isinstance(v, (int, float)) - ] + numeric_metrics = [v for v in metrics.values() if isinstance(v, (int, float))] if numeric_metrics: best_score = sum(numeric_metrics) / len(numeric_metrics) - + return EvolutionResult( best_program=best_program, best_score=best_score, best_code=best_code, metrics=metrics, - output_dir=actual_output_dir if not cleanup else None + output_dir=actual_output_dir if not cleanup else None, ) - + finally: # Cleanup temporary files if requested if cleanup: @@ -187,6 +185,7 @@ async def _run_evolution_async( pass if temp_dir and os.path.exists(temp_dir): import shutil + try: shutil.rmtree(temp_dir) except: @@ -194,62 +193,58 @@ async def _run_evolution_async( def _prepare_program( - initial_program: Union[str, Path, List[str]], - temp_dir: Optional[str], - temp_files: List[str] + initial_program: Union[str, Path, List[str]], temp_dir: Optional[str], temp_files: List[str] ) -> str: """Convert program input to a file path""" - + # If already a file path, use it directly if isinstance(initial_program, (str, Path)): if os.path.exists(str(initial_program)): return str(initial_program) - + # Otherwise, treat as code and write to temp file if isinstance(initial_program, list): - code = '\n'.join(initial_program) + code = "\n".join(initial_program) else: code = str(initial_program) - + # Ensure code has evolution markers if it doesn't 
already if "EVOLVE-BLOCK-START" not in code: # Wrap entire code in evolution block code = f"""# EVOLVE-BLOCK-START {code} # EVOLVE-BLOCK-END""" - + # Write to temp file if temp_dir is None: temp_dir = tempfile.gettempdir() - + program_file = os.path.join(temp_dir, f"program_{uuid.uuid4().hex[:8]}.py") - with open(program_file, 'w') as f: + with open(program_file, "w") as f: f.write(code) temp_files.append(program_file) - + return program_file def _prepare_evaluator( - evaluator: Union[str, Path, Callable], - temp_dir: Optional[str], - temp_files: List[str] + evaluator: Union[str, Path, Callable], temp_dir: Optional[str], temp_files: List[str] ) -> str: """Convert evaluator input to a file path""" - + # If already a file path, use it directly if isinstance(evaluator, (str, Path)): if os.path.exists(str(evaluator)): return str(evaluator) - + # If it's a callable, create a wrapper module if callable(evaluator): # Create a unique global name for this evaluator evaluator_id = f"_openevolve_evaluator_{uuid.uuid4().hex[:8]}" - + # Store in globals so the wrapper can find it globals()[evaluator_id] = evaluator - + evaluator_code = f""" # Wrapper for user-provided evaluator function import {__name__} as api_module @@ -262,45 +257,41 @@ def evaluate(program_path): else: # Treat as code string evaluator_code = str(evaluator) - + # Ensure it has an evaluate function if "def evaluate" not in evaluator_code: - raise ValueError( - "Evaluator code must contain an 'evaluate(program_path)' function" - ) - + raise ValueError("Evaluator code must contain an 'evaluate(program_path)' function") + # Write to temp file if temp_dir is None: temp_dir = tempfile.gettempdir() - + eval_file = os.path.join(temp_dir, f"evaluator_{uuid.uuid4().hex[:8]}.py") - with open(eval_file, 'w') as f: + with open(eval_file, "w") as f: f.write(evaluator_code) temp_files.append(eval_file) - + return eval_file # Additional helper functions for common use cases + def evolve_function( - func: Callable, - test_cases: List[Tuple[Any, Any]], - iterations: int = 100, - **kwargs + func: Callable, test_cases: List[Tuple[Any, Any]], iterations: int = 100, **kwargs ) -> EvolutionResult: """ Evolve a Python function based on test cases - + Args: func: Initial function to evolve test_cases: List of (input, expected_output) tuples iterations: Number of evolution iterations **kwargs: Additional arguments for run_evolution - + Returns: EvolutionResult with optimized function - + Example: def initial_sort(arr): # Slow bubble sort @@ -309,7 +300,7 @@ def initial_sort(arr): if arr[j] > arr[j+1]: arr[j], arr[j+1] = arr[j+1], arr[j] return arr - + result = evolve_function( initial_sort, test_cases=[ @@ -320,17 +311,17 @@ def initial_sort(arr): ) print(f"Optimized function score: {result.best_score}") """ - + # Get function source code func_source = inspect.getsource(func) func_name = func.__name__ - + # Ensure the function source has evolution markers if "EVOLVE-BLOCK-START" not in func_source: # Try to add markers around the function body - lines = func_source.split('\n') - func_def_line = next(i for i, line in enumerate(lines) if line.strip().startswith('def ')) - + lines = func_source.split("\n") + func_def_line = next(i for i, line in enumerate(lines) if line.strip().startswith("def ")) + # Find the end of the function (simplified approach) indent = len(lines[func_def_line]) - len(lines[func_def_line].lstrip()) func_end = len(lines) @@ -338,37 +329,37 @@ def initial_sort(arr): if lines[i].strip() and (len(lines[i]) - len(lines[i].lstrip())) <= 
indent: func_end = i break - + # Insert evolution markers lines.insert(func_def_line + 1, " " * (indent + 4) + "# EVOLVE-BLOCK-START") lines.insert(func_end + 1, " " * (indent + 4) + "# EVOLVE-BLOCK-END") - func_source = '\n'.join(lines) - + func_source = "\n".join(lines) + # Create evaluator that tests the function def evaluator(program_path): import importlib.util import sys - + # Load the evolved program spec = importlib.util.spec_from_file_location("evolved", program_path) if spec is None or spec.loader is None: return {"score": 0.0, "error": "Failed to load program"} - + module = importlib.util.module_from_spec(spec) - + try: spec.loader.exec_module(module) except Exception as e: return {"score": 0.0, "error": f"Failed to execute program: {str(e)}"} - + if not hasattr(module, func_name): return {"score": 0.0, "error": f"Function '{func_name}' not found"} - + evolved_func = getattr(module, func_name) correct = 0 total = len(test_cases) errors = [] - + for input_val, expected in test_cases: try: # Handle case where input is a list/mutable - make a copy @@ -376,7 +367,7 @@ def evaluator(program_path): test_input = input_val.copy() else: test_input = input_val - + result = evolved_func(test_input) if result == expected: correct += 1 @@ -384,136 +375,126 @@ def evaluator(program_path): errors.append(f"Input {input_val}: expected {expected}, got {result}") except Exception as e: errors.append(f"Input {input_val}: {str(e)}") - + return { "score": correct / total, "test_pass_rate": correct / total, "tests_passed": correct, "total_tests": total, - "errors": errors[:3] # Limit error details + "errors": errors[:3], # Limit error details } - + return run_evolution( - initial_program=func_source, - evaluator=evaluator, - iterations=iterations, - **kwargs + initial_program=func_source, evaluator=evaluator, iterations=iterations, **kwargs ) def evolve_algorithm( - algorithm_class: type, - benchmark: Callable, - iterations: int = 100, - **kwargs + algorithm_class: type, benchmark: Callable, iterations: int = 100, **kwargs ) -> EvolutionResult: """ Evolve an algorithm class based on a benchmark - + Args: algorithm_class: Initial algorithm class to evolve benchmark: Function that takes an instance and returns metrics iterations: Number of evolution iterations **kwargs: Additional arguments for run_evolution - + Returns: EvolutionResult with optimized algorithm - + Example: class SortAlgorithm: def sort(self, arr): # Simple bubble sort return sorted(arr) # placeholder - + def benchmark_sort(instance): import time test_data = [list(range(100, 0, -1))] # Reverse sorted - + start = time.time() for data in test_data: result = instance.sort(data.copy()) if result != sorted(data): return {"score": 0.0} - + duration = time.time() - start return { "score": 1.0, "runtime": duration, "performance": 1.0 / (duration + 0.001) } - + result = evolve_algorithm(SortAlgorithm, benchmark_sort, iterations=50) """ - + # Get class source code class_source = inspect.getsource(algorithm_class) - + # Ensure the class has evolution markers if "EVOLVE-BLOCK-START" not in class_source: - lines = class_source.split('\n') + lines = class_source.split("\n") # Find class definition - class_def_line = next(i for i, line in enumerate(lines) if line.strip().startswith('class ')) - + class_def_line = next( + i for i, line in enumerate(lines) if line.strip().startswith("class ") + ) + # Add evolution markers around the class body indent = len(lines[class_def_line]) - len(lines[class_def_line].lstrip()) lines.insert(class_def_line + 1, " " 
* (indent + 4) + "# EVOLVE-BLOCK-START") lines.append(" " * (indent + 4) + "# EVOLVE-BLOCK-END") - class_source = '\n'.join(lines) - + class_source = "\n".join(lines) + # Create evaluator def evaluator(program_path): import importlib.util - + # Load the evolved program spec = importlib.util.spec_from_file_location("evolved", program_path) if spec is None or spec.loader is None: return {"score": 0.0, "error": "Failed to load program"} - + module = importlib.util.module_from_spec(spec) - + try: spec.loader.exec_module(module) except Exception as e: return {"score": 0.0, "error": f"Failed to execute program: {str(e)}"} - + if not hasattr(module, algorithm_class.__name__): return {"score": 0.0, "error": f"Class '{algorithm_class.__name__}' not found"} - + AlgorithmClass = getattr(module, algorithm_class.__name__) - + try: instance = AlgorithmClass() metrics = benchmark(instance) return metrics if isinstance(metrics, dict) else {"score": metrics} except Exception as e: return {"score": 0.0, "error": str(e)} - + return run_evolution( - initial_program=class_source, - evaluator=evaluator, - iterations=iterations, - **kwargs + initial_program=class_source, evaluator=evaluator, iterations=iterations, **kwargs ) def evolve_code( - initial_code: str, - evaluator: Callable[[str], Dict[str, Any]], - iterations: int = 100, - **kwargs + initial_code: str, evaluator: Callable[[str], Dict[str, Any]], iterations: int = 100, **kwargs ) -> EvolutionResult: """ Evolve arbitrary code with a custom evaluator - + Args: initial_code: Initial code to evolve evaluator: Function that takes a program path and returns metrics iterations: Number of evolution iterations **kwargs: Additional arguments for run_evolution - + Returns: EvolutionResult with optimized code - + Example: initial_code = ''' def fibonacci(n): @@ -521,21 +502,21 @@ def fibonacci(n): return n return fibonacci(n-1) + fibonacci(n-2) ''' - + def eval_fib(program_path): # Evaluate fibonacci implementation import importlib.util import time - + spec = importlib.util.spec_from_file_location("fib", program_path) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - + try: start = time.time() result = module.fibonacci(20) duration = time.time() - start - + correct = result == 6765 return { "score": 1.0 if correct else 0.0, @@ -544,12 +525,9 @@ def eval_fib(program_path): } except: return {"score": 0.0} - + result = evolve_code(initial_code, eval_fib, iterations=50) """ return run_evolution( - initial_program=initial_code, - evaluator=evaluator, - iterations=iterations, - **kwargs - ) \ No newline at end of file + initial_program=initial_code, evaluator=evaluator, iterations=iterations, **kwargs + ) diff --git a/openevolve/config.py b/openevolve/config.py index 543874496..199961325 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -400,6 +400,7 @@ class Config: # Evolution settings diff_based_evolution: bool = True max_code_length: int = 10000 + diff_pattern: str = r"<<<<<<< SEARCH\n(.*?)=======\n(.*?)>>>>>>> REPLACE" # Early stopping settings early_stopping_patience: Optional[int] = None @@ -451,6 +452,13 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "Config": config.evaluator = EvaluatorConfig(**config_dict["evaluator"]) if "evolution_trace" in config_dict: config.evolution_trace = EvolutionTraceConfig(**config_dict["evolution_trace"]) + if "diff_pattern" in config_dict: + # Validate it's a valid regex + try: + re.compile(config_dict["diff_pattern"]) + except re.error as e: + raise ValueError(f"Invalid regex 
pattern in diff_pattern: {e}") + config.diff_pattern = config_dict["diff_pattern"] return config diff --git a/openevolve/database.py b/openevolve/database.py index f7b10bdb2..d39792c0c 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -112,9 +112,7 @@ def __init__(self, config: DatabaseConfig): self.programs: Dict[str, Program] = {} # Per-island feature grids for MAP-Elites - self.island_feature_maps: List[Dict[str, str]] = [ - {} for _ in range(config.num_islands) - ] + self.island_feature_maps: List[Dict[str, str]] = [{} for _ in range(config.num_islands)] # Handle both int and dict types for feature_bins if isinstance(config.feature_bins, int): @@ -186,13 +184,15 @@ def __init__(self, config: DatabaseConfig): } logger.info(f"Initialized program database with {len(self.programs)} programs") - + # Novelty judge setup from openevolve.embedding import EmbeddingClient + self.novelty_llm = config.novelty_llm - self.embedding_client = EmbeddingClient(config.embedding_model) if config.embedding_model else None + self.embedding_client = ( + EmbeddingClient(config.embedding_model) if config.embedding_model else None + ) self.similarity_threshold = config.similarity_threshold - def add( self, program: Program, iteration: int = None, target_island: Optional[int] = None @@ -252,7 +252,9 @@ def add( # Novelty check before adding if not self._is_novel(program.id, island_idx): - logger.debug(f"Program {program.id} failed in novelty check and won't be added in the island {island_idx}") + logger.debug( + f"Program {program.id} failed in novelty check and won't be added in the island {island_idx}" + ) return program.id # Do not add non-novel program # Add to island-specific feature map (replacing existing if better) @@ -282,7 +284,9 @@ def add( if feature_key not in island_feature_map: # New cell occupation in this island - logger.info("New MAP-Elites cell occupied in island %d: %s", island_idx, coords_dict) + logger.info( + "New MAP-Elites cell occupied in island %d: %s", island_idx, coords_dict + ) # Check coverage milestone for this island total_possible_cells = self.feature_bins ** len(self.config.feature_dimensions) island_coverage = (len(island_feature_map) + 1) / total_possible_cells @@ -443,10 +447,7 @@ def sample_from_island( # Sample inspirations inspiration_ids = random.sample(other_programs, num_inspirations) - inspirations = [ - self.programs[pid] for pid in inspiration_ids - if pid in self.programs - ] + inspirations = [self.programs[pid] for pid in inspiration_ids if pid in self.programs] logger.debug( f"Sampled parent {parent.id} and {len(inspirations)} inspirations from island {island_id} " @@ -639,7 +640,9 @@ def load(self, path: str) -> None: with open(metadata_path, "r") as f: metadata = json.load(f) - self.island_feature_maps = metadata.get("island_feature_maps", [{} for _ in range(self.config.num_islands)]) + self.island_feature_maps = metadata.get( + "island_feature_maps", [{} for _ in range(self.config.num_islands)] + ) saved_islands = metadata.get("islands", []) self.archive = set(metadata.get("archive", [])) self.best_program_id = metadata.get("best_program_id") @@ -758,7 +761,9 @@ def _reconstruct_islands(self, saved_islands: List[List[str]]) -> None: ) if feature_keys_to_remove: - logger.info(f"Removed {len(feature_keys_to_remove)} missing programs from island feature maps") + logger.info( + f"Removed {len(feature_keys_to_remove)} missing programs from island feature maps" + ) logger.info(f"Reconstructed islands: restored {restored_programs} programs to 
islands") @@ -950,7 +955,7 @@ def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float: """ Adapted from SakanaAI/ShinkaEvolve (Apache-2.0 License) Original source: https://github.com/SakanaAI/ShinkaEvolve/blob/main/shinka/database/dbase.py#L1452 - + Compute cosine similarity between two vectors. """ if not vec1 or not vec2 or len(vec1) != len(vec2): @@ -966,9 +971,9 @@ def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float: return 0.0 similarity = np.dot(arr1, arr2) / (norm_a * norm_b) - + return float(similarity) - + def _llm_judge_novelty(self, program: Program, similar_program: Program) -> bool: """ Use LLM to judge if a program is novel compared to a similar existing program @@ -988,13 +993,14 @@ def _llm_judge_novelty(self, program: Program, similar_program: Program) -> bool loop = asyncio.get_running_loop() # We're in an async context, need to run in a new thread import concurrent.futures + with concurrent.futures.ThreadPoolExecutor() as executor: future = executor.submit( asyncio.run, self.novelty_llm.generate_with_context( system_message=NOVELTY_SYSTEM_MSG, messages=[{"role": "user", "content": user_msg}], - ) + ), ) content: str = future.result() except RuntimeError: @@ -1034,7 +1040,7 @@ def _llm_judge_novelty(self, program: Program, similar_program: Program) -> bool logger.error(f"Error in novelty LLM check: {e}") return True - + def _is_novel(self, program_id: int, island_idx: int) -> bool: """ Determine if a program is novel based on diversity to existing programs @@ -1042,7 +1048,7 @@ def _is_novel(self, program_id: int, island_idx: int) -> bool: Args: program: Program to check island_idx: Island index - + Returns: True if novel, False otherwise """ @@ -1053,27 +1059,29 @@ def _is_novel(self, program_id: int, island_idx: int) -> bool: program = self.programs[program_id] embd = self.embedding_client.get_embedding(program.code) self.programs[program_id].embedding = embd - - max_smlty = float('-inf') + + max_smlty = float("-inf") max_smlty_pid = None - + for pid in self.islands[island_idx]: other = self.programs[pid] - + if other.embedding is None: - logger.log("Warning: Program %s has no embedding, skipping similarity check", other.id) + logger.log( + "Warning: Program %s has no embedding, skipping similarity check", other.id + ) continue - + similarity = self._cosine_similarity(embd, other.embedding) - + if similarity >= max(max_smlty, self.similarity_threshold): max_smlty = similarity max_smlty_pid = pid - + if max_smlty_pid is None: # No similar programs found, consider it novel return True - + return self._llm_judge_novelty(program, self.programs[max_smlty_pid]) def _is_better(self, program1: Program, program2: Program) -> bool: @@ -1424,8 +1432,7 @@ def _sample_from_island_weighted(self, island_id: int) -> Program: else: # Use weighted sampling based on program scores island_program_objects = [ - self.programs[pid] for pid in island_programs - if pid in self.programs + self.programs[pid] for pid in island_programs if pid in self.programs ] if not island_program_objects: @@ -1480,7 +1487,9 @@ def _sample_from_island_random(self, island_id: int) -> Program: valid_programs = [pid for pid in island_programs if pid in self.programs] if not valid_programs: - logger.warning(f"Island {island_id} has no valid programs, falling back to random sampling") + logger.warning( + f"Island {island_id} has no valid programs, falling back to random sampling" + ) return self._sample_random_parent() # Uniform random selection @@ -1506,16 +1515,16 @@ def 
_sample_from_archive_for_island(self, island_id: int) -> Program: valid_archive = [pid for pid in self.archive if pid in self.programs] if not valid_archive: - logger.warning("Archive has no valid programs, falling back to weighted island sampling") + logger.warning( + "Archive has no valid programs, falling back to weighted island sampling" + ) return self._sample_from_island_weighted(island_id) island_id = island_id % len(self.islands) # Prefer programs from the specified island in archive archive_programs_in_island = [ - pid - for pid in valid_archive - if self.programs[pid].metadata.get("island") == island_id + pid for pid in valid_archive if self.programs[pid].metadata.get("island") == island_id ] if archive_programs_in_island: @@ -1810,7 +1819,8 @@ def migrate_programs(self) -> None: # Skip migration if target island already has a program with identical code # Identical code produces identical metrics, so migration would be wasteful target_island_programs = [ - self.programs[pid] for pid in self.islands[target_island] + self.programs[pid] + for pid in self.islands[target_island] if pid in self.programs ] has_duplicate_code = any(p.code == migrant.code for p in target_island_programs) @@ -1823,6 +1833,7 @@ def migrate_programs(self) -> None: continue # Create a copy for migration with simple new UUID import uuid + migrant_copy = Program( id=str(uuid.uuid4()), code=migrant.code, diff --git a/openevolve/embedding.py b/openevolve/embedding.py index 4016bd4b3..302d4513f 100644 --- a/openevolve/embedding.py +++ b/openevolve/embedding.py @@ -27,9 +27,9 @@ "text-embedding-3-large": 0.13 / M, } + class EmbeddingClient: - def __init__( - self, model_name: str = "text-embedding-3-small"): + def __init__(self, model_name: str = "text-embedding-3-small"): """ Initialize the EmbeddingClient. @@ -37,7 +37,7 @@ def __init__( model (str): The OpenAI embedding model name to use. """ self.client, self.model = self._get_client_model(model_name) - + def _get_client_model(self, model_name: str) -> tuple[openai.OpenAI, str]: if model_name in OPENAI_EMBEDDING_MODELS: # Use OPENAI_EMBEDDING_API_KEY if set, otherwise fall back to OPENAI_API_KEY @@ -58,9 +58,7 @@ def _get_client_model(self, model_name: str) -> tuple[openai.OpenAI, str]: return client, model_to_use - def get_embedding( - self, code: Union[str, List[str]] - ) -> Union[List[float], List[List[float]]]: + def get_embedding(self, code: Union[str, List[str]]) -> Union[List[float], List[List[float]]]: """ Computes the text embedding for a code string. 
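# --- Illustrative sketch (editor's note, not part of the patch above) -------
# The novelty gate added in database.py embeds a candidate program, compares it
# against every program already on the island, and only consults the LLM judge
# when the highest cosine similarity clears similarity_threshold. The snippet
# below mirrors that decision with plain numpy; the threshold value (0.95) and
# the toy vectors are assumptions for illustration, not values from the patch.
import numpy as np

def cosine_similarity(vec1, vec2):
    # Same formula as ProgramDatabase._cosine_similarity: dot product over the
    # norms, returning 0.0 for empty, mismatched, or zero-norm vectors.
    if not vec1 or not vec2 or len(vec1) != len(vec2):
        return 0.0
    arr1, arr2 = np.array(vec1), np.array(vec2)
    norm_a, norm_b = np.linalg.norm(arr1), np.linalg.norm(arr2)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return float(np.dot(arr1, arr2) / (norm_a * norm_b))

def most_similar_above_threshold(candidate, island_embeddings, similarity_threshold=0.95):
    # Returns the index of the most similar existing program if any similarity
    # reaches the threshold (that program would be handed to the LLM novelty
    # judge); returns None when nothing is close enough and the candidate is
    # accepted as novel outright.
    best_idx, best_sim = None, float("-inf")
    for idx, other in enumerate(island_embeddings):
        sim = cosine_similarity(candidate, other)
        if sim >= max(best_sim, similarity_threshold):
            best_sim, best_idx = sim, idx
    return best_idx

if __name__ == "__main__":
    island = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]
    print(most_similar_above_threshold([0.99, 0.05, 0.0], island))  # 0 -> ask LLM judge
    print(most_similar_above_threshold([0.0, 0.0, 1.0], island))    # None -> novel
# -----------------------------------------------------------------------------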
diff --git a/openevolve/iteration.py b/openevolve/iteration.py index ce854c932..b2347e006 100644 --- a/openevolve/iteration.py +++ b/openevolve/iteration.py @@ -84,14 +84,14 @@ async def run_iteration_with_shared_db( # Parse the response if config.diff_based_evolution: - diff_blocks = extract_diffs(llm_response) + diff_blocks = extract_diffs(llm_response, config.diff_pattern) if not diff_blocks: logger.warning(f"Iteration {iteration+1}: No valid diffs found in response") return None # Apply the diffs - child_code = apply_diff(parent.code, llm_response) + child_code = apply_diff(parent.code, llm_response, config.diff_pattern) changes_summary = format_diff_summary(diff_blocks) else: # Parse full rewrite @@ -120,9 +120,7 @@ async def run_iteration_with_shared_db( artifacts = evaluator.get_pending_artifacts(child_id) # Set template_key of Prompts - template_key = ( - "full_rewrite_user" if not config.diff_based_evolution else "diff_user" - ) + template_key = "full_rewrite_user" if not config.diff_based_evolution else "diff_user" # Create a child program result.child_program = Program( @@ -137,13 +135,17 @@ async def run_iteration_with_shared_db( "changes": changes_summary, "parent_metrics": parent.metrics, }, - prompts={ - template_key: { - "system": prompt["system"], - "user": prompt["user"], - "responses": [llm_response] if llm_response is not None else [], + prompts=( + { + template_key: { + "system": prompt["system"], + "user": prompt["user"], + "responses": [llm_response] if llm_response is not None else [], + } } - } if database.config.log_prompts else None, + if database.config.log_prompts + else None + ), ) result.prompt = prompt diff --git a/openevolve/llm/ensemble.py b/openevolve/llm/ensemble.py index 749b46aa1..e3c471673 100644 --- a/openevolve/llm/ensemble.py +++ b/openevolve/llm/ensemble.py @@ -21,7 +21,10 @@ def __init__(self, models_cfg: List[LLMModelConfig]): self.models_cfg = models_cfg # Initialize models from the configuration - self.models = [model_cfg.init_client(model_cfg) if model_cfg.init_client else OpenAILLM(model_cfg) for model_cfg in models_cfg] + self.models = [ + model_cfg.init_client(model_cfg) if model_cfg.init_client else OpenAILLM(model_cfg) + for model_cfg in models_cfg + ] # Extract and normalize model weights self.weights = [model.weight for model in models_cfg] diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py index 48cd81f96..4f86f9bb9 100644 --- a/openevolve/llm/openai.py +++ b/openevolve/llm/openai.py @@ -90,7 +90,7 @@ async def generate_with_context( # Check if this is an OpenAI reasoning model based on model name pattern # This works for all endpoints (OpenAI, Azure, OptiLLM, OpenRouter, etc.) 
model_lower = str(self.model).lower() - is_openai_reasoning_model = model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES) + is_openai_reasoning_model = model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES) if is_openai_reasoning_model: # For OpenAI reasoning models diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py index 1abc33bef..5e5ede932 100644 --- a/openevolve/process_parallel.py +++ b/openevolve/process_parallel.py @@ -204,13 +204,13 @@ def _run_iteration_worker( if _worker_config.diff_based_evolution: from openevolve.utils.code_utils import apply_diff, extract_diffs, format_diff_summary - diff_blocks = extract_diffs(llm_response) + diff_blocks = extract_diffs(llm_response, _worker_config.diff_pattern) if not diff_blocks: return SerializableResult( error=f"No valid diffs found in response", iteration=iteration ) - child_code = apply_diff(parent.code, llm_response) + child_code = apply_diff(parent.code, llm_response, _worker_config.diff_pattern) changes_summary = format_diff_summary(diff_blocks) else: from openevolve.utils.code_utils import parse_full_rewrite diff --git a/openevolve/test_regional_endpoint.py b/openevolve/test_regional_endpoint.py index 597ef1110..d813819e8 100644 --- a/openevolve/test_regional_endpoint.py +++ b/openevolve/test_regional_endpoint.py @@ -3,14 +3,22 @@ Run this to verify the fix works correctly """ + def test_endpoint_detection(): """Test that all OpenAI regional endpoints are detected correctly""" - + OPENAI_REASONING_MODEL_PREFIXES = ( - "o1-", "o1", "o3-", "o3", "o4-", - "gpt-5-", "gpt-5", "gpt-oss-120b", "gpt-oss-20b", + "o1-", + "o1", + "o3-", + "o3", + "o4-", + "gpt-5-", + "gpt-5", + "gpt-oss-120b", + "gpt-oss-20b", ) - + test_cases = [ # (api_base, model, should_be_reasoning_model, description) ("https://eu.api.openai.com/v1", "o1-mini", True, "EU endpoint with o1-mini"), @@ -25,18 +33,18 @@ def test_endpoint_detection(): ("https://eu.api.openai.com/v1", "O1-MINI", True, "EU with uppercase model"), ("HTTPS://EU.API.OPENAI.COM/v1", "o1-mini", True, "Uppercase URL"), ] - + print("Testing Regional Endpoint Detection Logic") print("=" * 80) - + passed = 0 failed = 0 - + for api_base, model, expected_result, description in test_cases: # This is the exact logic from your fixed code model_lower = str(model).lower() api_base_lower = (api_base or "").lower() - + is_openai_api = ( api_base_lower.startswith("https://api.openai.com") or api_base_lower.startswith("https://eu.api.openai.com") @@ -45,16 +53,15 @@ def test_endpoint_detection(): or api_base_lower.startswith("http://eu.api.openai.com") or api_base_lower.startswith("http://apac.api.openai.com") ) - - is_openai_reasoning_model = ( - is_openai_api - and model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES) + + is_openai_reasoning_model = is_openai_api and model_lower.startswith( + OPENAI_REASONING_MODEL_PREFIXES ) - + # Determine which parameter would be used param_used = "max_completion_tokens" if is_openai_reasoning_model else "max_tokens" expected_param = "max_completion_tokens" if expected_result else "max_tokens" - + # Check if result matches expectation if is_openai_reasoning_model == expected_result: status = "✅ PASS" @@ -62,7 +69,7 @@ def test_endpoint_detection(): else: status = "❌ FAIL" failed += 1 - + print(f"\n{status} | {description}") print(f" API Base: {api_base}") print(f" Model: {model}") @@ -70,10 +77,10 @@ def test_endpoint_detection(): print(f" is_reasoning_model: {is_openai_reasoning_model}") print(f" Parameter used: {param_used}") print(f" Expected: 
{expected_param}") - + print("\n" + "=" * 80) print(f"Results: {passed} passed, {failed} failed out of {len(test_cases)} tests") - + if failed == 0: print("🎉 All tests PASSED! The fix is working correctly.") return True @@ -81,6 +88,7 @@ def test_endpoint_detection(): print("⚠️ Some tests FAILED! Please review the logic.") return False + if __name__ == "__main__": success = test_endpoint_detection() - exit(0 if success else 1) \ No newline at end of file + exit(0 if success else 1) diff --git a/openevolve/utils/code_utils.py b/openevolve/utils/code_utils.py index 60fb63001..5e6a72c6d 100644 --- a/openevolve/utils/code_utils.py +++ b/openevolve/utils/code_utils.py @@ -37,13 +37,18 @@ def parse_evolve_blocks(code: str) -> List[Tuple[int, int, str]]: return blocks -def apply_diff(original_code: str, diff_text: str) -> str: +def apply_diff( + original_code: str, + diff_text: str, + diff_pattern: str = r"<<<<<<< SEARCH\n(.*?)=======\n(.*?)>>>>>>> REPLACE", +) -> str: """ Apply a diff to the original code Args: original_code: Original source code diff_text: Diff in the SEARCH/REPLACE format + diff_pattern: Regex pattern for the SEARCH/REPLACE format Returns: Modified code @@ -53,7 +58,7 @@ def apply_diff(original_code: str, diff_text: str) -> str: result_lines = original_lines.copy() # Extract diff blocks - diff_blocks = extract_diffs(diff_text) + diff_blocks = extract_diffs(diff_text, diff_pattern) # Apply each diff block for search_text, replace_text in diff_blocks: @@ -70,17 +75,19 @@ def apply_diff(original_code: str, diff_text: str) -> str: return "\n".join(result_lines) -def extract_diffs(diff_text: str) -> List[Tuple[str, str]]: +def extract_diffs( + diff_text: str, diff_pattern: str = r"<<<<<<< SEARCH\n(.*?)=======\n(.*?)>>>>>>> REPLACE" +) -> List[Tuple[str, str]]: """ Extract diff blocks from the diff text Args: diff_text: Diff in the SEARCH/REPLACE format + diff_pattern: Regex pattern for the SEARCH/REPLACE format Returns: List of tuples (search_text, replace_text) """ - diff_pattern = r"<<<<<<< SEARCH\n(.*?)=======\n(.*?)>>>>>>> REPLACE" diff_blocks = re.findall(diff_pattern, diff_text, re.DOTALL) return [(match[0].rstrip(), match[1].rstrip()) for match in diff_blocks] diff --git a/openevolve/utils/trace_export_utils.py b/openevolve/utils/trace_export_utils.py index df033ae8b..27d442cdc 100644 --- a/openevolve/utils/trace_export_utils.py +++ b/openevolve/utils/trace_export_utils.py @@ -11,10 +11,12 @@ logger = logging.getLogger(__name__) -def export_traces_jsonl(traces: List[Any], output_path: Union[str, Path], compress: bool = False) -> None: +def export_traces_jsonl( + traces: List[Any], output_path: Union[str, Path], compress: bool = False +) -> None: """ Export traces to JSONL format (one JSON object per line) - + Args: traces: List of trace objects with to_dict() method output_path: Path to save the JSONL file @@ -22,30 +24,33 @@ def export_traces_jsonl(traces: List[Any], output_path: Union[str, Path], compre """ output_path = Path(output_path) output_path.parent.mkdir(parents=True, exist_ok=True) - + if compress: import gzip - if not output_path.suffix == '.gz': - output_path = output_path.with_suffix(output_path.suffix + '.gz') + + if not output_path.suffix == ".gz": + output_path = output_path.with_suffix(output_path.suffix + ".gz") open_func = gzip.open - mode = 'wt' + mode = "wt" else: open_func = open - mode = 'w' - + mode = "w" + with open_func(output_path, mode) as f: for trace in traces: - trace_dict = trace.to_dict() if hasattr(trace, 'to_dict') else trace + trace_dict 
= trace.to_dict() if hasattr(trace, "to_dict") else trace json.dump(trace_dict, f) - f.write('\n') - + f.write("\n") + logger.info(f"Exported {len(traces)} traces to {output_path}") -def export_traces_json(traces: List[Any], output_path: Union[str, Path], metadata: Optional[Dict[str, Any]] = None) -> None: +def export_traces_json( + traces: List[Any], output_path: Union[str, Path], metadata: Optional[Dict[str, Any]] = None +) -> None: """ Export traces to JSON format with metadata - + Args: traces: List of trace objects with to_dict() method output_path: Path to save the JSON file @@ -53,37 +58,36 @@ def export_traces_json(traces: List[Any], output_path: Union[str, Path], metadat """ output_path = Path(output_path) output_path.parent.mkdir(parents=True, exist_ok=True) - + # Convert traces to dictionaries trace_dicts = [] for trace in traces: - if hasattr(trace, 'to_dict'): + if hasattr(trace, "to_dict"): trace_dicts.append(trace.to_dict()) else: trace_dicts.append(trace) - + # Build output structure - output_data = { - "metadata": metadata or {}, - "traces": trace_dicts - } - + output_data = {"metadata": metadata or {}, "traces": trace_dicts} + # Add default metadata output_data["metadata"].setdefault("total_traces", len(trace_dicts)) output_data["metadata"].setdefault("exported_at", time.time()) - - with open(output_path, 'w') as f: + + with open(output_path, "w") as f: json.dump(output_data, f, indent=2) - + logger.info(f"Exported {len(traces)} traces to {output_path}") -def export_traces_hdf5(traces: List[Any], output_path: Union[str, Path], metadata: Optional[Dict[str, Any]] = None) -> None: +def export_traces_hdf5( + traces: List[Any], output_path: Union[str, Path], metadata: Optional[Dict[str, Any]] = None +) -> None: """ Export traces to HDF5 format - + Args: - traces: List of trace objects with to_dict() method + traces: List of trace objects with to_dict() method output_path: Path to save the HDF5 file metadata: Optional metadata to include in the output """ @@ -93,15 +97,15 @@ def export_traces_hdf5(traces: List[Any], output_path: Union[str, Path], metadat except ImportError: logger.error("h5py is required for HDF5 export. 
Install with: pip install h5py") raise ImportError("h5py not installed") - + output_path = Path(output_path) output_path.parent.mkdir(parents=True, exist_ok=True) - - with h5py.File(output_path, 'w') as f: + + with h5py.File(output_path, "w") as f: # Create groups - traces_group = f.create_group('traces') - meta_group = f.create_group('metadata') - + traces_group = f.create_group("traces") + meta_group = f.create_group("metadata") + # Add metadata if metadata: for key, value in metadata.items(): @@ -109,19 +113,19 @@ def export_traces_hdf5(traces: List[Any], output_path: Union[str, Path], metadat meta_group.attrs[key] = value else: meta_group.attrs[key] = json.dumps(value) - - meta_group.attrs['total_traces'] = len(traces) - meta_group.attrs['exported_at'] = time.time() - + + meta_group.attrs["total_traces"] = len(traces) + meta_group.attrs["exported_at"] = time.time() + # Add traces for i, trace in enumerate(traces): - trace_dict = trace.to_dict() if hasattr(trace, 'to_dict') else trace - trace_group = traces_group.create_group(f'trace_{i:06d}') - + trace_dict = trace.to_dict() if hasattr(trace, "to_dict") else trace + trace_group = traces_group.create_group(f"trace_{i:06d}") + for key, value in trace_dict.items(): if value is None: continue - + if isinstance(value, dict): # Store dictionaries as JSON strings in attributes trace_group.attrs[key] = json.dumps(value) @@ -142,94 +146,96 @@ def export_traces_hdf5(traces: List[Any], output_path: Union[str, Path], metadat else: # Store other types as JSON trace_group.attrs[key] = json.dumps(value) - + logger.info(f"Exported {len(traces)} traces to {output_path}") def append_trace_jsonl(trace: Any, output_path: Union[str, Path], compress: bool = False) -> None: """ Append a single trace to a JSONL file - + Args: trace: Trace object with to_dict() method output_path: Path to the JSONL file compress: Whether the file is compressed with gzip """ output_path = Path(output_path) - + if compress: import gzip - if not output_path.suffix == '.gz': - output_path = output_path.with_suffix(output_path.suffix + '.gz') + + if not output_path.suffix == ".gz": + output_path = output_path.with_suffix(output_path.suffix + ".gz") open_func = gzip.open - mode = 'at' + mode = "at" else: open_func = open - mode = 'a' - - trace_dict = trace.to_dict() if hasattr(trace, 'to_dict') else trace - + mode = "a" + + trace_dict = trace.to_dict() if hasattr(trace, "to_dict") else trace + with open_func(output_path, mode) as f: json.dump(trace_dict, f) - f.write('\n') + f.write("\n") def load_traces_jsonl(input_path: Union[str, Path], compress: bool = False) -> List[Dict[str, Any]]: """ Load traces from a JSONL file - + Args: input_path: Path to the JSONL file compress: Whether the file is compressed with gzip - + Returns: List of trace dictionaries """ input_path = Path(input_path) - - if compress or input_path.suffix == '.gz': + + if compress or input_path.suffix == ".gz": import gzip + open_func = gzip.open - mode = 'rt' + mode = "rt" else: open_func = open - mode = 'r' - + mode = "r" + traces = [] with open_func(input_path, mode) as f: for line in f: if line.strip(): traces.append(json.loads(line)) - + return traces def load_traces_json(input_path: Union[str, Path]) -> tuple[List[Dict[str, Any]], Dict[str, Any]]: """ Load traces from a JSON file - + Args: input_path: Path to the JSON file - + Returns: Tuple of (traces list, metadata dict) """ - with open(input_path, 'r') as f: + with open(input_path, "r") as f: data = json.load(f) - - traces = data.get('traces', []) - 
metadata = data.get('metadata', {}) - + + traces = data.get("traces", []) + metadata = data.get("metadata", {}) + return traces, metadata def load_traces_hdf5(input_path: Union[str, Path]) -> tuple[List[Dict[str, Any]], Dict[str, Any]]: """ Load traces from an HDF5 file - + Args: input_path: Path to the HDF5 file - + Returns: Tuple of (traces list, metadata dict) """ @@ -238,51 +244,51 @@ def load_traces_hdf5(input_path: Union[str, Path]) -> tuple[List[Dict[str, Any]] except ImportError: logger.error("h5py is required for HDF5 loading. Install with: pip install h5py") raise ImportError("h5py not installed") - + traces = [] metadata = {} - - with h5py.File(input_path, 'r') as f: + + with h5py.File(input_path, "r") as f: # Load metadata - if 'metadata' in f: - meta_group = f['metadata'] + if "metadata" in f: + meta_group = f["metadata"] for key in meta_group.attrs: value = meta_group.attrs[key] # Try to parse JSON strings - if isinstance(value, str) and value.startswith('{'): + if isinstance(value, str) and value.startswith("{"): try: metadata[key] = json.loads(value) except json.JSONDecodeError: metadata[key] = value else: metadata[key] = value - + # Load traces - if 'traces' in f: - traces_group = f['traces'] + if "traces" in f: + traces_group = f["traces"] for trace_name in sorted(traces_group.keys()): trace_group = traces_group[trace_name] trace_dict = {} - + # Load attributes for key in trace_group.attrs: value = trace_group.attrs[key] # Try to parse JSON strings - if isinstance(value, str) and (value.startswith('{') or value.startswith('[')): + if isinstance(value, str) and (value.startswith("{") or value.startswith("[")): try: trace_dict[key] = json.loads(value) except json.JSONDecodeError: trace_dict[key] = value else: trace_dict[key] = value - + # Load datasets for key in trace_group.keys(): dataset = trace_group[key] trace_dict[key] = dataset[...].tolist() - + traces.append(trace_dict) - + return traces, metadata @@ -291,11 +297,11 @@ def export_traces( output_path: Union[str, Path], format: str = "jsonl", compress: bool = False, - metadata: Optional[Dict[str, Any]] = None + metadata: Optional[Dict[str, Any]] = None, ) -> None: """ Export traces to specified format - + Args: traces: List of trace objects output_path: Path to save the file @@ -304,7 +310,7 @@ def export_traces( metadata: Optional metadata (json and hdf5 only) """ format = format.lower() - + if format == "jsonl": export_traces_jsonl(traces, output_path, compress=compress) elif format == "json": @@ -316,54 +322,53 @@ def export_traces( def load_traces( - input_path: Union[str, Path], - format: Optional[str] = None + input_path: Union[str, Path], format: Optional[str] = None ) -> Union[List[Dict[str, Any]], tuple[List[Dict[str, Any]], Dict[str, Any]]]: """ Load traces from file, auto-detecting format if not specified - + Args: input_path: Path to the file format: Optional format ('jsonl', 'json', 'hdf5'). Auto-detected if None. 
- + Returns: For JSONL: List of trace dictionaries For JSON/HDF5: Tuple of (traces list, metadata dict) """ input_path = Path(input_path) - + # Auto-detect format from extension if format is None: - if input_path.suffix in ['.jsonl', '.gz']: - format = 'jsonl' - elif input_path.suffix == '.json': - format = 'json' - elif input_path.suffix in ['.h5', '.hdf5']: - format = 'hdf5' + if input_path.suffix in [".jsonl", ".gz"]: + format = "jsonl" + elif input_path.suffix == ".json": + format = "json" + elif input_path.suffix in [".h5", ".hdf5"]: + format = "hdf5" else: # Try to detect from content - with open(input_path, 'rb') as f: + with open(input_path, "rb") as f: first_bytes = f.read(10) - if first_bytes.startswith(b'\x89HDF'): - format = 'hdf5' - elif first_bytes.startswith(b'{'): + if first_bytes.startswith(b"\x89HDF"): + format = "hdf5" + elif first_bytes.startswith(b"{"): # Could be JSON or JSONL, check for newlines f.seek(0) content = f.read(1000) - if b'\n{' in content or b'\n[' in content: - format = 'jsonl' + if b"\n{" in content or b"\n[" in content: + format = "jsonl" else: - format = 'json' + format = "json" else: - format = 'jsonl' # Default assumption - + format = "jsonl" # Default assumption + format = format.lower() - - if format == 'jsonl': - return load_traces_jsonl(input_path, compress=input_path.suffix == '.gz') - elif format == 'json': + + if format == "jsonl": + return load_traces_jsonl(input_path, compress=input_path.suffix == ".gz") + elif format == "json": return load_traces_json(input_path) - elif format == 'hdf5': + elif format == "hdf5": return load_traces_hdf5(input_path) else: - raise ValueError(f"Unsupported format: {format}") \ No newline at end of file + raise ValueError(f"Unsupported format: {format}")
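# --- Illustrative sketch (editor's note, not part of the patch above) -------
# The patch makes the SEARCH/REPLACE regex configurable: Config.diff_pattern is
# validated in config.py and threaded through iteration.py and
# process_parallel.py into extract_diffs() and apply_diff() in code_utils.py.
# The snippet below shows the same extraction with plain `re`; the alternative
# [FIND]/[WITH]/[END] marker style is a made-up example, not something
# OpenEvolve ships.
import re

DEFAULT_PATTERN = r"<<<<<<< SEARCH\n(.*?)=======\n(.*?)>>>>>>> REPLACE"
CUSTOM_PATTERN = r"\[FIND\]\n(.*?)\[WITH\]\n(.*?)\[END\]"  # hypothetical alternative

def extract_blocks(llm_response: str, pattern: str = DEFAULT_PATTERN):
    # Same behavior as code_utils.extract_diffs: DOTALL findall, tails stripped.
    blocks = re.findall(pattern, llm_response, re.DOTALL)
    return [(search.rstrip(), replace.rstrip()) for search, replace in blocks]

response = """<<<<<<< SEARCH
return slow_sort(arr)
=======
return sorted(arr)
>>>>>>> REPLACE"""

print(extract_blocks(response))
# [('return slow_sort(arr)', 'return sorted(arr)')]
# A model that emits the hypothetical [FIND]/[WITH]/[END] markers instead could
# be supported by setting config.diff_pattern (the `diff_pattern` key in the
# config YAML) to CUSTOM_PATTERN; extract_diffs/apply_diff then parse those
# blocks without any further code changes.
# -----------------------------------------------------------------------------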
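# --- Illustrative sketch (editor's note, not part of the patch above) -------
# Usage of the trace export helpers reformatted in trace_export_utils.py:
# export_traces() dispatches on format ("jsonl", "json", "hdf5") and
# load_traces() auto-detects the format from the file extension or, failing
# that, from the leading bytes. The sample trace dictionaries are assumptions
# for illustration only.
from openevolve.utils.trace_export_utils import export_traces, load_traces

traces = [
    {"iteration": 1, "score": 0.42, "island": 0},
    {"iteration": 2, "score": 0.57, "island": 1},
]

# JSONL: one JSON object per line. Plain dicts are accepted because the helpers
# fall back to the object itself when no to_dict() method exists.
export_traces(traces, "traces.jsonl", format="jsonl")
loaded = load_traces("traces.jsonl")                     # -> list of dicts

# JSON: wraps the traces with a metadata block; loading returns (traces, metadata).
export_traces(traces, "traces.json", format="json", metadata={"run": "demo"})
loaded, metadata = load_traces("traces.json")
# -----------------------------------------------------------------------------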