diff --git a/openevolve/_version.py b/openevolve/_version.py index 4ae89c06..0b8c3430 100644 --- a/openevolve/_version.py +++ b/openevolve/_version.py @@ -1,3 +1,3 @@ """Version information for openevolve package.""" -__version__ = "0.2.1" +__version__ = "0.2.2" diff --git a/openevolve/database.py b/openevolve/database.py index fa4ef05b..050f7b7f 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -188,7 +188,7 @@ def add( Args: program: Program to add iteration: Current iteration (defaults to last_iteration) - target_island: Specific island to add to (uses current_island if None) + target_island: Specific island to add to (auto-detects parent's island if None) Returns: Program ID @@ -263,8 +263,34 @@ def add( self.feature_map[feature_key] = program.id - # Add to specific island (not random!) - island_idx = target_island if target_island is not None else self.current_island + # Determine target island + # If target_island is not specified and program has a parent, inherit parent's island + if target_island is None and program.parent_id: + parent = self.programs.get(program.parent_id) + if parent and "island" in parent.metadata: + # Child inherits parent's island to maintain island isolation + island_idx = parent.metadata["island"] + logger.debug( + f"Program {program.id} inheriting island {island_idx} from parent {program.parent_id}" + ) + else: + # Parent not found or has no island, use current_island + island_idx = self.current_island + if parent: + logger.warning( + f"Parent {program.parent_id} has no island metadata, using current_island {island_idx}" + ) + else: + logger.warning( + f"Parent {program.parent_id} not found, using current_island {island_idx}" + ) + elif target_island is not None: + # Explicit target island specified (e.g., for migrants) + island_idx = target_island + else: + # No parent and no target specified, use current island + island_idx = self.current_island + island_idx = island_idx % len(self.islands) # Ensure valid island 
self.islands[island_idx].add(program.id) diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py index 5c01b8b4..27276702 100644 --- a/openevolve/process_parallel.py +++ b/openevolve/process_parallel.py @@ -441,7 +441,8 @@ async def run_evolution( # Reconstruct program from dict child_program = Program(**result.child_program_dict) - # Add to database + # Add to database (will auto-inherit parent's island) + # No need to specify target_island - database will handle parent island inheritance self.database.add(child_program, iteration=completed_iteration) # Store artifacts diff --git a/openevolve/prompt/sampler.py b/openevolve/prompt/sampler.py index 8125f61c..3050c019 100644 --- a/openevolve/prompt/sampler.py +++ b/openevolve/prompt/sampler.py @@ -402,7 +402,7 @@ def _format_evolution_history( combined_programs_str = top_programs_str + diverse_programs_str # Format inspirations section - inspirations_section_str = self._format_inspirations_section(inspirations, language) + inspirations_section_str = self._format_inspirations_section(inspirations, language, feature_dimensions) # Combine into full history return history_template.format( @@ -412,7 +412,7 @@ def _format_evolution_history( ) def _format_inspirations_section( - self, inspirations: List[Dict[str, Any]], language: str + self, inspirations: List[Dict[str, Any]], language: str, feature_dimensions: Optional[List[str]] = None ) -> str: """ Format the inspirations section for the prompt diff --git a/tests/test_island_parent_consistency.py b/tests/test_island_parent_consistency.py new file mode 100644 index 00000000..ad6bd385 --- /dev/null +++ b/tests/test_island_parent_consistency.py @@ -0,0 +1,174 @@ +""" +Test for island parent-child consistency - Programs' parents should be in the corresponding islands +""" + +import unittest +from openevolve.config import Config +from openevolve.database import ProgramDatabase, Program + + +class TestIslandParentConsistency(unittest.TestCase): + """Test 
that parent-child relationships respect island boundaries""" + + def test_parent_child_island_consistency(self): + """Test that children are added to the same island as their parents""" + config = Config() + config.database.num_islands = 3 + database = ProgramDatabase(config.database) + + # Create initial program on island 0 + initial_program = Program( + id="initial", + code="def initial(): pass", + metrics={"score": 0.5}, + iteration_found=0 + ) + database.add(initial_program) # Should go to island 0 (current_island) + + # Verify initial program is on island 0 + self.assertIn("initial", database.islands[0]) + self.assertEqual(initial_program.metadata.get("island"), 0) + + # Now switch to island 1 + database.next_island() + self.assertEqual(database.current_island, 1) + + # Create a child of the initial program + child_program = Program( + id="child1", + code="def child1(): pass", + parent_id="initial", # Parent is on island 0 + metrics={"score": 0.6}, + iteration_found=1 + ) + + # Add child without specifying target_island + # This is what happens in process_parallel.py line 445 + database.add(child_program) + + # With the fix: child should go to parent's island (0), not current_island (1) + parent_island = database.programs["initial"].metadata.get("island", 0) + child_island = database.programs["child1"].metadata.get("island") + + # Check if parent is in child's island (this is what the user's assertion checks) + if child_program.parent_id: + # This is the exact check from the issue report - should now pass + self.assertIn( + child_program.parent_id, + database.islands[child_island], + "Parent should be in child's island" + ) + + # Verify child is on same island as parent + self.assertEqual( + parent_island, + child_island, + f"Child should be on same island as parent. 
Parent: island {parent_island}, Child: island {child_island}" + ) + + def test_multiple_generations_island_drift(self): + """Test that island drift does not happen across multiple generations""" + config = Config() + config.database.num_islands = 4 + database = ProgramDatabase(config.database) + + # Create a lineage + programs = [] + for i in range(10): + if i == 0: + # Initial program + prog = Program( + id=f"prog_{i}", + code=f"def prog_{i}(): pass", + metrics={"score": 0.1 * i}, + iteration_found=i + ) + else: + # Child of previous + prog = Program( + id=f"prog_{i}", + code=f"def prog_{i}(): pass", + parent_id=f"prog_{i-1}", + metrics={"score": 0.1 * i}, + iteration_found=i + ) + + database.add(prog) + programs.append(prog) + + # Switch islands periodically (simulating what happens in evolution) + if i % 3 == 0: + database.next_island() + + # Check island consistency + inconsistent_pairs = [] + for prog in programs: + if prog.parent_id: + parent = database.programs.get(prog.parent_id) + if parent: + parent_island = parent.metadata.get("island") + child_island = prog.metadata.get("island") + + # Check if parent is in child's island + if prog.parent_id not in database.islands[child_island]: + inconsistent_pairs.append((prog.parent_id, prog.id)) + + # With the fix, we should find NO inconsistent parent-child island assignments + self.assertEqual( + len(inconsistent_pairs), + 0, + f"Found {len(inconsistent_pairs)} inconsistent parent-child pairs: {inconsistent_pairs}" + ) + + # Verify all parent-child pairs are on the same island + for prog in programs: + if prog.parent_id: + parent = database.programs.get(prog.parent_id) + if parent: + parent_island = parent.metadata.get("island") + child_island = prog.metadata.get("island") + self.assertEqual( + parent_island, + child_island, + f"Parent {prog.parent_id} (island {parent_island}) and " + f"child {prog.id} (island {child_island}) should be on same island" + ) + + + def test_explicit_migration_override(self): + """Test that
explicit target_island overrides parent island inheritance""" + config = Config() + config.database.num_islands = 3 + database = ProgramDatabase(config.database) + + # Create parent on island 0 + parent = Program( + id="parent", + code="def parent(): pass", + metrics={"score": 0.5}, + iteration_found=0 + ) + database.add(parent) # Goes to island 0 + self.assertIn("parent", database.islands[0]) + + # Create child but explicitly send to island 2 (migration) + migrant_child = Program( + id="migrant", + code="def migrant(): pass", + parent_id="parent", + metrics={"score": 0.7}, + iteration_found=1 + ) + database.add(migrant_child, target_island=2) # Explicit migration + + # Verify migrant went to island 2, not parent's island 0 + self.assertIn("migrant", database.islands[2]) + self.assertNotIn("migrant", database.islands[0]) + self.assertEqual(migrant_child.metadata.get("island"), 2) + + # Parent should still be on island 0 + self.assertEqual(database.programs["parent"].metadata.get("island"), 0) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/test_prompt_sampler_comprehensive.py b/tests/test_prompt_sampler_comprehensive.py new file mode 100644 index 00000000..a0ae9292 --- /dev/null +++ b/tests/test_prompt_sampler_comprehensive.py @@ -0,0 +1,297 @@ +""" +Comprehensive tests for PromptSampler including inspirations and feature_dimensions +""" + +import unittest +from unittest.mock import MagicMock, patch +from openevolve.config import Config +from openevolve.prompt.sampler import PromptSampler + + +class TestPromptSamplerComprehensive(unittest.TestCase): + """Comprehensive tests for prompt sampler edge cases""" + + def setUp(self): + """Set up test prompt sampler""" + config = Config() + # Add feature dimensions to config for testing + config.database.feature_dimensions = ["complexity", "memory_usage"] + self.prompt_sampler = PromptSampler(config.prompt) + self.feature_dimensions = config.database.feature_dimensions + + 
def test_build_prompt_with_inspirations(self): + """Test building a prompt with inspiration programs""" + current_program = "def optimized(): pass" + parent_program = "def original(): pass" + program_metrics = { + "combined_score": 0.85, + "accuracy": 0.9, + "speed": 0.8, + "complexity": 5, + "memory_usage": 100 + } + + # Create inspirations with diverse characteristics + inspirations = [ + { + "id": "insp1", + "code": "def fast_implementation(): pass", + "metrics": { + "combined_score": 0.75, + "accuracy": 0.7, + "speed": 0.95, + "complexity": 3, + "memory_usage": 50 + }, + "metadata": {"diverse": True} + }, + { + "id": "insp2", + "code": "def memory_efficient(): pass", + "metrics": { + "combined_score": 0.65, + "accuracy": 0.8, + "speed": 0.5, + "complexity": 7, + "memory_usage": 20 + }, + "metadata": {"migrant": True} + } + ] + + # Build prompt with inspirations and feature_dimensions + prompt = self.prompt_sampler.build_prompt( + current_program=current_program, + parent_program=parent_program, + program_metrics=program_metrics, + inspirations=inspirations, + feature_dimensions=self.feature_dimensions + ) + + # Verify prompt was built successfully + self.assertIn("system", prompt) + self.assertIn("user", prompt) + + # Check that inspirations are included + self.assertIn("fast_implementation", prompt["user"]) + self.assertIn("memory_efficient", prompt["user"]) + + # Verify fitness scores are calculated correctly (excluding feature dimensions) + # The inspirations should show their fitness scores, not including complexity/memory_usage + self.assertIn("0.75", prompt["user"]) # insp1's combined_score + self.assertIn("0.65", prompt["user"]) # insp2's combined_score + + def test_format_inspirations_section_with_feature_dimensions(self): + """Test _format_inspirations_section directly with feature_dimensions""" + inspirations = [ + { + "id": "test1", + "code": "def test_func(): return 42", + "metrics": { + "combined_score": 0.9, + "accuracy": 0.95, + "complexity": 10, 
# Feature dimension + "memory_usage": 200 # Feature dimension + }, + "metadata": {"diverse": True} + } + ] + + # Call the method directly + result = self.prompt_sampler._format_inspirations_section( + inspirations, + "python", + feature_dimensions=["complexity", "memory_usage"] + ) + + # Should not raise NameError + self.assertIsInstance(result, str) + self.assertIn("test_func", result) + self.assertIn("0.9000", result) # The fitness score + + def test_format_inspirations_section_without_feature_dimensions(self): + """Test _format_inspirations_section works without feature_dimensions""" + inspirations = [ + { + "id": "test2", + "code": "def another_func(): pass", + "metrics": {"score": 0.7, "time": 1.2}, + "metadata": {} + } + ] + + # Call without feature_dimensions (should use default of None) + result = self.prompt_sampler._format_inspirations_section( + inspirations, + "python" + ) + + self.assertIsInstance(result, str) + self.assertIn("another_func", result) + + def test_determine_program_type_with_feature_dimensions(self): + """Test _determine_program_type with feature_dimensions parameter""" + program = { + "metrics": { + "combined_score": 0.85, + "complexity": 5, + "memory_usage": 100 + }, + "metadata": {} + } + + # Test with feature_dimensions + program_type = self.prompt_sampler._determine_program_type( + program, + feature_dimensions=["complexity", "memory_usage"] + ) + + self.assertEqual(program_type, "High-Performer") # Based on combined_score of 0.85 + + def test_extract_unique_features_calls_determine_program_type(self): + """Test that _extract_unique_features correctly handles program_type determination""" + program = { + "code": "", # Empty code to trigger default features + "metrics": {"score": 0.5}, + "metadata": {} + } + + # This should not raise NameError when calling _determine_program_type + features = self.prompt_sampler._extract_unique_features(program) + + self.assertIsInstance(features, str) + self.assertIn("approach to the problem", 
features) + + def test_build_prompt_with_all_optional_parameters(self): + """Test build_prompt with all optional parameters including inspirations""" + current_program = "def main(): pass" + + # Comprehensive test data + previous_programs = [ + {"id": "prev1", "code": "def v1(): pass", "metrics": {"score": 0.3}} + ] + top_programs = [ + {"id": "top1", "code": "def best(): pass", "metrics": {"combined_score": 0.95}} + ] + inspirations = [ + {"id": "insp1", "code": "def creative(): pass", "metrics": {"score": 0.6}} + ] + + prompt = self.prompt_sampler.build_prompt( + current_program=current_program, + parent_program="def parent(): pass", + program_metrics={"combined_score": 0.7, "feature1": 10}, + previous_programs=previous_programs, + top_programs=top_programs, + inspirations=inspirations, + language="python", + evolution_round=5, + diff_based_evolution=True, + feature_dimensions=["feature1"], + program_artifacts={"output": "test output"} + ) + + self.assertIn("system", prompt) + self.assertIn("user", prompt) + # Verify all components are included + self.assertIn("main", prompt["user"]) + self.assertIn("best", prompt["user"]) + self.assertIn("creative", prompt["user"]) + + def test_fitness_calculation_consistency(self): + """Test that fitness calculation is consistent across all methods""" + metrics = { + "combined_score": 0.8, + "accuracy": 0.9, + "speed": 0.7, + "complexity": 5, # Feature dimension + "memory_usage": 100 # Feature dimension + } + feature_dimensions = ["complexity", "memory_usage"] + + # Build a prompt with these metrics + prompt = self.prompt_sampler.build_prompt( + current_program="def test(): pass", + program_metrics=metrics, + inspirations=[ + {"id": "i1", "code": "pass", "metrics": metrics} + ], + feature_dimensions=feature_dimensions + ) + + # The fitness score should be 0.8 (combined_score), not an average including features + self.assertIn("0.8000", prompt["user"]) # Fitness score in prompt + + def test_empty_inspirations_list(self): + 
"""Test that empty inspirations list doesn't break anything""" + prompt = self.prompt_sampler.build_prompt( + current_program="def empty(): pass", + inspirations=[], # Empty list + feature_dimensions=["test_feature"] + ) + + self.assertIn("system", prompt) + self.assertIn("user", prompt) + # Should complete without errors + + def test_inspirations_with_missing_metrics(self): + """Test handling of inspirations with missing or invalid metrics""" + inspirations = [ + { + "id": "bad1", + "code": "def bad(): pass", + "metrics": {}, # Empty metrics + }, + { + "id": "bad2", + "code": "def worse(): pass", + # No metrics key at all + } + ] + + # Should handle gracefully without errors + result = self.prompt_sampler._format_inspirations_section( + inspirations, + "python", + feature_dimensions=["test"] + ) + + self.assertIsInstance(result, str) + + def test_feature_dimensions_none_vs_empty_list(self): + """Test that None and empty list for feature_dimensions are handled correctly""" + program = {"metrics": {"score": 0.5}} + + # Test with None + type_none = self.prompt_sampler._determine_program_type(program, None) + + # Test with empty list + type_empty = self.prompt_sampler._determine_program_type(program, []) + + # Both should work and give same result + self.assertEqual(type_none, type_empty) + + def test_feature_coordinates_formatting_in_prompt(self): + """Test that feature coordinates are formatted correctly in the prompt""" + metrics = { + "combined_score": 0.75, + "complexity": 8, + "memory_usage": 150, + "cpu_usage": 0.3 + } + + prompt = self.prompt_sampler.build_prompt( + current_program="def test(): pass", + program_metrics=metrics, + feature_dimensions=["complexity", "memory_usage", "cpu_usage"] + ) + + # Check that feature coordinates are included + user_msg = prompt["user"] + self.assertIn("complexity", user_msg) + self.assertIn("memory_usage", user_msg) + self.assertIn("cpu_usage", user_msg) + + +if __name__ == "__main__": + unittest.main() \ No newline at end 
of file