From bf789822f68ecf38d0dddb69db2dd6aa69f7d899 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sat, 30 Aug 2025 23:02:58 +0800 Subject: [PATCH 1/2] fix timeout --- openevolve/process_parallel.py | 14 +- tests/test_checkpoint_resume.py | 4 +- tests/test_feature_stats_persistence.py | 137 ++++++++---------- tests/test_grid_stability.py | 139 ++++++++----------- tests/test_island_isolation.py | 154 +++++++++------------ tests/test_island_parent_consistency.py | 75 +++++----- tests/test_model_parameter_demo.py | 44 +++--- tests/test_openai_model_detection.py | 53 ++++--- tests/test_process_parallel.py | 7 +- tests/test_prompt_sampler_comprehensive.py | 127 +++++++---------- 10 files changed, 341 insertions(+), 413 deletions(-) diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py index c9da41634..4dd78a9ea 100644 --- a/openevolve/process_parallel.py +++ b/openevolve/process_parallel.py @@ -8,7 +8,7 @@ import pickle import signal import time -from concurrent.futures import ProcessPoolExecutor, Future +from concurrent.futures import ProcessPoolExecutor, Future, TimeoutError as FutureTimeoutError from dataclasses import dataclass, asdict from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -454,7 +454,9 @@ async def run_evolution( future = pending_futures.pop(completed_iteration) try: - result = future.result() + # Use evaluator timeout + buffer to gracefully handle stuck processes + timeout_seconds = self.config.evaluator.timeout + 30 + result = future.result(timeout=timeout_seconds) if result.error: logger.warning(f"Iteration {completed_iteration} error: {result.error}") @@ -612,6 +614,14 @@ async def run_evolution( ) break + except FutureTimeoutError: + logger.error( + f"ā° Iteration {completed_iteration} timed out after {timeout_seconds}s " + f"(evaluator timeout: {self.config.evaluator.timeout}s + 30s buffer). " + f"Canceling future and continuing with next iteration." + ) + # Cancel the future to clean up the process + future.cancel() except Exception as e: logger.error(f"Error processing result from iteration {completed_iteration}: {e}") diff --git a/tests/test_checkpoint_resume.py b/tests/test_checkpoint_resume.py index eba314b7b..fa13a0592 100644 --- a/tests/test_checkpoint_resume.py +++ b/tests/test_checkpoint_resume.py @@ -308,7 +308,9 @@ async def run_test(): ) # Mock the parallel controller to avoid API calls - with patch("openevolve.controller.ProcessParallelController") as mock_parallel_class: + with patch( + "openevolve.controller.ProcessParallelController" + ) as mock_parallel_class: mock_parallel = MagicMock() mock_parallel.run_evolution = AsyncMock(return_value=None) mock_parallel.start = MagicMock() diff --git a/tests/test_feature_stats_persistence.py b/tests/test_feature_stats_persistence.py index 0b508e05d..236f09cf6 100644 --- a/tests/test_feature_stats_persistence.py +++ b/tests/test_feature_stats_persistence.py @@ -22,7 +22,7 @@ def setUp(self): self.config = DatabaseConfig( db_path=self.test_dir, feature_dimensions=["score", "custom_metric1", "custom_metric2"], - feature_bins=10 + feature_bins=10, ) def tearDown(self): @@ -33,7 +33,7 @@ def test_feature_stats_saved_and_loaded(self): """Test that feature_stats are correctly saved and loaded from checkpoints""" # Create database and add programs to build feature_stats db1 = ProgramDatabase(self.config) - + programs = [] for i in range(5): program = Program( @@ -42,8 +42,8 @@ def test_feature_stats_saved_and_loaded(self): metrics={ "combined_score": 0.1 + i * 0.2, "custom_metric1": 10 + i * 20, - "custom_metric2": 100 + i * 50 - } + "custom_metric2": 100 + i * 50, + }, ) programs.append(program) db1.add(program) @@ -52,14 +52,10 @@ def test_feature_stats_saved_and_loaded(self): self.assertIn("score", db1.feature_stats) self.assertIn("custom_metric1", db1.feature_stats) self.assertIn("custom_metric2", db1.feature_stats) - + # Store original feature_stats for comparison original_stats = { - dim: { - "min": stats["min"], - "max": stats["max"], - "values": stats["values"].copy() - } + dim: {"min": stats["min"], "max": stats["max"], "values": stats["values"].copy()} for dim, stats in db1.feature_stats.items() } @@ -72,11 +68,11 @@ def test_feature_stats_saved_and_loaded(self): # Verify feature_stats were loaded correctly self.assertEqual(len(db2.feature_stats), len(original_stats)) - + for dim, original in original_stats.items(): self.assertIn(dim, db2.feature_stats) loaded = db2.feature_stats[dim] - + self.assertAlmostEqual(loaded["min"], original["min"], places=5) self.assertAlmostEqual(loaded["max"], original["max"], places=5) self.assertEqual(loaded["values"], original["values"]) @@ -84,21 +80,21 @@ def test_feature_stats_saved_and_loaded(self): def test_empty_feature_stats_handling(self): """Test handling of empty feature_stats""" db1 = ProgramDatabase(self.config) - + # Save without any programs (empty feature_stats) db1.save(self.test_dir, iteration=1) - + # Load and verify db2 = ProgramDatabase(self.config) db2.load(self.test_dir) - + self.assertEqual(db2.feature_stats, {}) def test_backward_compatibility_missing_feature_stats(self): """Test loading checkpoints that don't have feature_stats (backward compatibility)""" # Create a checkpoint manually without feature_stats os.makedirs(self.test_dir, exist_ok=True) - + # Create metadata without feature_stats (simulating old checkpoint) metadata = { "feature_map": {}, @@ -112,60 +108,48 @@ def test_backward_compatibility_missing_feature_stats(self): "last_migration_generation": 0, # Note: no "feature_stats" key } - + with open(os.path.join(self.test_dir, "metadata.json"), "w") as f: json.dump(metadata, f) - + # Load should work without errors db = ProgramDatabase(self.config) db.load(self.test_dir) - + # feature_stats should be empty but not None self.assertEqual(db.feature_stats, {}) def test_feature_stats_serialization_edge_cases(self): """Test feature_stats serialization handles edge cases correctly""" db = ProgramDatabase(self.config) - + # Test with various edge cases db.feature_stats = { - "normal_case": { - "min": 1.0, - "max": 10.0, - "values": [1.0, 5.0, 10.0] - }, - "single_value": { - "min": 5.0, - "max": 5.0, - "values": [5.0] - }, + "normal_case": {"min": 1.0, "max": 10.0, "values": [1.0, 5.0, 10.0]}, + "single_value": {"min": 5.0, "max": 5.0, "values": [5.0]}, "large_values_list": { "min": 0.0, "max": 200.0, - "values": list(range(200)) # Should be truncated to 100 + "values": list(range(200)), # Should be truncated to 100 }, - "empty_values": { - "min": 0.0, - "max": 1.0, - "values": [] - } + "empty_values": {"min": 0.0, "max": 1.0, "values": []}, } - + # Test serialization serialized = db._serialize_feature_stats() - + # Check that large values list was truncated self.assertLessEqual(len(serialized["large_values_list"]["values"]), 100) - + # Test deserialization deserialized = db._deserialize_feature_stats(serialized) - + # Verify structure is maintained self.assertIn("normal_case", deserialized) self.assertIn("single_value", deserialized) self.assertIn("large_values_list", deserialized) self.assertIn("empty_values", deserialized) - + # Verify types are correct for dim, stats in deserialized.items(): self.assertIsInstance(stats["min"], float) @@ -176,9 +160,9 @@ def test_feature_stats_preservation_during_load(self): """Test that feature_stats ranges are preserved when loading from checkpoint""" # Create database with programs db1 = ProgramDatabase(self.config) - + test_programs = [] - + for i in range(3): program = Program( id=f"stats_test_{i}", @@ -186,8 +170,8 @@ def test_feature_stats_preservation_during_load(self): metrics={ "combined_score": 0.2 + i * 0.3, "custom_metric1": 20 + i * 30, - "custom_metric2": 200 + i * 100 - } + "custom_metric2": 200 + i * 100, + }, ) test_programs.append(program) db1.add(program) @@ -195,10 +179,7 @@ def test_feature_stats_preservation_during_load(self): # Record original feature ranges original_ranges = {} for dim, stats in db1.feature_stats.items(): - original_ranges[dim] = { - "min": stats["min"], - "max": stats["max"] - } + original_ranges[dim] = {"min": stats["min"], "max": stats["max"]} # Save checkpoint db1.save(self.test_dir, iteration=50) @@ -211,31 +192,35 @@ def test_feature_stats_preservation_during_load(self): for dim, original_range in original_ranges.items(): self.assertIn(dim, db2.feature_stats) loaded_stats = db2.feature_stats[dim] - + self.assertAlmostEqual( - loaded_stats["min"], original_range["min"], places=5, - msg=f"Min value changed for {dim}: {original_range['min']} -> {loaded_stats['min']}" + loaded_stats["min"], + original_range["min"], + places=5, + msg=f"Min value changed for {dim}: {original_range['min']} -> {loaded_stats['min']}", ) self.assertAlmostEqual( - loaded_stats["max"], original_range["max"], places=5, - msg=f"Max value changed for {dim}: {original_range['max']} -> {loaded_stats['max']}" + loaded_stats["max"], + original_range["max"], + places=5, + msg=f"Max value changed for {dim}: {original_range['max']} -> {loaded_stats['max']}", ) - + # Test that adding a new program within existing ranges doesn't break anything new_program = Program( id="range_test", code="# Program to test range stability", metrics={ "combined_score": 0.35, # Within existing range - "custom_metric1": 35, # Within existing range - "custom_metric2": 250 # Within existing range - } + "custom_metric1": 35, # Within existing range + "custom_metric2": 250, # Within existing range + }, ) - + # Adding this program should not cause issues db2.add(new_program) new_coords = db2._calculate_feature_coords(new_program) - + # Should get valid coordinates self.assertEqual(len(new_coords), len(self.config.feature_dimensions)) for coord in new_coords: @@ -245,25 +230,25 @@ def test_feature_stats_preservation_during_load(self): def test_feature_stats_with_numpy_types(self): """Test that numpy types are correctly handled in serialization""" import numpy as np - + db = ProgramDatabase(self.config) - + # Simulate feature_stats with numpy types db.feature_stats = { "numpy_test": { "min": np.float64(1.5), "max": np.float64(9.5), - "values": [np.float64(x) for x in [1.5, 5.0, 9.5]] + "values": [np.float64(x) for x in [1.5, 5.0, 9.5]], } } - + # Test serialization doesn't fail serialized = db._serialize_feature_stats() - + # Verify numpy types were converted to Python types self.assertIsInstance(serialized["numpy_test"]["min"], float) self.assertIsInstance(serialized["numpy_test"]["max"], float) - + # Test deserialization deserialized = db._deserialize_feature_stats(serialized) self.assertIsInstance(deserialized["numpy_test"]["min"], float) @@ -272,32 +257,28 @@ def test_feature_stats_with_numpy_types(self): def test_malformed_feature_stats_handling(self): """Test handling of malformed feature_stats during deserialization""" db = ProgramDatabase(self.config) - + # Test with malformed data malformed_data = { - "valid_entry": { - "min": 1.0, - "max": 10.0, - "values": [1.0, 5.0, 10.0] - }, + "valid_entry": {"min": 1.0, "max": 10.0, "values": [1.0, 5.0, 10.0]}, "invalid_entry": "this is not a dict", "missing_keys": { "min": 1.0 # missing "max" and "values" - } + }, } - - with patch('openevolve.database.logger') as mock_logger: + + with patch("openevolve.database.logger") as mock_logger: deserialized = db._deserialize_feature_stats(malformed_data) - + # Should have valid entry and skip invalid ones self.assertIn("valid_entry", deserialized) self.assertNotIn("invalid_entry", deserialized) self.assertIn("missing_keys", deserialized) # Should be created with defaults - + # Should have logged warning for invalid entry mock_logger.warning.assert_called() if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_grid_stability.py b/tests/test_grid_stability.py index 514d89b24..86a387511 100644 --- a/tests/test_grid_stability.py +++ b/tests/test_grid_stability.py @@ -27,24 +27,22 @@ def test_feature_ranges_preserved_across_checkpoints(self): config = DatabaseConfig( db_path=self.test_dir, feature_dimensions=["score", "prompt_length", "reasoning_sophistication"], - feature_bins=5 # Use smaller bins for easier testing + feature_bins=5, # Use smaller bins for easier testing ) # Phase 1: Create initial population with specific range db1 = ProgramDatabase(config) - + # Create programs with known metrics to establish ranges test_cases = [ {"combined_score": 0.2, "prompt_length": 100, "reasoning_sophistication": 0.1}, {"combined_score": 0.5, "prompt_length": 300, "reasoning_sophistication": 0.5}, {"combined_score": 0.8, "prompt_length": 500, "reasoning_sophistication": 0.9}, ] - + for i, metrics in enumerate(test_cases): program = Program( - id=f"range_test_{i}", - code=f"# Range test program {i}", - metrics=metrics + id=f"range_test_{i}", code=f"# Range test program {i}", metrics=metrics ) db1.add(program) @@ -54,7 +52,7 @@ def test_feature_ranges_preserved_across_checkpoints(self): original_ranges[dim] = { "min": stats["min"], "max": stats["max"], - "value_count": len(stats["values"]) + "value_count": len(stats["values"]), } # Save checkpoint @@ -71,14 +69,18 @@ def test_feature_ranges_preserved_across_checkpoints(self): for dim, original_range in original_ranges.items(): self.assertIn(dim, db2.feature_stats) loaded_stats = db2.feature_stats[dim] - + self.assertAlmostEqual( - loaded_stats["min"], original_range["min"], places=5, - msg=f"Min range changed for {dim}" + loaded_stats["min"], + original_range["min"], + places=5, + msg=f"Min range changed for {dim}", ) self.assertAlmostEqual( - loaded_stats["max"], original_range["max"], places=5, - msg=f"Max range changed for {dim}" + loaded_stats["max"], + original_range["max"], + places=5, + msg=f"Max range changed for {dim}", ) # Phase 3: Add new program within existing range - ranges should not contract @@ -87,39 +89,35 @@ def test_feature_ranges_preserved_across_checkpoints(self): code="# New program within established range", metrics={ "combined_score": 0.35, # Between existing values - "prompt_length": 200, # Between existing values - "reasoning_sophistication": 0.3 # Between existing values - } + "prompt_length": 200, # Between existing values + "reasoning_sophistication": 0.3, # Between existing values + }, ) - + # Add new program db2.add(new_program) new_coords = db2._calculate_feature_coords(new_program) - + # Verify ranges did not contract (should be same or expanded) for dim, original_range in original_ranges.items(): current_stats = db2.feature_stats[dim] - + self.assertLessEqual( - current_stats["min"], original_range["min"], - f"Min range contracted for {dim}" + current_stats["min"], original_range["min"], f"Min range contracted for {dim}" ) self.assertGreaterEqual( - current_stats["max"], original_range["max"], - f"Max range contracted for {dim}" + current_stats["max"], original_range["max"], f"Max range contracted for {dim}" ) def test_grid_expansion_behavior(self): """Test that grid expands correctly when new programs exceed existing ranges""" config = DatabaseConfig( - db_path=self.test_dir, - feature_dimensions=["score", "execution_time"], - feature_bins=5 + db_path=self.test_dir, feature_dimensions=["score", "execution_time"], feature_bins=5 ) # Phase 1: Establish initial range db1 = ProgramDatabase(config) - + # Initial programs with limited range for i in range(3): program = Program( @@ -127,8 +125,8 @@ def test_grid_expansion_behavior(self): code=f"# Initial program {i}", metrics={ "combined_score": 0.4 + i * 0.1, # 0.4 to 0.6 - "execution_time": 10 + i * 5 # 10 to 20 - } + "execution_time": 10 + i * 5, # 10 to 20 + }, ) db1.add(program) @@ -156,76 +154,67 @@ def test_grid_expansion_behavior(self): id="expansion_test", code="# Program to test range expansion", metrics={ - "combined_score": 0.9, # Higher than existing max (0.6) - "execution_time": 50 # Higher than existing max (20) - } + "combined_score": 0.9, # Higher than existing max (0.6) + "execution_time": 50, # Higher than existing max (20) + }, ) - + db2.add(expansion_program) # Verify ranges expanded appropriately self.assertLessEqual(db2.feature_stats["score"]["min"], original_score_min) self.assertGreaterEqual(db2.feature_stats["score"]["max"], 0.9) - self.assertLessEqual(db2.feature_stats["execution_time"]["min"], original_time_min) + self.assertLessEqual(db2.feature_stats["execution_time"]["min"], original_time_min) self.assertGreaterEqual(db2.feature_stats["execution_time"]["max"], 50) def test_feature_stats_consistency_across_cycles(self): """Test that feature_stats remain consistent across multiple save/load cycles""" config = DatabaseConfig( - db_path=self.test_dir, - feature_dimensions=["score", "memory_usage"], - feature_bins=4 + db_path=self.test_dir, feature_dimensions=["score", "memory_usage"], feature_bins=4 ) # Initial program to establish baseline reference_program = Program( id="reference", code="# Reference program for consistency testing", - metrics={ - "combined_score": 0.5, - "memory_usage": 1024 - } + metrics={"combined_score": 0.5, "memory_usage": 1024}, ) # Cycle 1: Establish initial feature stats db1 = ProgramDatabase(config) db1.add(reference_program) - + # Record initial feature stats cycle1_stats = {} for dim, stats in db1.feature_stats.items(): - cycle1_stats[dim] = { - "min": stats["min"], - "max": stats["max"] - } - + cycle1_stats[dim] = {"min": stats["min"], "max": stats["max"]} + db1.save(self.test_dir, iteration=10) # Cycle 2: Load and verify stats preservation db2 = ProgramDatabase(config) db2.load(self.test_dir) - + # Verify feature stats were preserved for dim, original_stats in cycle1_stats.items(): self.assertIn(dim, db2.feature_stats) self.assertAlmostEqual(db2.feature_stats[dim]["min"], original_stats["min"]) self.assertAlmostEqual(db2.feature_stats[dim]["max"], original_stats["max"]) - + # Add another program and save again - db2.add(Program( - id="cycle2_program", - code="# Cycle 2 program", - metrics={"combined_score": 0.3, "memory_usage": 512} - )) - + db2.add( + Program( + id="cycle2_program", + code="# Cycle 2 program", + metrics={"combined_score": 0.3, "memory_usage": 512}, + ) + ) + # Record expanded stats after adding new program cycle2_stats = {} for dim, stats in db2.feature_stats.items(): - cycle2_stats[dim] = { - "min": stats["min"], - "max": stats["max"] - } - + cycle2_stats[dim] = {"min": stats["min"], "max": stats["max"]} + db2.save(self.test_dir, iteration=20) # Cycle 3: Verify stats are still preserved @@ -236,33 +225,30 @@ def test_feature_stats_consistency_across_cycles(self): for dim, cycle2_stats_dim in cycle2_stats.items(): self.assertIn(dim, db3.feature_stats) self.assertAlmostEqual( - db3.feature_stats[dim]["min"], cycle2_stats_dim["min"], - msg=f"Min value changed for {dim} in cycle 3" + db3.feature_stats[dim]["min"], + cycle2_stats_dim["min"], + msg=f"Min value changed for {dim} in cycle 3", ) self.assertAlmostEqual( - db3.feature_stats[dim]["max"], cycle2_stats_dim["max"], - msg=f"Max value changed for {dim} in cycle 3" + db3.feature_stats[dim]["max"], + cycle2_stats_dim["max"], + msg=f"Max value changed for {dim} in cycle 3", ) def test_feature_stats_accumulation(self): """Test that feature_stats accumulate correctly across checkpoint cycles""" config = DatabaseConfig( - db_path=self.test_dir, - feature_dimensions=["score", "complexity"], - feature_bins=10 + db_path=self.test_dir, feature_dimensions=["score", "complexity"], feature_bins=10 ) # Cycle 1: Initial programs db1 = ProgramDatabase(config) - + for i in range(3): program = Program( id=f"phase1_{i}", code=f"# Phase 1 program {i}", - metrics={ - "combined_score": 0.2 + i * 0.2, - "complexity": 100 + i * 50 - } + metrics={"combined_score": 0.2 + i * 0.2, "complexity": 100 + i * 50}, ) db1.add(program) @@ -280,10 +266,7 @@ def test_feature_stats_accumulation(self): program = Program( id=f"phase2_{i}", code=f"# Phase 2 program {i}", - metrics={ - "combined_score": 0.1 + i * 0.3, - "complexity": 75 + i * 75 - } + metrics={"combined_score": 0.1 + i * 0.3, "complexity": 75 + i * 75}, ) db2.add(program) @@ -294,13 +277,13 @@ def test_feature_stats_accumulation(self): # Phase 1 values should be preserved (subset relationship) self.assertTrue( phase1_score_values.issubset(phase2_score_values), - "Phase 1 score values were lost after loading checkpoint" + "Phase 1 score values were lost after loading checkpoint", ) self.assertTrue( phase1_complexity_values.issubset(phase2_complexity_values), - "Phase 1 complexity values were lost after loading checkpoint" + "Phase 1 complexity values were lost after loading checkpoint", ) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_island_isolation.py b/tests/test_island_isolation.py index d70459a4e..2ed5b632f 100644 --- a/tests/test_island_isolation.py +++ b/tests/test_island_isolation.py @@ -20,16 +20,14 @@ def setUp(self): self.config.database.num_islands = 3 self.config.evaluator.parallel_evaluations = 6 # 2 workers per island self.config.database.population_size = 30 - + self.database = ProgramDatabase(self.config.database) self.evaluation_file = "mock_evaluator.py" def test_worker_island_mapping(self): """Test that workers are correctly mapped to islands""" - controller = ProcessParallelController( - self.config, self.evaluation_file, self.database - ) - + controller = ProcessParallelController(self.config, self.evaluation_file, self.database) + # Check mapping is correct expected_mapping = { 0: 0, # Worker 0 -> Island 0 @@ -39,182 +37,164 @@ def test_worker_island_mapping(self): 4: 1, # Worker 4 -> Island 1 5: 2, # Worker 5 -> Island 2 } - + self.assertEqual(controller.worker_island_map, expected_mapping) def test_uneven_worker_distribution(self): """Test mapping when workers don't divide evenly into islands""" self.config.evaluator.parallel_evaluations = 7 # Not divisible by 3 - - controller = ProcessParallelController( - self.config, self.evaluation_file, self.database - ) - + + controller = ProcessParallelController(self.config, self.evaluation_file, self.database) + # Island 0 should get 3 workers, islands 1 and 2 get 2 each island_worker_counts = {0: 0, 1: 0, 2: 0} for worker_id, island_id in controller.worker_island_map.items(): island_worker_counts[island_id] += 1 - + self.assertEqual(island_worker_counts[0], 3) self.assertEqual(island_worker_counts[1], 2) self.assertEqual(island_worker_counts[2], 2) def test_submit_iteration_uses_correct_island(self): """Test that _submit_iteration samples from the specified island""" - controller = ProcessParallelController( - self.config, self.evaluation_file, self.database - ) - + controller = ProcessParallelController(self.config, self.evaluation_file, self.database) + # Add some test programs to different islands for i in range(9): program = Program( - id=f"test_prog_{i}", - code=f"# Test program {i}", - metrics={"combined_score": 0.5} + id=f"test_prog_{i}", code=f"# Test program {i}", metrics={"combined_score": 0.5} ) island_id = i % 3 program.metadata["island"] = island_id self.database.add(program) self.database.islands[island_id].add(program.id) - - with patch.object(controller, 'executor') as mock_executor: + + with patch.object(controller, "executor") as mock_executor: mock_future = MagicMock() mock_executor.submit.return_value = mock_future - + # Submit iteration for island 1 original_island = self.database.current_island future = controller._submit_iteration(100, island_id=1) - + # Check that database island was temporarily changed # but restored after sampling self.assertEqual(self.database.current_island, original_island) - + # Check that submit was called self.assertIsNotNone(future) mock_executor.submit.assert_called_once() - + # Get the snapshot that was passed to worker call_args = mock_executor.submit.call_args[0] db_snapshot = call_args[2] # Third argument is db_snapshot - + # Verify snapshot has island marking self.assertEqual(db_snapshot["sampling_island"], 1) def test_island_isolation_during_evolution(self): """Test that parallel workers maintain island isolation""" - controller = ProcessParallelController( - self.config, self.evaluation_file, self.database - ) - + controller = ProcessParallelController(self.config, self.evaluation_file, self.database) + # Track which islands were sampled sampled_islands = [] - + def mock_sample(num_inspirations=None): # Record which island was sampled sampled_islands.append(self.database.current_island) # Return mock parent and inspirations mock_program = Program(id="mock", code="", metrics={}) return mock_program, [] - - with patch.object(self.database, 'sample', side_effect=mock_sample): - with patch.object(controller, 'executor'): + + with patch.object(self.database, "sample", side_effect=mock_sample): + with patch.object(controller, "executor"): # Submit iterations for different islands controller._submit_iteration(1, island_id=0) controller._submit_iteration(2, island_id=1) controller._submit_iteration(3, island_id=2) controller._submit_iteration(4, island_id=0) - + # Check that correct islands were sampled self.assertEqual(sampled_islands, [0, 1, 2, 0]) def test_fewer_workers_than_islands(self): """Test handling when there are fewer workers than islands""" self.config.evaluator.parallel_evaluations = 2 # Only 2 workers for 3 islands - - controller = ProcessParallelController( - self.config, self.evaluation_file, self.database - ) - + + controller = ProcessParallelController(self.config, self.evaluation_file, self.database) + # Workers should be distributed across available islands expected_mapping = { 0: 0, # Worker 0 -> Island 0 1: 1, # Worker 1 -> Island 1 # Island 2 has no dedicated worker } - + self.assertEqual(controller.worker_island_map, expected_mapping) def test_database_current_island_restoration(self): """Test that database current_island is properly restored after sampling""" - controller = ProcessParallelController( - self.config, self.evaluation_file, self.database - ) - + controller = ProcessParallelController(self.config, self.evaluation_file, self.database) + # Add test programs for i in range(6): program = Program( - id=f"test_prog_{i}", - code=f"# Test program {i}", - metrics={"combined_score": 0.5} + id=f"test_prog_{i}", code=f"# Test program {i}", metrics={"combined_score": 0.5} ) island_id = i % 3 program.metadata["island"] = island_id self.database.add(program) self.database.islands[island_id].add(program.id) - + # Set initial island self.database.current_island = 1 original_island = self.database.current_island - - with patch.object(controller, 'executor') as mock_executor: + + with patch.object(controller, "executor") as mock_executor: mock_executor.submit.return_value = MagicMock() - + # Submit iteration for different island controller._submit_iteration(100, island_id=2) - + # Check that current_island was restored self.assertEqual(self.database.current_island, original_island) def test_island_distribution_in_batch(self): """Test that initial batch is distributed across islands""" - controller = ProcessParallelController( - self.config, self.evaluation_file, self.database - ) - + controller = ProcessParallelController(self.config, self.evaluation_file, self.database) + # Add test programs for i in range(9): program = Program( - id=f"test_prog_{i}", - code=f"# Test program {i}", - metrics={"combined_score": 0.5} + id=f"test_prog_{i}", code=f"# Test program {i}", metrics={"combined_score": 0.5} ) island_id = i % 3 program.metadata["island"] = island_id self.database.add(program) self.database.islands[island_id].add(program.id) - + # Track submitted islands submitted_islands = [] - + def mock_submit_iteration(iteration, island_id=None): if island_id is not None: submitted_islands.append(island_id) return MagicMock() - + # Start the process pool controller.start() - + try: - with patch.object(controller, '_submit_iteration', side_effect=mock_submit_iteration): + with patch.object(controller, "_submit_iteration", side_effect=mock_submit_iteration): # Start evolution with small batch to test distribution asyncio.run(controller.run_evolution(1, 6)) # 6 iterations - + # Check that islands were distributed (expect round-robin pattern) # Should be [0, 1, 2, 0, 1, 2] or similar distribution island_counts = {0: 0, 1: 0, 2: 0} for island_id in submitted_islands: island_counts[island_id] += 1 - + # Each island should have received iterations for count in island_counts.values(): self.assertGreater(count, 0) @@ -224,7 +204,7 @@ def mock_submit_iteration(iteration, island_id=None): class TestIslandMigration(unittest.TestCase): """Test that migration still works with island pinning""" - + def setUp(self): """Set up test environment""" self.config = Config() @@ -232,49 +212,47 @@ def setUp(self): self.config.database.migration_interval = 10 self.config.database.migration_rate = 0.1 self.database = ProgramDatabase(self.config.database) - + def test_migration_preserves_island_structure(self): """Test that migration works correctly with pinned workers""" # Add programs to islands properly for i in range(30): program = Program( - id=f"prog_{i}", - code=f"# Program {i}", - metrics={"combined_score": i * 0.1} + id=f"prog_{i}", code=f"# Program {i}", metrics={"combined_score": i * 0.1} ) island_id = i % 3 program.metadata["island"] = island_id - + # Add to database self.database.programs[program.id] = program # Add to island self.database.islands[island_id].add(program.id) - + # Record island populations before migration island_sizes_before = [len(island) for island in self.database.islands] original_program_count = len(self.database.programs) - + # Verify we set up the test correctly self.assertEqual(sum(island_sizes_before), 30) self.assertEqual(original_program_count, 30) - + # Trigger migration self.database.migrate_programs() - + # Check islands still have programs island_sizes_after = [len(island) for island in self.database.islands] total_programs_after = len(self.database.programs) - + # All islands should still have programs for size in island_sizes_after: self.assertGreater(size, 0) - + # Migration creates copies, so total population should increase # With migration_rate=0.1 and 10 programs per island, expect ~1 program per island to migrate # Each program migrates to 2 adjacent islands, so we expect ~6 new programs self.assertGreater(total_programs_after, original_program_count) self.assertGreater(sum(island_sizes_after), sum(island_sizes_before)) - + # Verify that migrant programs have correct metadata migrant_count = 0 for program in self.database.programs.values(): @@ -282,40 +260,40 @@ def test_migration_preserves_island_structure(self): migrant_count += 1 # Migrant should have "_migrant_" in their ID self.assertIn("_migrant_", program.id) - + # Should have some migrant programs self.assertGreater(migrant_count, 0) class TestWorkerPinningEdgeCases(unittest.TestCase): """Test edge cases for worker-to-island pinning""" - + def test_single_island(self): """Test behavior with only one island""" config = Config() config.database.num_islands = 1 config.evaluator.parallel_evaluations = 4 - + database = ProgramDatabase(config.database) controller = ProcessParallelController(config, "test.py", database) - + # All workers should map to island 0 expected_mapping = {0: 0, 1: 0, 2: 0, 3: 0} self.assertEqual(controller.worker_island_map, expected_mapping) - + def test_single_worker(self): """Test behavior with only one worker""" config = Config() config.database.num_islands = 5 config.evaluator.parallel_evaluations = 1 - + database = ProgramDatabase(config.database) controller = ProcessParallelController(config, "test.py", database) - + # Single worker should map to island 0 expected_mapping = {0: 0} self.assertEqual(controller.worker_island_map, expected_mapping) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_island_parent_consistency.py b/tests/test_island_parent_consistency.py index ad6bd385e..d15eeb704 100644 --- a/tests/test_island_parent_consistency.py +++ b/tests/test_island_parent_consistency.py @@ -15,63 +15,60 @@ def test_parent_child_island_consistency(self): config = Config() config.database.num_islands = 3 database = ProgramDatabase(config.database) - + # Create initial program on island 0 initial_program = Program( - id="initial", - code="def initial(): pass", - metrics={"score": 0.5}, - iteration_found=0 + id="initial", code="def initial(): pass", metrics={"score": 0.5}, iteration_found=0 ) database.add(initial_program) # Should go to island 0 (current_island) - + # Verify initial program is on island 0 self.assertIn("initial", database.islands[0]) self.assertEqual(initial_program.metadata.get("island"), 0) - + # Now switch to island 1 database.next_island() self.assertEqual(database.current_island, 1) - + # Create a child of the initial program child_program = Program( id="child1", code="def child1(): pass", parent_id="initial", # Parent is on island 0 metrics={"score": 0.6}, - iteration_found=1 + iteration_found=1, ) - + # Add child without specifying target_island # This is what happens in process_parallel.py line 445 database.add(child_program) - + # With the fix: child should go to parent's island (0), not current_island (1) parent_island = database.programs["initial"].metadata.get("island", 0) child_island = database.programs["child1"].metadata.get("island") - + # Check if parent is in child's island (this is what the user's assertion checks) if child_program.parent_id: # This is the exact check from the issue report - should now pass self.assertIn( - child_program.parent_id, + child_program.parent_id, database.islands[child_island], - "Parent should be in child's island" + "Parent should be in child's island", ) - + # Verify child is on same island as parent self.assertEqual( - parent_island, + parent_island, child_island, - f"Child should be on same island as parent. Parent: island {parent_island}, Child: island {child_island}" + f"Child should be on same island as parent. Parent: island {parent_island}, Child: island {child_island}", ) - + def test_multiple_generations_island_drift(self): """Test that island drift happens across multiple generations""" config = Config() config.database.num_islands = 4 database = ProgramDatabase(config.database) - + # Create a lineage programs = [] for i in range(10): @@ -81,7 +78,7 @@ def test_multiple_generations_island_drift(self): id=f"prog_{i}", code=f"def prog_{i}(): pass", metrics={"score": 0.1 * i}, - iteration_found=i + iteration_found=i, ) else: # Child of previous @@ -90,16 +87,16 @@ def test_multiple_generations_island_drift(self): code=f"def prog_{i}(): pass", parent_id=f"prog_{i-1}", metrics={"score": 0.1 * i}, - iteration_found=i + iteration_found=i, ) - + database.add(prog) programs.append(prog) - + # Switch islands periodically (simulating what happens in evolution) if i % 3 == 0: database.next_island() - + # Check island consistency inconsistent_pairs = [] for prog in programs: @@ -108,18 +105,18 @@ def test_multiple_generations_island_drift(self): if parent: parent_island = parent.metadata.get("island") child_island = prog.metadata.get("island") - + # Check if parent is in child's island if prog.parent_id not in database.islands[child_island]: inconsistent_pairs.append((prog.parent_id, prog.id)) - + # With the fix, we should find NO inconsistent parent-child island assignments self.assertEqual( - len(inconsistent_pairs), + len(inconsistent_pairs), 0, - f"Found {len(inconsistent_pairs)} inconsistent parent-child pairs: {inconsistent_pairs}" + f"Found {len(inconsistent_pairs)} inconsistent parent-child pairs: {inconsistent_pairs}", ) - + # Verify all parent-child pairs are on the same island for prog in programs: if prog.parent_id: @@ -131,44 +128,40 @@ def test_multiple_generations_island_drift(self): parent_island, child_island, f"Parent {prog.parent_id} (island {parent_island}) and " - f"child {prog.id} (island {child_island}) should be on same island" + f"child {prog.id} (island {child_island}) should be on same island", ) - def test_explicit_migration_override(self): """Test that explicit target_island overrides parent island inheritance""" config = Config() config.database.num_islands = 3 database = ProgramDatabase(config.database) - + # Create parent on island 0 parent = Program( - id="parent", - code="def parent(): pass", - metrics={"score": 0.5}, - iteration_found=0 + id="parent", code="def parent(): pass", metrics={"score": 0.5}, iteration_found=0 ) database.add(parent) # Goes to island 0 self.assertIn("parent", database.islands[0]) - + # Create child but explicitly send to island 2 (migration) migrant_child = Program( id="migrant", code="def migrant(): pass", parent_id="parent", metrics={"score": 0.7}, - iteration_found=1 + iteration_found=1, ) database.add(migrant_child, target_island=2) # Explicit migration - + # Verify migrant went to island 2, not parent's island 0 self.assertIn("migrant", database.islands[2]) self.assertNotIn("migrant", database.islands[0]) self.assertEqual(migrant_child.metadata.get("island"), 2) - + # Parent should still be on island 0 self.assertEqual(database.programs["parent"].metadata.get("island"), 0) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_model_parameter_demo.py b/tests/test_model_parameter_demo.py index 142fe43e1..6e19c6229 100644 --- a/tests/test_model_parameter_demo.py +++ b/tests/test_model_parameter_demo.py @@ -2,69 +2,73 @@ Demonstration of fixed OpenAI model parameter handling """ + def demo_model_parameter_selection(): """Demonstrate how different models get different parameters""" - + # Mock the logic from openai.py OPENAI_REASONING_MODEL_PREFIXES = ( # O-series reasoning models - "o1-", "o1", # o1, o1-mini, o1-preview - "o3-", "o3", # o3, o3-mini, o3-pro - "o4-", # o4-mini + "o1-", + "o1", # o1, o1-mini, o1-preview + "o3-", + "o3", # o3, o3-mini, o3-pro + "o4-", # o4-mini # GPT-5 series are also reasoning models - "gpt-5-", "gpt-5" # gpt-5, gpt-5-mini, gpt-5-nano + "gpt-5-", + "gpt-5", # gpt-5, gpt-5-mini, gpt-5-nano ) - + def get_params_for_model(model_name, api_base="https://api.openai.com/v1"): """Show what parameters would be used for each model""" model_lower = str(model_name).lower() is_openai_reasoning_model = ( - api_base == "https://api.openai.com/v1" and - model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES) + api_base == "https://api.openai.com/v1" + and model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES) ) - + if is_openai_reasoning_model: return { "type": "reasoning_model", "uses": "max_completion_tokens", "supports": ["reasoning_effort", "verbosity"], - "excludes": ["temperature", "top_p"] + "excludes": ["temperature", "top_p"], } else: return { - "type": "standard_model", + "type": "standard_model", "uses": "max_tokens", "supports": ["temperature", "top_p"], - "excludes": [] + "excludes": [], } - + print("šŸ”§ OpenAI Model Parameter Selection Demo") print("=" * 50) - + test_models = [ # Reasoning models ("o1-mini", "āœ… Reasoning"), ("o1-preview", "āœ… Reasoning"), ("o3-mini-2025-01-31", "āœ… Reasoning (with date)"), ("gpt-5-nano", "āœ… Reasoning (GPT-5 series)"), - - # Standard models + # Standard models ("gpt-4o-mini", "āŒ Standard (not reasoning)"), ("gpt-4o", "āŒ Standard"), ("gpt-4-turbo", "āŒ Standard"), ] - + for model, description in test_models: params = get_params_for_model(model) print(f"\nšŸ“‹ Model: {model}") print(f" Type: {description}") print(f" Uses: {params['uses']}") print(f" Supports: {', '.join(params['supports'])}") - if params['excludes']: + if params["excludes"]: print(f" Excludes: {', '.join(params['excludes'])}") - + print("\n" + "=" * 50) print("āœ… Fix successful! No more false positives/negatives.") + if __name__ == "__main__": - demo_model_parameter_selection() \ No newline at end of file + demo_model_parameter_selection() diff --git a/tests/test_openai_model_detection.py b/tests/test_openai_model_detection.py index fb9b745f0..c8665abd0 100644 --- a/tests/test_openai_model_detection.py +++ b/tests/test_openai_model_detection.py @@ -11,23 +11,25 @@ class TestOpenAIReasoningModelDetection(unittest.TestCase): def test_reasoning_model_detection(self): """Test various model names to ensure correct reasoning model detection""" - + # Define the same constants as in the code OPENAI_REASONING_MODEL_PREFIXES = ( # O-series reasoning models - "o1-", "o1", # o1, o1-mini, o1-preview - "o3-", "o3", # o3, o3-mini, o3-pro - "o4-", # o4-mini + "o1-", + "o1", # o1, o1-mini, o1-preview + "o3-", + "o3", # o3, o3-mini, o3-pro + "o4-", # o4-mini # GPT-5 series are also reasoning models - "gpt-5-", "gpt-5" # gpt-5, gpt-5-mini, gpt-5-nano + "gpt-5-", + "gpt-5", # gpt-5, gpt-5-mini, gpt-5-nano ) - + def is_reasoning_model(model_name, api_base="https://api.openai.com/v1"): """Test function that mimics the logic in openai.py""" model_lower = str(model_name).lower() - return ( - api_base == "https://api.openai.com/v1" and - model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES) + return api_base == "https://api.openai.com/v1" and model_lower.startswith( + OPENAI_REASONING_MODEL_PREFIXES ) # Test cases: (model_name, expected_result, description) @@ -44,7 +46,6 @@ def is_reasoning_model(model_name, api_base="https://api.openai.com/v1"): ("gpt-5", True, "Base gpt-5 model"), ("gpt-5-mini", True, "gpt-5-mini model"), ("gpt-5-nano", True, "gpt-5-nano model"), - # Non-reasoning models - should return False ("gpt-4o-mini", False, "gpt-4o-mini (not reasoning)"), ("gpt-4o", False, "gpt-4o (not reasoning)"), @@ -52,50 +53,46 @@ def is_reasoning_model(model_name, api_base="https://api.openai.com/v1"): ("gpt-3.5-turbo", False, "gpt-3.5-turbo (not reasoning)"), ("claude-3", False, "Non-OpenAI model"), ("gemini-pro", False, "Non-OpenAI model"), - # Edge cases ("O1-MINI", True, "Uppercase o1-mini"), ("GPT-5-MINI", True, "Uppercase gpt-5-mini"), ] - + for model_name, expected, description in test_cases: with self.subTest(model=model_name, desc=description): result = is_reasoning_model(model_name) self.assertEqual( - result, - expected, - f"Model '{model_name}' ({description}): expected {expected}, got {result}" + result, + expected, + f"Model '{model_name}' ({description}): expected {expected}, got {result}", ) def test_non_openai_api_base(self): """Test that non-OpenAI API bases don't trigger reasoning model logic""" - OPENAI_REASONING_MODEL_PREFIXES = ( - "o1-", "o1", "o3-", "o3", "o4-", "gpt-5-", "gpt-5" - ) - + OPENAI_REASONING_MODEL_PREFIXES = ("o1-", "o1", "o3-", "o3", "o4-", "gpt-5-", "gpt-5") + def is_reasoning_model(model_name, api_base): model_lower = str(model_name).lower() - return ( - api_base == "https://api.openai.com/v1" and - model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES) + return api_base == "https://api.openai.com/v1" and model_lower.startswith( + OPENAI_REASONING_MODEL_PREFIXES ) - + # Even reasoning model names should return False for non-OpenAI APIs test_cases = [ ("o1-mini", "https://api.anthropic.com/v1", False), ("gpt-5", "https://generativelanguage.googleapis.com/v1beta/openai/", False), ("o3-mini", "https://api.deepseek.com/v1", False), ] - + for model_name, api_base, expected in test_cases: with self.subTest(model=model_name, api=api_base): result = is_reasoning_model(model_name, api_base) self.assertEqual( - result, - expected, - f"Model '{model_name}' with API '{api_base}' should return {expected}" + result, + expected, + f"Model '{model_name}' with API '{api_base}' should return {expected}", ) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_process_parallel.py b/tests/test_process_parallel.py index 925d23a3c..8cdd525b3 100644 --- a/tests/test_process_parallel.py +++ b/tests/test_process_parallel.py @@ -8,6 +8,7 @@ import unittest from unittest.mock import Mock, patch, MagicMock import time +from concurrent.futures import Future # Set dummy API key for testing os.environ["OPENAI_API_KEY"] = "test" @@ -111,7 +112,7 @@ async def run_test(): # Mock the executor to avoid actually spawning processes with patch.object(controller, "_submit_iteration") as mock_submit: # Create mock futures that complete immediately - mock_future1 = asyncio.Future() + mock_future1 = MagicMock() mock_result1 = SerializableResult( child_program_dict={ "id": "child_1", @@ -127,7 +128,9 @@ async def run_test(): iteration_time=0.1, iteration=1, ) - mock_future1.set_result(mock_result1) + mock_future1.done.return_value = True + mock_future1.result.return_value = mock_result1 + mock_future1.cancel.return_value = True mock_submit.return_value = mock_future1 diff --git a/tests/test_prompt_sampler_comprehensive.py b/tests/test_prompt_sampler_comprehensive.py index a0ae9292a..b001c5e67 100644 --- a/tests/test_prompt_sampler_comprehensive.py +++ b/tests/test_prompt_sampler_comprehensive.py @@ -28,9 +28,9 @@ def test_build_prompt_with_inspirations(self): "accuracy": 0.9, "speed": 0.8, "complexity": 5, - "memory_usage": 100 + "memory_usage": 100, } - + # Create inspirations with diverse characteristics inspirations = [ { @@ -41,9 +41,9 @@ def test_build_prompt_with_inspirations(self): "accuracy": 0.7, "speed": 0.95, "complexity": 3, - "memory_usage": 50 + "memory_usage": 50, }, - "metadata": {"diverse": True} + "metadata": {"diverse": True}, }, { "id": "insp2", @@ -53,10 +53,10 @@ def test_build_prompt_with_inspirations(self): "accuracy": 0.8, "speed": 0.5, "complexity": 7, - "memory_usage": 20 + "memory_usage": 20, }, - "metadata": {"migrant": True} - } + "metadata": {"migrant": True}, + }, ] # Build prompt with inspirations and feature_dimensions @@ -65,17 +65,17 @@ def test_build_prompt_with_inspirations(self): parent_program=parent_program, program_metrics=program_metrics, inspirations=inspirations, - feature_dimensions=self.feature_dimensions + feature_dimensions=self.feature_dimensions, ) # Verify prompt was built successfully self.assertIn("system", prompt) self.assertIn("user", prompt) - + # Check that inspirations are included self.assertIn("fast_implementation", prompt["user"]) self.assertIn("memory_efficient", prompt["user"]) - + # Verify fitness scores are calculated correctly (excluding feature dimensions) # The inspirations should show their fitness scores, not including complexity/memory_usage self.assertIn("0.75", prompt["user"]) # insp1's combined_score @@ -91,19 +91,17 @@ def test_format_inspirations_section_with_feature_dimensions(self): "combined_score": 0.9, "accuracy": 0.95, "complexity": 10, # Feature dimension - "memory_usage": 200 # Feature dimension + "memory_usage": 200, # Feature dimension }, - "metadata": {"diverse": True} + "metadata": {"diverse": True}, } ] - + # Call the method directly result = self.prompt_sampler._format_inspirations_section( - inspirations, - "python", - feature_dimensions=["complexity", "memory_usage"] + inspirations, "python", feature_dimensions=["complexity", "memory_usage"] ) - + # Should not raise NameError self.assertIsInstance(result, str) self.assertIn("test_func", result) @@ -116,36 +114,28 @@ def test_format_inspirations_section_without_feature_dimensions(self): "id": "test2", "code": "def another_func(): pass", "metrics": {"score": 0.7, "time": 1.2}, - "metadata": {} + "metadata": {}, } ] - + # Call without feature_dimensions (should use default of None) - result = self.prompt_sampler._format_inspirations_section( - inspirations, - "python" - ) - + result = self.prompt_sampler._format_inspirations_section(inspirations, "python") + self.assertIsInstance(result, str) self.assertIn("another_func", result) def test_determine_program_type_with_feature_dimensions(self): """Test _determine_program_type with feature_dimensions parameter""" program = { - "metrics": { - "combined_score": 0.85, - "complexity": 5, - "memory_usage": 100 - }, - "metadata": {} + "metrics": {"combined_score": 0.85, "complexity": 5, "memory_usage": 100}, + "metadata": {}, } - + # Test with feature_dimensions program_type = self.prompt_sampler._determine_program_type( - program, - feature_dimensions=["complexity", "memory_usage"] + program, feature_dimensions=["complexity", "memory_usage"] ) - + self.assertEqual(program_type, "High-Performer") # Based on combined_score of 0.85 def test_extract_unique_features_calls_determine_program_type(self): @@ -153,30 +143,26 @@ def test_extract_unique_features_calls_determine_program_type(self): program = { "code": "", # Empty code to trigger default features "metrics": {"score": 0.5}, - "metadata": {} + "metadata": {}, } - + # This should not raise NameError when calling _determine_program_type features = self.prompt_sampler._extract_unique_features(program) - + self.assertIsInstance(features, str) self.assertIn("approach to the problem", features) def test_build_prompt_with_all_optional_parameters(self): """Test build_prompt with all optional parameters including inspirations""" current_program = "def main(): pass" - + # Comprehensive test data - previous_programs = [ - {"id": "prev1", "code": "def v1(): pass", "metrics": {"score": 0.3}} - ] + previous_programs = [{"id": "prev1", "code": "def v1(): pass", "metrics": {"score": 0.3}}] top_programs = [ {"id": "top1", "code": "def best(): pass", "metrics": {"combined_score": 0.95}} ] - inspirations = [ - {"id": "insp1", "code": "def creative(): pass", "metrics": {"score": 0.6}} - ] - + inspirations = [{"id": "insp1", "code": "def creative(): pass", "metrics": {"score": 0.6}}] + prompt = self.prompt_sampler.build_prompt( current_program=current_program, parent_program="def parent(): pass", @@ -188,9 +174,9 @@ def test_build_prompt_with_all_optional_parameters(self): evolution_round=5, diff_based_evolution=True, feature_dimensions=["feature1"], - program_artifacts={"output": "test output"} + program_artifacts={"output": "test output"}, ) - + self.assertIn("system", prompt) self.assertIn("user", prompt) # Verify all components are included @@ -205,20 +191,18 @@ def test_fitness_calculation_consistency(self): "accuracy": 0.9, "speed": 0.7, "complexity": 5, # Feature dimension - "memory_usage": 100 # Feature dimension + "memory_usage": 100, # Feature dimension } feature_dimensions = ["complexity", "memory_usage"] - + # Build a prompt with these metrics prompt = self.prompt_sampler.build_prompt( current_program="def test(): pass", program_metrics=metrics, - inspirations=[ - {"id": "i1", "code": "pass", "metrics": metrics} - ], - feature_dimensions=feature_dimensions + inspirations=[{"id": "i1", "code": "pass", "metrics": metrics}], + feature_dimensions=feature_dimensions, ) - + # The fitness score should be 0.8 (combined_score), not an average including features self.assertIn("0.8000", prompt["user"]) # Fitness score in prompt @@ -227,9 +211,9 @@ def test_empty_inspirations_list(self): prompt = self.prompt_sampler.build_prompt( current_program="def empty(): pass", inspirations=[], # Empty list - feature_dimensions=["test_feature"] + feature_dimensions=["test_feature"], ) - + self.assertIn("system", prompt) self.assertIn("user", prompt) # Should complete without errors @@ -246,46 +230,39 @@ def test_inspirations_with_missing_metrics(self): "id": "bad2", "code": "def worse(): pass", # No metrics key at all - } + }, ] - + # Should handle gracefully without errors result = self.prompt_sampler._format_inspirations_section( - inspirations, - "python", - feature_dimensions=["test"] + inspirations, "python", feature_dimensions=["test"] ) - + self.assertIsInstance(result, str) def test_feature_dimensions_none_vs_empty_list(self): """Test that None and empty list for feature_dimensions are handled correctly""" program = {"metrics": {"score": 0.5}} - + # Test with None type_none = self.prompt_sampler._determine_program_type(program, None) - + # Test with empty list type_empty = self.prompt_sampler._determine_program_type(program, []) - + # Both should work and give same result self.assertEqual(type_none, type_empty) def test_feature_coordinates_formatting_in_prompt(self): """Test that feature coordinates are formatted correctly in the prompt""" - metrics = { - "combined_score": 0.75, - "complexity": 8, - "memory_usage": 150, - "cpu_usage": 0.3 - } - + metrics = {"combined_score": 0.75, "complexity": 8, "memory_usage": 150, "cpu_usage": 0.3} + prompt = self.prompt_sampler.build_prompt( current_program="def test(): pass", program_metrics=metrics, - feature_dimensions=["complexity", "memory_usage", "cpu_usage"] + feature_dimensions=["complexity", "memory_usage", "cpu_usage"], ) - + # Check that feature coordinates are included user_msg = prompt["user"] self.assertIn("complexity", user_msg) @@ -294,4 +271,4 @@ def test_feature_coordinates_formatting_in_prompt(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From ec44ff24169e596540cdfc7ad8bb45e254a96e10 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sat, 30 Aug 2025 23:04:22 +0800 Subject: [PATCH 2/2] Update _version.py --- openevolve/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openevolve/_version.py b/openevolve/_version.py index 6ce4e8a54..33eb37d35 100644 --- a/openevolve/_version.py +++ b/openevolve/_version.py @@ -1,3 +1,3 @@ """Version information for openevolve package.""" -__version__ = "0.2.10" +__version__ = "0.2.11"