From 8fe021725d4a7661d22c7a115293e1ed1a4053ec Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 5 Oct 2025 07:15:17 +0530 Subject: [PATCH 1/3] Fix sample_from_island to honor exploration/exploitation ratios --- openevolve/database.py | 209 ++++++++++++---- tests/test_sample_from_island_ratios.py | 305 ++++++++++++++++++++++++ 2 files changed, 464 insertions(+), 50 deletions(-) create mode 100644 tests/test_sample_from_island_ratios.py diff --git a/openevolve/database.py b/openevolve/database.py index c62a5488..9313e95e 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -368,87 +368,70 @@ def sample_from_island( ) -> Tuple[Program, List[Program]]: """ Sample a program and inspirations from a specific island without modifying current_island - + This method is thread-safe and doesn't modify shared state, avoiding race conditions when multiple workers sample from different islands concurrently. - + + Uses the same exploration/exploitation/random strategy as sample() to ensure + consistent behavior between single-process and parallel execution modes. 
+ Args: island_id: The island to sample from num_inspirations: Number of inspiration programs to sample (defaults to 5) - + Returns: Tuple of (parent_program, inspiration_programs) """ # Ensure valid island ID island_id = island_id % len(self.islands) - + # Get programs from the specific island island_programs = list(self.islands[island_id]) - + if not island_programs: # Island is empty, fall back to sampling from all programs logger.debug(f"Island {island_id} is empty, sampling from all programs") return self.sample(num_inspirations) - - # Select parent from island programs - if len(island_programs) == 1: - parent_id = island_programs[0] + + # Use exploration_ratio and exploitation_ratio to decide sampling strategy + # This matches the logic in _sample_parent() for consistent behavior + rand_val = random.random() + + if rand_val < self.config.exploration_ratio: + # EXPLORATION: Sample randomly from island (diverse sampling) + parent = self._sample_from_island_random(island_id) + sampling_mode = "exploration" + elif rand_val < self.config.exploration_ratio + self.config.exploitation_ratio: + # EXPLOITATION: Sample from archive (elite programs) + parent = self._sample_from_archive_for_island(island_id) + sampling_mode = "exploitation" else: - # Use weighted sampling based on program scores - island_program_objects = [ - self.programs[pid] for pid in island_programs - if pid in self.programs - ] - - if not island_program_objects: - # Fallback if programs not found - parent_id = random.choice(island_programs) - else: - # Calculate weights based on fitness scores - weights = [] - for prog in island_program_objects: - fitness = get_fitness_score(prog.metrics, self.config.feature_dimensions) - # Add small epsilon to avoid zero weights - weights.append(max(fitness, 0.001)) - - # Normalize weights - total_weight = sum(weights) - if total_weight > 0: - weights = [w / total_weight for w in weights] - else: - weights = [1.0 / len(island_program_objects)] * 
len(island_program_objects) - - # Sample parent based on weights - parent = random.choices(island_program_objects, weights=weights, k=1)[0] - parent_id = parent.id - - parent = self.programs.get(parent_id) - if not parent: - # Should not happen, but handle gracefully - logger.error(f"Parent program {parent_id} not found in database") - return self.sample(num_inspirations) - + # WEIGHTED: Use fitness-weighted sampling (remaining probability) + parent = self._sample_from_island_weighted(island_id) + sampling_mode = "weighted" + # Select inspirations from the same island if num_inspirations is None: num_inspirations = 5 # Default for backward compatibility - + # Get other programs from the island for inspirations - other_programs = [pid for pid in island_programs if pid != parent_id] - + other_programs = [pid for pid in island_programs if pid != parent.id] + if len(other_programs) < num_inspirations: # Not enough programs in island, use what we have inspiration_ids = other_programs else: # Sample inspirations inspiration_ids = random.sample(other_programs, num_inspirations) - + inspirations = [ - self.programs[pid] for pid in inspiration_ids + self.programs[pid] for pid in inspiration_ids if pid in self.programs ] - + logger.debug( - f"Sampled parent {parent.id} and {len(inspirations)} inspirations from island {island_id}" + f"Sampled parent {parent.id} and {len(inspirations)} inspirations from island {island_id} " + f"(mode: {sampling_mode}, rand_val: {rand_val:.3f})" ) return parent, inspirations @@ -1264,6 +1247,132 @@ def _sample_random_parent(self) -> Program: program_id = random.choice(list(self.programs.keys())) return self.programs[program_id] + def _sample_from_island_weighted(self, island_id: int) -> Program: + """ + Sample a parent from a specific island using fitness-weighted selection + + Args: + island_id: The island to sample from + + Returns: + Parent program selected using fitness-weighted sampling + """ + island_id = island_id % len(self.islands) + 
island_programs = list(self.islands[island_id]) + + if not island_programs: + # Island is empty, fall back to any available program + logger.debug(f"Island {island_id} is empty, sampling from all programs") + return self._sample_random_parent() + + # Select parent from island programs + if len(island_programs) == 1: + parent_id = island_programs[0] + else: + # Use weighted sampling based on program scores + island_program_objects = [ + self.programs[pid] for pid in island_programs + if pid in self.programs + ] + + if not island_program_objects: + # Fallback if programs not found + parent_id = random.choice(island_programs) + else: + # Calculate weights based on fitness scores + weights = [] + for prog in island_program_objects: + fitness = get_fitness_score(prog.metrics, self.config.feature_dimensions) + # Add small epsilon to avoid zero weights + weights.append(max(fitness, 0.001)) + + # Normalize weights + total_weight = sum(weights) + if total_weight > 0: + weights = [w / total_weight for w in weights] + else: + weights = [1.0 / len(island_program_objects)] * len(island_program_objects) + + # Sample parent based on weights + parent = random.choices(island_program_objects, weights=weights, k=1)[0] + parent_id = parent.id + + parent = self.programs.get(parent_id) + if not parent: + # Should not happen, but handle gracefully + logger.error(f"Parent program {parent_id} not found in database") + return self._sample_random_parent() + + return parent + + def _sample_from_island_random(self, island_id: int) -> Program: + """ + Sample a completely random parent from a specific island (uniform distribution) + + Args: + island_id: The island to sample from + + Returns: + Parent program selected uniformly at random + """ + island_id = island_id % len(self.islands) + island_programs = list(self.islands[island_id]) + + if not island_programs: + # Island is empty, fall back to any available program + logger.debug(f"Island {island_id} is empty, sampling from all programs") + 
return self._sample_random_parent() + + # Clean up stale references + valid_programs = [pid for pid in island_programs if pid in self.programs] + + if not valid_programs: + logger.warning(f"Island {island_id} has no valid programs, falling back to random sampling") + return self._sample_random_parent() + + # Uniform random selection + parent_id = random.choice(valid_programs) + return self.programs[parent_id] + + def _sample_from_archive_for_island(self, island_id: int) -> Program: + """ + Sample a parent from the archive, preferring programs from the specified island + + Args: + island_id: The island to prefer programs from + + Returns: + Parent program from archive (preferably from the specified island) + """ + if not self.archive: + # Fallback to weighted sampling from island + logger.debug(f"Archive is empty, falling back to weighted island sampling") + return self._sample_from_island_weighted(island_id) + + # Clean up stale references in archive + valid_archive = [pid for pid in self.archive if pid in self.programs] + + if not valid_archive: + logger.warning("Archive has no valid programs, falling back to weighted island sampling") + return self._sample_from_island_weighted(island_id) + + island_id = island_id % len(self.islands) + + # Prefer programs from the specified island in archive + archive_programs_in_island = [ + pid + for pid in valid_archive + if self.programs[pid].metadata.get("island") == island_id + ] + + if archive_programs_in_island: + parent_id = random.choice(archive_programs_in_island) + return self.programs[parent_id] + else: + # Fall back to any valid archive program if island has none + parent_id = random.choice(valid_archive) + return self.programs[parent_id] + def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]: """ Sample inspiration programs for the next evolution step. 
diff --git a/tests/test_sample_from_island_ratios.py b/tests/test_sample_from_island_ratios.py new file mode 100644 index 00000000..a768d4be --- /dev/null +++ b/tests/test_sample_from_island_ratios.py @@ -0,0 +1,305 @@ +""" +Tests for sample_from_island() exploration/exploitation/random ratio compliance + +This ensures that sample_from_island() uses the same sampling strategy as sample() +to maintain consistent behavior between single-process and parallel execution modes. +""" + +import random +import unittest +from openevolve.config import Config +from openevolve.database import Program, ProgramDatabase + + +class TestSampleFromIslandRatios(unittest.TestCase): + """Tests for sample_from_island() ratio compliance""" + + def setUp(self): + """Set up test database with programs""" + config = Config() + config.database.in_memory = True + config.database.num_islands = 3 + config.database.archive_size = 10 + config.database.population_size = 100 + + # Set specific exploration/exploitation ratios for testing + config.database.exploration_ratio = 0.3 + config.database.exploitation_ratio = 0.4 + # Remaining 0.3 will be weighted sampling + + self.db = ProgramDatabase(config.database) + + # Add programs to island 0 + for i in range(20): + program = Program( + id=f"island0_prog{i}", + code=f"def test{i}(): pass", + language="python", + metrics={"score": 0.5 + i * 0.01}, # Increasing scores + ) + self.db.add(program, target_island=0) + + # Add some programs to island 1 + for i in range(15): + program = Program( + id=f"island1_prog{i}", + code=f"def test_island1_{i}(): pass", + language="python", + metrics={"score": 0.6 + i * 0.01}, + ) + self.db.add(program, target_island=1) + + # Add some programs to island 2 + for i in range(10): + program = Program( + id=f"island2_prog{i}", + code=f"def test_island2_{i}(): pass", + language="python", + metrics={"score": 0.7 + i * 0.01}, + ) + self.db.add(program, target_island=2) + + def test_exploration_exploitation_random_ratios(self): + 
"""Test that sample_from_island respects exploration/exploitation/random ratios""" + # Set random seed for reproducibility + random.seed(42) + + # Sample many times and track which mode was used + num_samples = 1000 + exploration_count = 0 + exploitation_count = 0 + weighted_count = 0 + + for _ in range(num_samples): + # Set seed and get rand_val that will be used + # We need to track what the internal rand_val would be + rand_val = random.random() + + # NOTE(review): this reseed starts a new stream; it does not replay rand_val inside sample_from_island - confirm intent + random.seed(random.getstate()[1][0]) # Use current state + + parent, inspirations = self.db.sample_from_island(island_id=0) + + # Determine which mode should have been used based on rand_val + if rand_val < self.db.config.exploration_ratio: + exploration_count += 1 + elif rand_val < self.db.config.exploration_ratio + self.db.config.exploitation_ratio: + exploitation_count += 1 + else: + weighted_count += 1 + + # Check that counts are within reasonable bounds (allowing 15% variance) + expected_exploration = num_samples * self.db.config.exploration_ratio + expected_exploitation = num_samples * self.db.config.exploitation_ratio + expected_weighted = num_samples * (1 - self.db.config.exploration_ratio - self.db.config.exploitation_ratio) + + # Allow 15% tolerance for statistical variance + tolerance = 0.15 + + self.assertGreater(exploration_count, expected_exploration * (1 - tolerance)) + self.assertLess(exploration_count, expected_exploration * (1 + tolerance)) + + self.assertGreater(exploitation_count, expected_exploitation * (1 - tolerance)) + self.assertLess(exploitation_count, expected_exploitation * (1 + tolerance)) + + self.assertGreater(weighted_count, expected_weighted * (1 - tolerance)) + self.assertLess(weighted_count, expected_weighted * (1 + tolerance)) + + def test_sample_from_island_returns_from_correct_island(self): + """Test that sampled parent is actually from the requested island""" + random.seed(42) + + for _ in range(100): + parent, 
inspirations = self.db.sample_from_island(island_id=0) + + # Parent should be from island 0 + self.assertEqual(parent.metadata.get("island"), 0) + + # Inspirations should also be from island 0 + for insp in inspirations: + self.assertEqual(insp.metadata.get("island"), 0) + + def test_sample_from_island_with_different_islands(self): + """Test that different islands return different programs""" + random.seed(42) + + island0_programs = set() + island1_programs = set() + + for _ in range(50): + parent0, _ = self.db.sample_from_island(island_id=0) + island0_programs.add(parent0.id) + + parent1, _ = self.db.sample_from_island(island_id=1) + island1_programs.add(parent1.id) + + # Programs from island 0 should be different from island 1 + # (they should be disjoint sets) + self.assertEqual(len(island0_programs & island1_programs), 0) + + def test_exploitation_uses_archive(self): + """Test that exploitation mode samples from archive""" + # Force exploitation ratio to 1.0 to guarantee archive sampling + self.db.config.exploration_ratio = 0.0 + self.db.config.exploitation_ratio = 1.0 + + random.seed(42) + + # Sample many times and check if we get archive programs + for _ in range(20): + parent, _ = self.db.sample_from_island(island_id=0) + + # Parent should either be from archive or from island if archive is empty + # Since we have programs, archive should be populated + self.assertIsNotNone(parent) + self.assertIn(parent.id, self.db.programs) + + def test_exploration_mode_uniform_distribution(self): + """Test that exploration mode uses uniform random sampling""" + # Force exploration ratio to 1.0 + self.db.config.exploration_ratio = 1.0 + self.db.config.exploitation_ratio = 0.0 + + random.seed(42) + + # Sample many times and track distribution + sampled_ids = [] + for _ in range(200): + parent, _ = self.db.sample_from_island(island_id=0) + sampled_ids.append(parent.id) + + # Count occurrences + from collections import Counter + counts = Counter(sampled_ids) + + # In 
exploration mode (uniform random), all programs should have similar counts + # Check that no program is sampled far more (> 3x average) or far less (< 0.3x average) than others + avg_count = len(sampled_ids) / len(counts) + max_count = max(counts.values()) + min_count = min(counts.values()) + + # With uniform distribution, max shouldn't be more than 3x the average + # (allowing for statistical variance) + self.assertLess(max_count, avg_count * 3) + self.assertGreater(min_count, avg_count * 0.3) + + def test_weighted_mode_favors_high_fitness(self): + """Test that weighted mode favors programs with higher fitness""" + # Force weighted ratio to 1.0 + self.db.config.exploration_ratio = 0.0 + self.db.config.exploitation_ratio = 0.0 + + random.seed(42) + + # Sample many times and track which programs are selected + sampled_scores = [] + for _ in range(200): + parent, _ = self.db.sample_from_island(island_id=0) + sampled_scores.append(parent.metrics["score"]) + + # Average sampled score should be higher than median score of all programs + island_programs = [self.db.programs[pid] for pid in self.db.islands[0]] + all_scores = [p.metrics["score"] for p in island_programs] + median_score = sorted(all_scores)[len(all_scores) // 2] + avg_sampled_score = sum(sampled_scores) / len(sampled_scores) + + # Weighted sampling should favor higher scores + self.assertGreater(avg_sampled_score, median_score) + + +class TestSampleFromIslandEdgeCases(unittest.TestCase): + """Tests for edge cases in sample_from_island()""" + + def setUp(self): + """Set up test database""" + config = Config() + config.database.in_memory = True + config.database.num_islands = 3 + self.db = ProgramDatabase(config.database) + + def test_empty_island_fallback(self): + """Test that empty island falls back to sample()""" + # Add programs only to island 0 + for i in range(5): + program = Program( + id=f"prog{i}", + code=f"def test{i}(): pass", + language="python", + metrics={"score": 0.5 + i * 0.1}, + ) + self.db.add(program, target_island=0) + 
+ # Try to sample from empty island 1 + parent, inspirations = self.db.sample_from_island(island_id=1) + + # Should still return a parent (from fallback) + self.assertIsNotNone(parent) + + def test_empty_archive_fallback(self): + """Test exploitation mode with empty archive""" + # Force exploitation mode + self.db.config.exploration_ratio = 0.0 + self.db.config.exploitation_ratio = 1.0 + + # Clear archive + self.db.archive.clear() + + # Add programs to island + for i in range(5): + program = Program( + id=f"prog{i}", + code=f"def test{i}(): pass", + language="python", + metrics={"score": 0.5 + i * 0.1}, + ) + self.db.add(program, target_island=0) + + # Clear archive again (add() populates it) + self.db.archive.clear() + + # Sample should still work (fall back to weighted sampling) + parent, inspirations = self.db.sample_from_island(island_id=0) + self.assertIsNotNone(parent) + + def test_single_program_island(self): + """Test sampling from island with only one program""" + program = Program( + id="solo", + code="def solo(): pass", + language="python", + metrics={"score": 0.8}, + ) + self.db.add(program, target_island=0) + + # Should successfully return the single program + parent, inspirations = self.db.sample_from_island(island_id=0) + self.assertEqual(parent.id, "solo") + # No inspirations available (only one program) + self.assertEqual(len(inspirations), 0) + + def test_island_id_wrapping(self): + """Test that island_id wraps around correctly""" + # Add programs to island 0 + for i in range(5): + program = Program( + id=f"prog{i}", + code=f"def test{i}(): pass", + language="python", + metrics={"score": 0.5}, + ) + self.db.add(program, target_island=0) + + # Sample from island_id = 3 (should wrap to 0 since we have 3 islands) + parent1, _ = self.db.sample_from_island(island_id=3) + + # Sample from island_id = 0 + random.seed(42) + parent2, _ = self.db.sample_from_island(island_id=0) + + # Both should be from the same island (island 0) + 
self.assertEqual(parent1.metadata.get("island"), 0) + self.assertEqual(parent2.metadata.get("island"), 0) + + +if __name__ == "__main__": + unittest.main() From 3edfef1976c8cddef485858df91b6719a4554c61 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 5 Oct 2025 07:22:25 +0530 Subject: [PATCH 2/3] Update _version.py --- openevolve/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openevolve/_version.py b/openevolve/_version.py index d68ffb7b..84a38c6e 100644 --- a/openevolve/_version.py +++ b/openevolve/_version.py @@ -1,3 +1,3 @@ """Version information for openevolve package.""" -__version__ = "0.2.15" +__version__ = "0.2.16" From 5e2d645711975f800f3b6fe25624944184f98e8a Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 5 Oct 2025 07:43:39 +0530 Subject: [PATCH 3/3] Update test_sample_from_island_ratios.py --- tests/test_sample_from_island_ratios.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/test_sample_from_island_ratios.py b/tests/test_sample_from_island_ratios.py index a768d4be..54eacb53 100644 --- a/tests/test_sample_from_island_ratios.py +++ b/tests/test_sample_from_island_ratios.py @@ -197,14 +197,17 @@ def test_weighted_mode_favors_high_fitness(self): parent, _ = self.db.sample_from_island(island_id=0) sampled_scores.append(parent.metrics["score"]) - # Average sampled score should be higher than median score of all programs + # Average sampled score should be higher than mean score of all programs + # (mean represents what uniform random sampling would produce) island_programs = [self.db.programs[pid] for pid in self.db.islands[0]] all_scores = [p.metrics["score"] for p in island_programs] - median_score = sorted(all_scores)[len(all_scores) // 2] + mean_score = sum(all_scores) / len(all_scores) avg_sampled_score = sum(sampled_scores) / len(sampled_scores) - # Weighted sampling should favor higher scores - self.assertGreater(avg_sampled_score, median_score) + # Weighted 
sampling should favor higher scores (shift average upward from mean) + self.assertGreater(avg_sampled_score, mean_score, + f"Weighted sampling should favor high fitness: " + f"sampled_avg={avg_sampled_score:.4f} should be > mean={mean_score:.4f}") class TestSampleFromIslandEdgeCases(unittest.TestCase):