From 8fe021725d4a7661d22c7a115293e1ed1a4053ec Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Sun, 5 Oct 2025 07:15:17 +0530
Subject: [PATCH 1/3] Fix sample_from_island to honor exploration/exploitation ratios

---
 openevolve/database.py                  | 209 ++++++++++++----
 tests/test_sample_from_island_ratios.py | 305 ++++++++++++++++++++++++
 2 files changed, 464 insertions(+), 50 deletions(-)
 create mode 100644 tests/test_sample_from_island_ratios.py

diff --git a/openevolve/database.py b/openevolve/database.py
index c62a5488..9313e95e 100644
--- a/openevolve/database.py
+++ b/openevolve/database.py
@@ -368,87 +368,70 @@ def sample_from_island(
     ) -> Tuple[Program, List[Program]]:
         """
         Sample a program and inspirations from a specific island without modifying current_island
-        
+
         This method is thread-safe and doesn't modify shared state, avoiding race conditions
         when multiple workers sample from different islands concurrently.
-        
+
+        Uses the same exploration/exploitation/random strategy as sample() to ensure
+        consistent behavior between single-process and parallel execution modes.
+
         Args:
             island_id: The island to sample from
             num_inspirations: Number of inspiration programs to sample (defaults to 5)
-            
+
         Returns:
             Tuple of (parent_program, inspiration_programs)
         """
         # Ensure valid island ID
         island_id = island_id % len(self.islands)
-        
+
         # Get programs from the specific island
         island_programs = list(self.islands[island_id])
-        
+
         if not island_programs:
             # Island is empty, fall back to sampling from all programs
             logger.debug(f"Island {island_id} is empty, sampling from all programs")
             return self.sample(num_inspirations)
-        
-        # Select parent from island programs
-        if len(island_programs) == 1:
-            parent_id = island_programs[0]
+
+        # Use exploration_ratio and exploitation_ratio to decide sampling strategy
+        # This matches the logic in _sample_parent() for consistent behavior
+        rand_val = random.random()
+
+        if rand_val < self.config.exploration_ratio:
+            # EXPLORATION: Sample randomly from island (diverse sampling)
+            parent = self._sample_from_island_random(island_id)
+            sampling_mode = "exploration"
+        elif rand_val < self.config.exploration_ratio + self.config.exploitation_ratio:
+            # EXPLOITATION: Sample from archive (elite programs)
+            parent = self._sample_from_archive_for_island(island_id)
+            sampling_mode = "exploitation"
         else:
-            # Use weighted sampling based on program scores
-            island_program_objects = [
-                self.programs[pid] for pid in island_programs 
-                if pid in self.programs
-            ]
-            
-            if not island_program_objects:
-                # Fallback if programs not found
-                parent_id = random.choice(island_programs)
-            else:
-                # Calculate weights based on fitness scores
-                weights = []
-                for prog in island_program_objects:
-                    fitness = get_fitness_score(prog.metrics, self.config.feature_dimensions)
-                    # Add small epsilon to avoid zero weights
-                    weights.append(max(fitness, 0.001))
-                
-                # Normalize weights
-                total_weight = sum(weights)
-                if total_weight > 0:
-                    weights = [w / total_weight for w in weights]
-                else:
-                    weights = [1.0 / len(island_program_objects)] * len(island_program_objects)
-                
-                # Sample parent based on weights
-                parent = random.choices(island_program_objects, weights=weights, k=1)[0]
-                parent_id = parent.id
-        
-        parent = self.programs.get(parent_id)
-        if not parent:
-            # Should not happen, but handle gracefully
-            logger.error(f"Parent program {parent_id} not found in database")
-            return self.sample(num_inspirations)
-        
+            # WEIGHTED: Use fitness-weighted sampling (remaining probability)
+            parent = self._sample_from_island_weighted(island_id)
+            sampling_mode = "weighted"
+
         # Select inspirations from the same island
         if num_inspirations is None:
             num_inspirations = 5  # Default for backward compatibility
-            
+
         # Get other programs from the island for inspirations
-        other_programs = [pid for pid in island_programs if pid != parent_id]
-        
+        other_programs = [pid for pid in island_programs if pid != parent.id]
+
         if len(other_programs) < num_inspirations:
             # Not enough programs in island, use what we have
             inspiration_ids = other_programs
         else:
             # Sample inspirations
             inspiration_ids = random.sample(other_programs, num_inspirations)
-        
+
         inspirations = [
-            self.programs[pid] for pid in inspiration_ids 
+            self.programs[pid] for pid in inspiration_ids
             if pid in self.programs
         ]
-        
+
         logger.debug(
-            f"Sampled parent {parent.id} and {len(inspirations)} inspirations from island {island_id}"
+            f"Sampled parent {parent.id} and {len(inspirations)} inspirations from island {island_id} "
+            f"(mode: {sampling_mode}, rand_val: {rand_val:.3f})"
         )
         return parent, inspirations
 
@@ -1264,6 +1247,132 @@ def _sample_random_parent(self) -> Program:
         program_id = random.choice(list(self.programs.keys()))
         return self.programs[program_id]
 
+    def _sample_from_island_weighted(self, island_id: int) -> Program:
+        """
+        Sample a parent from a specific island using fitness-weighted selection
+
+        Args:
+            island_id: The island to sample from
+
+        Returns:
+            Parent program selected using fitness-weighted sampling
+        """
+        island_id = island_id % len(self.islands)
+        island_programs = list(self.islands[island_id])
+
+        if not island_programs:
+            # Island is empty, fall back to any available program
+            logger.debug(f"Island {island_id} is empty, sampling from all programs")
+            return self._sample_random_parent()
+
+        # Select parent from island programs
+        if len(island_programs) == 1:
+            parent_id = island_programs[0]
+        else:
+            # Use weighted sampling based on program scores
+            island_program_objects = [
+                self.programs[pid] for pid in island_programs
+                if pid in self.programs
+            ]
+
+            if not island_program_objects:
+                # Fallback if programs not found
+                parent_id = random.choice(island_programs)
+            else:
+                # Calculate weights based on fitness scores
+                weights = []
+                for prog in island_program_objects:
+                    fitness = get_fitness_score(prog.metrics, self.config.feature_dimensions)
+                    # Add small epsilon to avoid zero weights
+                    weights.append(max(fitness, 0.001))
+
+                # Normalize weights
+                total_weight = sum(weights)
+                if total_weight > 0:
+                    weights = [w / total_weight for w in weights]
+                else:
+                    weights = [1.0 / len(island_program_objects)] * len(island_program_objects)
+
+                # Sample parent based on weights
+                parent = random.choices(island_program_objects, weights=weights, k=1)[0]
+                parent_id = parent.id
+
+        parent = self.programs.get(parent_id)
+        if not parent:
+            # Should not happen, but handle gracefully
+            logger.error(f"Parent program {parent_id} not found in database")
+            return self._sample_random_parent()
+
+        return parent
+
+    def _sample_from_island_random(self, island_id: int) -> Program:
+        """
+        Sample a completely random parent from a specific island (uniform distribution)
+
+        Args:
+            island_id: The island to sample from
+
+        Returns:
+            Parent program selected uniformly at random
+        """
+        island_id = island_id % len(self.islands)
+        island_programs = list(self.islands[island_id])
+
+        if not island_programs:
+            # Island is empty, fall back to any available program
+            logger.debug(f"Island {island_id} is empty, sampling from all programs")
+            return self._sample_random_parent()
+
+        # Clean up stale references
+        valid_programs = [pid for pid in island_programs if pid in self.programs]
+
+        if not valid_programs:
+            logger.warning(f"Island {island_id} has no valid programs, falling back to random sampling")
+            return self._sample_random_parent()
+
+        # Uniform random selection
+        parent_id = random.choice(valid_programs)
+        return self.programs[parent_id]
+
+    def _sample_from_archive_for_island(self, island_id: int) -> Program:
+        """
+        Sample a parent from the archive, preferring programs from the specified island
+
+        Args:
+            island_id: The island to prefer programs from
+
+        Returns:
+            Parent program from archive (preferably from the specified island)
+        """
+        if not self.archive:
+            # Fallback to weighted sampling from island
+            logger.debug(f"Archive is empty, falling back to weighted island sampling")
+            return self._sample_from_island_weighted(island_id)
+
+        # Clean up stale references in archive
+        valid_archive = [pid for pid in self.archive if pid in self.programs]
+
+        if not valid_archive:
+            logger.warning("Archive has no valid programs, falling back to weighted island sampling")
+            return self._sample_from_island_weighted(island_id)
+
+        island_id = island_id % len(self.islands)
+
+        # Prefer programs from the specified island in archive
+        archive_programs_in_island = [
+            pid
+            for pid in valid_archive
+            if self.programs[pid].metadata.get("island") == island_id
+        ]
+
+        if archive_programs_in_island:
+            parent_id = random.choice(archive_programs_in_island)
+            return self.programs[parent_id]
+        else:
+            # Fall back to any valid archive program if island has none
+            parent_id = random.choice(valid_archive)
+            return self.programs[parent_id]
+
     def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:
         """
         Sample inspiration programs for the next evolution step.
diff --git a/tests/test_sample_from_island_ratios.py b/tests/test_sample_from_island_ratios.py
new file mode 100644
index 00000000..a768d4be
--- /dev/null
+++ b/tests/test_sample_from_island_ratios.py
@@ -0,0 +1,305 @@
+"""
+Tests for sample_from_island() exploration/exploitation/weighted ratio compliance
+
+This ensures that sample_from_island() uses the same sampling strategy as sample()
+to maintain consistent behavior between single-process and parallel execution modes.
+"""
+
+import random
+import unittest
+from openevolve.config import Config
+from openevolve.database import Program, ProgramDatabase
+
+
+class TestSampleFromIslandRatios(unittest.TestCase):
+    """Tests for sample_from_island() ratio compliance"""
+
+    def setUp(self):
+        """Set up test database with programs"""
+        config = Config()
+        config.database.in_memory = True
+        config.database.num_islands = 3
+        config.database.archive_size = 10
+        config.database.population_size = 100
+
+        # Set specific exploration/exploitation ratios for testing
+        config.database.exploration_ratio = 0.3
+        config.database.exploitation_ratio = 0.4
+        # Remaining 0.3 will be weighted sampling
+
+        self.db = ProgramDatabase(config.database)
+
+        # Add programs to island 0
+        for i in range(20):
+            program = Program(
+                id=f"island0_prog{i}",
+                code=f"def test{i}(): pass",
+                language="python",
+                metrics={"score": 0.5 + i * 0.01},  # Increasing scores
+            )
+            self.db.add(program, target_island=0)
+
+        # Add some programs to island 1
+        for i in range(15):
+            program = Program(
+                id=f"island1_prog{i}",
+                code=f"def test_island1_{i}(): pass",
+                language="python",
+                metrics={"score": 0.6 + i * 0.01},
+            )
+            self.db.add(program, target_island=1)
+
+        # Add some programs to island 2
+        for i in range(10):
+            program = Program(
+                id=f"island2_prog{i}",
+                code=f"def test_island2_{i}(): pass",
+                language="python",
+                metrics={"score": 0.7 + i * 0.01},
+            )
+            self.db.add(program, target_island=2)
+
+    def test_exploration_exploitation_random_ratios(self):
+        """Test that sample_from_island respects exploration/exploitation/weighted ratios"""
+        # Set random seed for reproducibility
+        random.seed(42)
+
+        # Sample many times and track which mode was used
+        num_samples = 1000
+        exploration_count = 0
+        exploitation_count = 0
+        weighted_count = 0
+
+        for _ in range(num_samples):
+            # Snapshot the RNG, then peek at the value sample_from_island() will
+            # draw first (its mode-selection rand_val)
+            rng_state = random.getstate()
+            rand_val = random.random()
+            # Rewind so sample_from_island() really consumes that same rand_val;
+            # reseeding from a state word would start an unrelated sequence
+            random.setstate(rng_state)
+            parent, inspirations = self.db.sample_from_island(island_id=0)
+
+            # Determine which mode should have been used based on rand_val
+            if rand_val < self.db.config.exploration_ratio:
+                exploration_count += 1
+            elif rand_val < self.db.config.exploration_ratio + self.db.config.exploitation_ratio:
+                exploitation_count += 1
+            else:
+                weighted_count += 1
+
+        # Expected number of samples per mode, derived from the configured ratios
+        expected_exploration = num_samples * self.db.config.exploration_ratio
+        expected_exploitation = num_samples * self.db.config.exploitation_ratio
+        expected_weighted = num_samples * (1 - self.db.config.exploration_ratio - self.db.config.exploitation_ratio)
+
+        # Allow 15% tolerance for statistical variance
+        tolerance = 0.15
+
+        self.assertGreater(exploration_count, expected_exploration * (1 - tolerance))
+        self.assertLess(exploration_count, expected_exploration * (1 + tolerance))
+
+        self.assertGreater(exploitation_count, expected_exploitation * (1 - tolerance))
+        self.assertLess(exploitation_count, expected_exploitation * (1 + tolerance))
+
+        self.assertGreater(weighted_count, expected_weighted * (1 - tolerance))
+        self.assertLess(weighted_count, expected_weighted * (1 + tolerance))
+
+    def test_sample_from_island_returns_from_correct_island(self):
+        """Test that sampled parent is actually from the requested island"""
+        random.seed(42)
+
+        for _ in range(100):
+            parent, inspirations = self.db.sample_from_island(island_id=0)
+
+            # Parent should be from island 0
+            self.assertEqual(parent.metadata.get("island"), 0)
+
+            # Inspirations should also be from island 0
+            for insp in inspirations:
+                self.assertEqual(insp.metadata.get("island"), 0)
+
+    def test_sample_from_island_with_different_islands(self):
+        """Test that different islands return different programs"""
+        random.seed(42)
+
+        island0_programs = set()
+        island1_programs = set()
+
+        for _ in range(50):
+            parent0, _ = self.db.sample_from_island(island_id=0)
+            island0_programs.add(parent0.id)
+
+            parent1, _ = self.db.sample_from_island(island_id=1)
+            island1_programs.add(parent1.id)
+
+        # Programs from island 0 should be different from island 1
+        # (they should be disjoint sets)
+        self.assertEqual(len(island0_programs & island1_programs), 0)
+
+    def test_exploitation_uses_archive(self):
+        """Test that exploitation mode samples from archive"""
+        # Force exploitation ratio to 1.0 to guarantee archive sampling
+        self.db.config.exploration_ratio = 0.0
+        self.db.config.exploitation_ratio = 1.0
+
+        random.seed(42)
+
+        # Sample many times and check if we get archive programs
+        for _ in range(20):
+            parent, _ = self.db.sample_from_island(island_id=0)
+
+            # Parent should either be from archive or from island if archive is empty
+            # Since we have programs, archive should be populated
+            self.assertIsNotNone(parent)
+            self.assertIn(parent.id, self.db.programs)
+
+    def test_exploration_mode_uniform_distribution(self):
+        """Test that exploration mode uses uniform random sampling"""
+        # Force exploration ratio to 1.0
+        self.db.config.exploration_ratio = 1.0
+        self.db.config.exploitation_ratio = 0.0
+
+        random.seed(42)
+
+        # Sample many times and track distribution
+        sampled_ids = []
+        for _ in range(200):
+            parent, _ = self.db.sample_from_island(island_id=0)
+            sampled_ids.append(parent.id)
+
+        # Count occurrences
+        from collections import Counter
+        counts = Counter(sampled_ids)
+
+        # In exploration mode (uniform random), all programs should have similar counts
+        # Check that no program is sampled far more (>= 3x) or far less (<= 0.3x) than average
+        avg_count = len(sampled_ids) / len(counts)
+        max_count = max(counts.values())
+        min_count = min(counts.values())
+
+        # With uniform distribution, max shouldn't be more than 3x the average
+        # (allowing for statistical variance)
+        self.assertLess(max_count, avg_count * 3)
+        self.assertGreater(min_count, avg_count * 0.3)
+
+    def test_weighted_mode_favors_high_fitness(self):
+        """Test that weighted mode favors programs with higher fitness"""
+        # Force weighted ratio to 1.0
+        self.db.config.exploration_ratio = 0.0
+        self.db.config.exploitation_ratio = 0.0
+
+        random.seed(42)
+
+        # Sample many times and track which programs are selected
+        sampled_scores = []
+        for _ in range(200):
+            parent, _ = self.db.sample_from_island(island_id=0)
+            sampled_scores.append(parent.metrics["score"])
+
+        # Average sampled score should be higher than median score of all programs
+        island_programs = [self.db.programs[pid] for pid in self.db.islands[0]]
+        all_scores = [p.metrics["score"] for p in island_programs]
+        median_score = sorted(all_scores)[len(all_scores) // 2]
+        avg_sampled_score = sum(sampled_scores) / len(sampled_scores)
+
+        # Weighted sampling should favor higher scores
+        self.assertGreater(avg_sampled_score, median_score)
+
+
+class TestSampleFromIslandEdgeCases(unittest.TestCase):
+    """Tests for edge cases in sample_from_island()"""
+
+    def setUp(self):
+        """Set up test database"""
+        config = Config()
+        config.database.in_memory = True
+        config.database.num_islands = 3
+        self.db = ProgramDatabase(config.database)
+
+    def test_empty_island_fallback(self):
+        """Test that empty island falls back to sample()"""
+        # Add programs only to island 0
+        for i in range(5):
+            program = Program(
+                id=f"prog{i}",
+                code=f"def test{i}(): pass",
+                language="python",
+                metrics={"score": 0.5 + i * 0.1},
+            )
+            self.db.add(program, target_island=0)
+
+        # Try to sample from empty island 1
+        parent, inspirations = self.db.sample_from_island(island_id=1)
+
+        # Should still return a parent (from fallback)
+        self.assertIsNotNone(parent)
+
+    def test_empty_archive_fallback(self):
+        """Test exploitation mode with empty archive"""
+        # Force exploitation mode
+        self.db.config.exploration_ratio = 0.0
+        self.db.config.exploitation_ratio = 1.0
+
+        # Clear archive
+        self.db.archive.clear()
+
+        # Add programs to island
+        for i in range(5):
+            program = Program(
+                id=f"prog{i}",
+                code=f"def test{i}(): pass",
+                language="python",
+                metrics={"score": 0.5 + i * 0.1},
+            )
+            self.db.add(program, target_island=0)
+
+        # Clear archive again (add() populates it)
+        self.db.archive.clear()
+
+        # Sample should still work (fall back to weighted sampling)
+        parent, inspirations = self.db.sample_from_island(island_id=0)
+        self.assertIsNotNone(parent)
+
+    def test_single_program_island(self):
+        """Test sampling from island with only one program"""
+        program = Program(
+            id="solo",
+            code="def solo(): pass",
+            language="python",
+            metrics={"score": 0.8},
+        )
+        self.db.add(program, target_island=0)
+
+        # Should successfully return the single program
+        parent, inspirations = self.db.sample_from_island(island_id=0)
+        self.assertEqual(parent.id, "solo")
+        # No inspirations available (only one program)
+        self.assertEqual(len(inspirations), 0)
+
+    def test_island_id_wrapping(self):
+        """Test that island_id wraps around correctly"""
+        # Add programs to island 0
+        for i in range(5):
+            program = Program(
+                id=f"prog{i}",
+                code=f"def test{i}(): pass",
+                language="python",
+                metrics={"score": 0.5},
+            )
+            self.db.add(program, target_island=0)
+
+        # Sample from island_id = 3 (should wrap to 0 since we have 3 islands)
+        parent1, _ = self.db.sample_from_island(island_id=3)
+
+        # Sample from island_id = 0
+        random.seed(42)
+        parent2, _ = self.db.sample_from_island(island_id=0)
+
+        # Both should be from the same island (island 0)
+        self.assertEqual(parent1.metadata.get("island"), 0)
+        self.assertEqual(parent2.metadata.get("island"), 0)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 3edfef1976c8cddef485858df91b6719a4554c61 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Sun, 5 Oct 2025 07:22:25 +0530
Subject: [PATCH 2/3] Update _version.py

---
 openevolve/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openevolve/_version.py b/openevolve/_version.py
index d68ffb7b..84a38c6e 100644
--- a/openevolve/_version.py
+++ b/openevolve/_version.py
@@ -1,3 +1,3 @@
 """Version information for openevolve package."""
 
-__version__ = "0.2.15"
+__version__ = "0.2.16"

From 5e2d645711975f800f3b6fe25624944184f98e8a Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma <codelion@users.noreply.github.com>
Date: Sun, 5 Oct 2025 07:43:39 +0530
Subject: [PATCH 3/3] Update test_sample_from_island_ratios.py

---
 tests/test_sample_from_island_ratios.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/test_sample_from_island_ratios.py b/tests/test_sample_from_island_ratios.py
index a768d4be..54eacb53 100644
--- a/tests/test_sample_from_island_ratios.py
+++ b/tests/test_sample_from_island_ratios.py
@@ -197,14 +197,17 @@ def test_weighted_mode_favors_high_fitness(self):
             parent, _ = self.db.sample_from_island(island_id=0)
             sampled_scores.append(parent.metrics["score"])
 
-        # Average sampled score should be higher than median score of all programs
+        # Average sampled score should be higher than mean score of all programs
+        # (mean represents what uniform random sampling would produce)
         island_programs = [self.db.programs[pid] for pid in self.db.islands[0]]
         all_scores = [p.metrics["score"] for p in island_programs]
-        median_score = sorted(all_scores)[len(all_scores) // 2]
+        mean_score = sum(all_scores) / len(all_scores)
         avg_sampled_score = sum(sampled_scores) / len(sampled_scores)
 
-        # Weighted sampling should favor higher scores
-        self.assertGreater(avg_sampled_score, median_score)
+        # Weighted sampling should favor higher scores (shift average upward from mean)
+        self.assertGreater(avg_sampled_score, mean_score,
+                          f"Weighted sampling should favor high fitness: "
+                          f"sampled_avg={avg_sampled_score:.4f} should be > mean={mean_score:.4f}")
 
 
 class TestSampleFromIslandEdgeCases(unittest.TestCase):