"""
Test for issue #313: asyncio.run() error in novelty checking
https://github.com/algorithmicsuperintelligence/openevolve/issues/313

This test reproduces the bug where calling database.add() from within an async context
triggers a novelty check that uses asyncio.run(), which fails because the caller is
already running inside an event loop.
"""
| 9 | + |
| 10 | +import unittest |
| 11 | +import asyncio |
| 12 | +from unittest.mock import AsyncMock, MagicMock, patch, Mock |
| 13 | +from openevolve.config import Config |
| 14 | +from openevolve.database import Program, ProgramDatabase |
| 15 | + |
| 16 | + |
class MockLLM:
    """Stand-in LLM exposing the async ``generate_with_context`` coroutine."""

    async def generate_with_context(self, system_message: str, messages: list):
        """Ignore both arguments and always answer with the NOVEL verdict."""
        verdict = "NOVEL"
        return verdict
| 23 | + |
| 24 | + |
class TestNoveltyAsyncioIssue(unittest.TestCase):
    """Regression tests for the asyncio.run() error in novelty checking (issue #313)."""

    @patch('openevolve.embedding.EmbeddingClient')
    def setUp(self, mock_embedding_client_class):
        """Set up a test database with novelty checking enabled.

        ``mock_embedding_client_class`` is injected by the ``@patch`` decorator.
        The patch is only active while ``setUp`` itself runs, which covers the
        ``ProgramDatabase`` construction below.
        """
        # Every embedding request yields the same vector, so any two programs
        # look identical to the similarity check.
        mock_instance = MagicMock()
        mock_instance.get_embedding.return_value = [0.1] * 1536  # Mock embedding vector
        mock_embedding_client_class.return_value = mock_instance

        config = Config()
        config.database.in_memory = True
        config.database.embedding_model = "text-embedding-3-small"
        config.database.similarity_threshold = 0.99
        config.database.novelty_llm = MockLLM()

        self.db = ProgramDatabase(config.database)
        self.mock_embedding_client_class = mock_embedding_client_class

    def test_novelty_check_from_async_context_works(self):
        """
        Test that novelty checking works correctly when called from within
        an async context (this was the bug in issue #313).

        Expected behavior: Should successfully run the novelty check without
        any asyncio.run() errors, properly using ThreadPoolExecutor to handle
        the async LLM call from within a running event loop.
        """
        # Create two programs with similar embeddings to trigger LLM novelty check
        program1 = Program(
            id="prog1",
            code="def test(): return 1",
            language="python",
            metrics={"score": 0.5},
        )

        program2 = Program(
            id="prog2",
            code="def test(): return 2",
            language="python",
            metrics={"score": 0.6},
            parent_id="prog1",
        )

        async def async_add_programs():
            """Add programs from async context - this simulates controller.run()"""
            # Add first program (no novelty check, no similar programs yet)
            prog1_id = self.db.add(program1)
            self.assertIsNotNone(prog1_id)

            # Add second program - this triggers novelty check
            # Since embeddings are similar (both [0.1] * 1536), it will call
            # _llm_judge_novelty which should now work correctly
            prog2_id = self.db.add(program2)

            # The novelty check should succeed without errors
            # The program should be added (MockLLM returns "NOVEL")
            self.assertIsNotNone(prog2_id)

            return True

        # asyncio.run() provides the running event loop that add() is called under
        result = asyncio.run(async_add_programs())
        self.assertTrue(result)

        # Verify both programs were added
        self.assertIn("prog1", self.db.programs)
        self.assertIn("prog2", self.db.programs)

    def test_novelty_check_from_sync_context_works(self):
        """
        Test that novelty checking also works correctly when called from
        a synchronous (non-async) context.

        Expected behavior: Should successfully run the novelty check using
        asyncio.run() since there's no running event loop.
        """
        # Create two programs with similar embeddings to trigger LLM novelty check
        program1 = Program(
            id="prog3",
            code="def test(): return 3",
            language="python",
            metrics={"score": 0.5},
        )

        program2 = Program(
            id="prog4",
            code="def test(): return 4",
            language="python",
            metrics={"score": 0.6},
            parent_id="prog3",
        )

        # Add programs from synchronous context (no event loop running)
        prog1_id = self.db.add(program1)
        self.assertIsNotNone(prog1_id)

        prog2_id = self.db.add(program2)
        self.assertIsNotNone(prog2_id)

        # Verify both programs were added
        self.assertIn("prog3", self.db.programs)
        self.assertIn("prog4", self.db.programs)

    def test_novelty_check_disabled_works_fine(self):
        """
        Test that when novelty checking is disabled, adding programs
        from async context works fine (this is the workaround from issue #313).
        """
        # Create a new database with novelty checking disabled
        config = Config()
        config.database.in_memory = True
        config.database.similarity_threshold = 0.0  # Disable novelty checking
        db_no_novelty = ProgramDatabase(config.database)

        program1 = Program(
            id="prog1",
            code="def test(): return 1",
            language="python",
            metrics={"score": 0.5},
        )

        program2 = Program(
            id="prog2",
            code="def test(): return 2",
            language="python",
            metrics={"score": 0.6},
        )

        async def async_add_programs():
            """Add programs from async context"""
            db_no_novelty.add(program1)
            db_no_novelty.add(program2)
            return True

        # This should work fine without novelty checking
        result = asyncio.run(async_add_programs())
        self.assertTrue(result)

        # Verify both programs were actually stored, matching the sibling tests;
        # the coroutine's literal True alone proves only that nothing raised.
        self.assertIn("prog1", db_no_novelty.programs)
        self.assertIn("prog2", db_no_novelty.programs)
| 166 | + |
| 167 | + |
# Run the tests via unittest's CLI entry point when this file is executed
# directly as a script rather than imported.
if __name__ == "__main__":
    unittest.main()
0 commit comments