diff --git a/tests/test_basic.py b/tests/test_basic.py
deleted file mode 100644
index f8a8710b6..000000000
--- a/tests/test_basic.py
+++ /dev/null
@@ -1,219 +0,0 @@
-"""
-Basic tests for OpenEvolve components
-"""
-
-import asyncio
-import os
-import tempfile
-import unittest
-from unittest.mock import MagicMock, patch
-
-import numpy as np
-
-from openevolve.config import Config
-from openevolve.database import Program, ProgramDatabase
-from openevolve.prompt.sampler import PromptSampler
-from openevolve.utils.code_utils import apply_diff, extract_diffs
-
-
-class TestCodeUtils(unittest.TestCase):
-    """Tests for code utilities"""
-
-    def test_extract_diffs(self):
-        """Test extracting diffs from a response"""
-        diff_text = """
-        Let's improve this code:
-
-        <<<<<<< SEARCH
-        def hello():
-            print("Hello")
-        =======
-        def hello():
-            print("Hello, World!")
-        >>>>>>> REPLACE
-
-        Another change:
-
-        <<<<<<< SEARCH
-        x = 1
-        =======
-        x = 2
-        >>>>>>> REPLACE
-        """
-
-        diffs = extract_diffs(diff_text)
-        self.assertEqual(len(diffs), 2)
-        self.assertEqual(
-            diffs[0][0],
-            """        def hello():
-            print("Hello")""",
-        )
-        self.assertEqual(
-            diffs[0][1],
-            """        def hello():
-            print("Hello, World!")""",
-        )
-        self.assertEqual(diffs[1][0], "        x = 1")
-        self.assertEqual(diffs[1][1], "        x = 2")
-
-    def test_apply_diff(self):
-        """Test applying diffs to code"""
-        original_code = """
-        def hello():
-            print("Hello")
-
-        x = 1
-        y = 2
-        """
-
-        diff_text = """
-        <<<<<<< SEARCH
-        def hello():
-            print("Hello")
-        =======
-        def hello():
-            print("Hello, World!")
-        >>>>>>> REPLACE
-
-        <<<<<<< SEARCH
-        x = 1
-        =======
-        x = 2
-        >>>>>>> REPLACE
-        """
-
-        expected_code = """
-        def hello():
-            print("Hello, World!")
-
-        x = 2
-        y = 2
-        """
-
-        result = apply_diff(original_code, diff_text)
-
-        # Normalize whitespace for comparison
-        self.assertEqual(
-            result,
-            expected_code,
-        )
-
-
-class TestProgramDatabase(unittest.TestCase):
-    """Tests for program database"""
-
-    def setUp(self):
-        """Set up test database"""
-        config = Config()
-        config.database.in_memory = True
-        self.db = ProgramDatabase(config.database)
-
-    def test_add_and_get(self):
-        """Test adding and retrieving a program"""
-        program = Program(
-            id="test1",
-            code="def test(): pass",
-            language="python",
-            metrics={"score": 0.5},
-        )
-
-        self.db.add(program)
-
-        retrieved = self.db.get("test1")
-        self.assertIsNotNone(retrieved)
-        self.assertEqual(retrieved.id, "test1")
-        self.assertEqual(retrieved.code, "def test(): pass")
-        self.assertEqual(retrieved.metrics["score"], 0.5)
-
-    def test_get_best_program(self):
-        """Test getting the best program"""
-        program1 = Program(
-            id="test1",
-            code="def test1(): pass",
-            language="python",
-            metrics={"score": 0.5},
-        )
-
-        program2 = Program(
-            id="test2",
-            code="def test2(): pass",
-            language="python",
-            metrics={"score": 0.7},
-        )
-
-        self.db.add(program1)
-        self.db.add(program2)
-
-        best = self.db.get_best_program()
-        self.assertIsNotNone(best)
-        self.assertEqual(best.id, "test2")
-
-    def test_sample(self):
-        """Test sampling from the database"""
-        program1 = Program(
-            id="test1",
-            code="def test1(): pass",
-            language="python",
-            metrics={"score": 0.5},
-        )
-
-        program2 = Program(
-            id="test2",
-            code="def test2(): pass",
-            language="python",
-            metrics={"score": 0.7},
-        )
-
-        self.db.add(program1)
-        self.db.add(program2)
-
-        parent, inspirations = self.db.sample()
-
-        self.assertIsNotNone(parent)
-        self.assertIn(parent.id, ["test1", "test2"])
-
-
-class TestPromptSampler(unittest.TestCase):
-    """Tests for prompt sampler"""
-
-    def setUp(self):
-        """Set up test prompt sampler"""
-        config = Config()
-        self.prompt_sampler = PromptSampler(config.prompt)
-
-    def test_build_prompt(self):
-        """Test building a prompt"""
-        current_program = "def test(): pass"
-        parent_program = "def test(): pass"
-        program_metrics = {"score": 0.5}
-        previous_programs = [
-            {
-                "id": "prev1",
-                "code": "def prev1(): pass",
-                "metrics": {"score": 0.4},
-            }
-        ]
-        top_programs = [
-            {
-                "id": "top1",
-                "code": "def top1(): pass",
-                "metrics": {"score": 0.6},
-            }
-        ]
-
-        prompt = self.prompt_sampler.build_prompt(
-            current_program=current_program,
-            parent_program=parent_program,
-            program_metrics=program_metrics,
-            previous_programs=previous_programs,
-            top_programs=top_programs,
-        )
-
-        self.assertIn("system", prompt)
-        self.assertIn("user", prompt)
-        self.assertIn("def test(): pass", prompt["user"])
-        self.assertIn("score: 0.5", prompt["user"])
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_code_utils.py b/tests/test_code_utils.py
new file mode 100644
index 000000000..1446b01fd
--- /dev/null
+++ b/tests/test_code_utils.py
@@ -0,0 +1,93 @@
+"""
+Tests for code utilities in openevolve.utils.code_utils
+"""
+
+import unittest
+from openevolve.utils.code_utils import apply_diff, extract_diffs
+
+
+class TestCodeUtils(unittest.TestCase):
+    """Tests for code utilities"""
+
+    def test_extract_diffs(self):
+        """Test extracting diffs from a response"""
+        diff_text = """
+        Let's improve this code:
+
+        <<<<<<< SEARCH
+        def hello():
+            print("Hello")
+        =======
+        def hello():
+            print("Hello, World!")
+        >>>>>>> REPLACE
+
+        Another change:
+
+        <<<<<<< SEARCH
+        x = 1
+        =======
+        x = 2
+        >>>>>>> REPLACE
+        """
+
+        diffs = extract_diffs(diff_text)
+        self.assertEqual(len(diffs), 2)
+        self.assertEqual(
+            diffs[0][0],
+            """        def hello():
+            print(\"Hello\")""",
+        )
+        self.assertEqual(
+            diffs[0][1],
+            """        def hello():
+            print(\"Hello, World!\")""",
+        )
+        self.assertEqual(diffs[1][0], "        x = 1")
+        self.assertEqual(diffs[1][1], "        x = 2")
+
+    def test_apply_diff(self):
+        """Test applying diffs to code"""
+        original_code = """
+        def hello():
+            print("Hello")
+
+        x = 1
+        y = 2
+        """
+
+        diff_text = """
+        <<<<<<< SEARCH
+        def hello():
+            print("Hello")
+        =======
+        def hello():
+            print("Hello, World!")
+        >>>>>>> REPLACE
+
+        <<<<<<< SEARCH
+        x = 1
+        =======
+        x = 2
+        >>>>>>> REPLACE
+        """
+
+        expected_code = """
+        def hello():
+            print("Hello, World!")
+
+        x = 2
+        y = 2
+        """
+
+        result = apply_diff(original_code, diff_text)
+
+        # The applied diff should produce the expected code
+        self.assertEqual(
+            result,
+            expected_code,
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_database.py b/tests/test_database.py
new file mode 100644
index 000000000..bfa35040c
--- /dev/null
+++ b/tests/test_database.py
@@ -0,0 +1,85 @@
+"""
+Tests for ProgramDatabase in openevolve.database
+"""
+
+import unittest
+from openevolve.config import Config
+from openevolve.database import Program, ProgramDatabase
+
+
+class TestProgramDatabase(unittest.TestCase):
+    """Tests for program database"""
+
+    def setUp(self):
+        """Set up test database"""
+        config = Config()
+        config.database.in_memory = True
+        self.db = ProgramDatabase(config.database)
+
+    def test_add_and_get(self):
+        """Test adding and retrieving a program"""
+        program = Program(
+            id="test1",
+            code="def test(): pass",
+            language="python",
+            metrics={"score": 0.5},
+        )
+
+        self.db.add(program)
+
+        retrieved = self.db.get("test1")
+        self.assertIsNotNone(retrieved)
+        self.assertEqual(retrieved.id, "test1")
+        self.assertEqual(retrieved.code, "def test(): pass")
+        self.assertEqual(retrieved.metrics["score"], 0.5)
+
+    def test_get_best_program(self):
+        """Test getting the best program"""
+        program1 = Program(
+            id="test1",
+            code="def test1(): pass",
+            language="python",
+            metrics={"score": 0.5},
+        )
+
+        program2 = Program(
+            id="test2",
+            code="def test2(): pass",
+            language="python",
+            metrics={"score": 0.7},
+        )
+
+        self.db.add(program1)
+        self.db.add(program2)
+
+        best = self.db.get_best_program()
+        self.assertIsNotNone(best)
+        self.assertEqual(best.id, "test2")
+
+    def test_sample(self):
+        """Test sampling from the database"""
+        program1 = Program(
+            id="test1",
+            code="def test1(): pass",
+            language="python",
+            metrics={"score": 0.5},
+        )
+
+        program2 = Program(
+            id="test2",
+            code="def test2(): pass",
+            language="python",
+            metrics={"score": 0.7},
+        )
+
+        self.db.add(program1)
+        self.db.add(program2)
+
+        parent, inspirations = self.db.sample()
+
+        self.assertIsNotNone(parent)
+        self.assertIn(parent.id, ["test1", "test2"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_llm_ensemble.py b/tests/test_llm_ensemble.py
new file mode 100644
index 000000000..72e9c1342
--- /dev/null
+++ b/tests/test_llm_ensemble.py
@@ -0,0 +1,38 @@
+"""
+Tests for LLMEnsemble in openevolve.llm.ensemble
+"""
+
+import unittest
+from openevolve.llm.ensemble import LLMEnsemble
+from openevolve.config import LLMModelConfig
+
+
+class TestLLMEnsemble(unittest.TestCase):
+    def test_weighted_sampling(self):
+        models = [
+            LLMModelConfig(name="a", weight=0.0),
+            LLMModelConfig(name="b", weight=1.0),
+        ]
+        ensemble = LLMEnsemble(models)
+        # Should always sample model 'b'
+        for _ in range(10):
+            self.assertEqual(ensemble._sample_model().model, "b")
+
+        models = [
+            LLMModelConfig(name="a", weight=0.3),
+            LLMModelConfig(name="b", weight=0.3),
+            LLMModelConfig(name="c", weight=0.3),
+        ]
+        ensemble = LLMEnsemble(models)
+        # Should sample all models. Track sampled models in a set
+        sampled_models = set()
+        for _ in range(1000):
+            sampled_models.add(ensemble._sample_model().model)
+            # Stop once we have seen all models
+            if len(sampled_models) == len(models):
+                break
+        self.assertEqual(len(sampled_models), len(models))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_prompt_sampler.py b/tests/test_prompt_sampler.py
new file mode 100644
index 000000000..771a962b7
--- /dev/null
+++ b/tests/test_prompt_sampler.py
@@ -0,0 +1,53 @@
+"""
+Tests for PromptSampler in openevolve.prompt.sampler
+"""
+
+import unittest
+from openevolve.config import Config
+from openevolve.prompt.sampler import PromptSampler
+
+
+class TestPromptSampler(unittest.TestCase):
+    """Tests for prompt sampler"""
+
+    def setUp(self):
+        """Set up test prompt sampler"""
+        config = Config()
+        self.prompt_sampler = PromptSampler(config.prompt)
+
+    def test_build_prompt(self):
+        """Test building a prompt"""
+        current_program = "def test(): pass"
+        parent_program = "def test(): pass"
+        program_metrics = {"score": 0.5}
+        previous_programs = [
+            {
+                "id": "prev1",
+                "code": "def prev1(): pass",
+                "metrics": {"score": 0.4},
+            }
+        ]
+        top_programs = [
+            {
+                "id": "top1",
+                "code": "def top1(): pass",
+                "metrics": {"score": 0.6},
+            }
+        ]
+
+        prompt = self.prompt_sampler.build_prompt(
+            current_program=current_program,
+            parent_program=parent_program,
+            program_metrics=program_metrics,
+            previous_programs=previous_programs,
+            top_programs=top_programs,
+        )
+
+        self.assertIn("system", prompt)
+        self.assertIn("user", prompt)
+        self.assertIn("def test(): pass", prompt["user"])
+        self.assertIn("score: 0.5", prompt["user"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_valid_configs.py b/tests/test_valid_configs.py
index 829d23b42..ff631fbb9 100644
--- a/tests/test_valid_configs.py
+++ b/tests/test_valid_configs.py
@@ -4,6 +4,7 @@
 
 import os
 import unittest
+import itertools
 from unittest.mock import MagicMock, patch
 
 from openevolve.config import Config, load_config
@@ -15,9 +16,9 @@ class TestConfigValidity(unittest.TestCase):
     def collect_files(self):
         """Collect all config/*config*.yaml and examples/**/*config*.yaml files"""
         config_dir = os.path.join(os.path.dirname(__file__), "../configs")
-        example_dir = os.path.join(os.path.dirname(__file__), "../examples")
+        examples_dir = os.path.join(os.path.dirname(__file__), "../examples")
         config_files = []
-        for root, _, files in os.walk(config_dir):
+        for root, _, files in itertools.chain(os.walk(config_dir), os.walk(examples_dir)):
             for file in files:
                 if "config" in file and file.endswith(".yaml"):
                     config_files.append(os.path.join(root, file))
@@ -32,6 +33,7 @@ def test_import_config_files(self):
             self.assertIsInstance(
                 config, Config, f"Config file {config_file} did not load correctly"
             )
+            self.assertTrue(len(config.llm.models) > 0)
 
 
 if __name__ == "__main__":