diff --git a/openevolve/prompt/sampler.py b/openevolve/prompt/sampler.py index af0d2eb04..81821651e 100644 --- a/openevolve/prompt/sampler.py +++ b/openevolve/prompt/sampler.py @@ -186,12 +186,16 @@ def _identify_improvement_areas( # Only compare numeric metrics if not isinstance(value, (int, float)) or isinstance(value, bool): continue + + # if metric ends with (-), then it is a metric that we want to minimize + if metric.endswith("(-)"): + value = -value improved = True regressed = True for attempt in recent_attempts: - attempt_value = attempt["metrics"].get(metric, 0) + attempt_value = -attempt["metrics"].get(metric, 0) if metric.endswith("(-)") else attempt["metrics"].get(metric, 0) # Only compare if both values are numeric if isinstance(value, (int, float)) and isinstance(attempt_value, (int, float)): if attempt_value <= value: diff --git a/tests/test_prompt_sampler.py b/tests/test_prompt_sampler.py index 771a962b7..66ccde572 100644 --- a/tests/test_prompt_sampler.py +++ b/tests/test_prompt_sampler.py @@ -48,6 +48,61 @@ def test_build_prompt(self): self.assertIn("def test(): pass", prompt["user"]) self.assertIn("score: 0.5", prompt["user"]) + def test_metric_minimization_feature(self): + """Test that metrics ending with '(-)' are handled correctly for minimization""" + current_program = "def test(): pass" + parent_program = "def test(): pass" + + # Test with both regular and minimization metrics + program_metrics = { + "improvement" : 0.3, + "improvement(-)" : 0.1, + "mixed" : 0.3, + "mixed(-)" : 0.3, + "regression" : 0.1, + "regression(-)" : 0.5, + } + + # Create previous programs with different metric values to test comparison logic + previous_programs = [ + { + "id": "prev1", + "code": "def prev1(): pass", + "metrics": { + "improvement" : 0.1, + "improvement(-)" : 0.2, + "mixed" : 0.1, + "mixed(-)" : 0.5, + "regression" : 0.5, + "regression(-)" : 0.3, + }, + }, + { + "id": "prev2", + "code": "def prev2(): pass", + "metrics": { + "improvement" : 0.2, + 
"improvement(-)" : 0.3, + "mixed" : 0.5, + "mixed(-)" : 0.1, + "regression" : 0.7, + "regression(-)" : 0.2, + }, + } + ] + response = self.prompt_sampler._identify_improvement_areas( + current_program=current_program, + parent_program=parent_program, + metrics=program_metrics, + previous_programs=previous_programs + ) + expected_response = [ + "- Metrics showing improvement: improvement, improvement(-). Consider continuing with similar changes.", + "- Metrics showing regression: regression, regression(-). Consider reverting or revising recent changes in these areas." + ] + expected_response = "\n".join(expected_response) + self.assertEqual(response, expected_response) + if __name__ == "__main__": unittest.main()