From 310967494d8cb8918e97fade7990334577bf19e3 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Fri, 30 May 2025 19:08:32 -0700 Subject: [PATCH 1/3] bump to 6. I have never seen any good optimization with 5 passed test cases. --- codeflash/code_utils/config_consts.py | 1 + codeflash/result/critic.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index cb9b0c7f2..3e0acafcb 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -8,3 +8,4 @@ N_TESTS_TO_GENERATE = 2 TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget COVERAGE_THRESHOLD = 60.0 +MIN_TESTCASE_PASSED_THRESHOLD = 6 diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index f448cb27a..7a7a2db8c 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -4,7 +4,7 @@ from codeflash.cli_cmds.console import logger from codeflash.code_utils import env_utils -from codeflash.code_utils.config_consts import COVERAGE_THRESHOLD, MIN_IMPROVEMENT_THRESHOLD +from codeflash.code_utils.config_consts import COVERAGE_THRESHOLD, MIN_IMPROVEMENT_THRESHOLD, MIN_TESTCASE_PASSED_THRESHOLD from codeflash.models.models import TestType if TYPE_CHECKING: @@ -50,7 +50,7 @@ def quantity_of_tests_critic(candidate_result: OptimizedCandidateResult) -> bool for test_type in report: pass_count += report[test_type]["passed"] - if pass_count >= 4: + if pass_count >= MIN_TESTCASE_PASSED_THRESHOLD: return True # If only one test passed, check if it's a REPLAY_TEST return bool(pass_count == 1 and report[TestType.REPLAY_TEST]["passed"] == 1) From 792819a42395293f143104890a929767b7f78e04 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 30 May 2025 19:47:54 -0700 Subject: [PATCH 2/3] Update critic.py --- codeflash/result/critic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index 7a7a2db8c..611b4dd98 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -4,7 +4,11 @@ from codeflash.cli_cmds.console import logger from codeflash.code_utils import env_utils -from codeflash.code_utils.config_consts import COVERAGE_THRESHOLD, MIN_IMPROVEMENT_THRESHOLD, MIN_TESTCASE_PASSED_THRESHOLD +from codeflash.code_utils.config_consts import ( + COVERAGE_THRESHOLD, + MIN_IMPROVEMENT_THRESHOLD, + MIN_TESTCASE_PASSED_THRESHOLD, +) from codeflash.models.models import TestType if TYPE_CHECKING: From 4f3b870d181ada266c454c39929c72ba48b64e7a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sun, 1 Jun 2025 19:15:24 -0700 Subject: [PATCH 3/3] more tests need to pass --- tests/test_critic.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/test_critic.py b/tests/test_critic.py index e60047125..569c2badb 100644 --- a/tests/test_critic.py +++ b/tests/test_critic.py @@ -195,8 +195,7 @@ def test_generated_test_critic() -> None: timed_out=False, loop_index=1, ) - - test_results = [test_1, test_2, test_3, test_7] + test_results = [test_1, test_2, test_3, test_4, test_5, test_6, test_7, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -209,7 +208,7 @@ def test_generated_test_critic() -> None: assert quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_2, test_3, test_6, test_7] + test_results = [test_1, test_2, test_3, test_6, test_7, test_1, test_4, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -222,7 +221,7 @@ def test_generated_test_critic() -> None: assert quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_3, test_4, test_2, test_7] + test_results = [test_1, test_3, test_4, test_2, test_7, test_1, test_6, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -248,7 +247,7 @@ def test_generated_test_critic() -> None: assert not quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_2, test_3, test_4, test_5] + test_results = [test_1, test_2, test_3, test_4, test_5, test_1, test_1, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -287,7 +286,7 @@ def test_generated_test_critic() -> None: assert quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_2, test_3, test_4, test_5] + test_results = [test_1, test_2, test_3, test_4, test_5, test_1, test_1, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -328,7 +327,7 @@ def test_generated_test_critic() -> None: assert not quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_2, test_3, test_5] + test_results = [test_1, test_2, test_3, test_5, test_1, test_1, test_1, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5,