diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index cb9b0c7f2..3e0acafcb 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -8,3 +8,4 @@ N_TESTS_TO_GENERATE = 2 TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget COVERAGE_THRESHOLD = 60.0 +MIN_TESTCASE_PASSED_THRESHOLD = 6 diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index f448cb27a..611b4dd98 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -4,7 +4,11 @@ from codeflash.cli_cmds.console import logger from codeflash.code_utils import env_utils -from codeflash.code_utils.config_consts import COVERAGE_THRESHOLD, MIN_IMPROVEMENT_THRESHOLD +from codeflash.code_utils.config_consts import ( + COVERAGE_THRESHOLD, + MIN_IMPROVEMENT_THRESHOLD, + MIN_TESTCASE_PASSED_THRESHOLD, +) from codeflash.models.models import TestType if TYPE_CHECKING: @@ -50,7 +54,7 @@ def quantity_of_tests_critic(candidate_result: OptimizedCandidateResult) -> bool for test_type in report: pass_count += report[test_type]["passed"] - if pass_count >= 4: + if pass_count >= MIN_TESTCASE_PASSED_THRESHOLD: return True # If only one test passed, check if it's a REPLAY_TEST return bool(pass_count == 1 and report[TestType.REPLAY_TEST]["passed"] == 1) diff --git a/tests/test_critic.py b/tests/test_critic.py index e60047125..569c2badb 100644 --- a/tests/test_critic.py +++ b/tests/test_critic.py @@ -195,8 +195,7 @@ def test_generated_test_critic() -> None: timed_out=False, loop_index=1, ) - - test_results = [test_1, test_2, test_3, test_7] + test_results = [test_1, test_2, test_3, test_4, test_5, test_6, test_7, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -209,7 +208,7 @@ def test_generated_test_critic() -> None: assert quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_2, test_3, test_6, test_7] + test_results = [test_1, test_2, test_3, test_6, test_7, test_1, test_4, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -222,7 +221,7 @@ def test_generated_test_critic() -> None: assert quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_3, test_4, test_2, test_7] + test_results = [test_1, test_3, test_4, test_2, test_7, test_1, test_6, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -248,7 +247,7 @@ def test_generated_test_critic() -> None: assert not quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_2, test_3, test_4, test_5] + test_results = [test_1, test_2, test_3, test_4, test_5, test_1, test_1, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -287,7 +286,7 @@ def test_generated_test_critic() -> None: assert quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_2, test_3, test_4, test_5] + test_results = [test_1, test_2, test_3, test_4, test_5, test_1, test_1, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5, @@ -328,7 +327,7 @@ def test_generated_test_critic() -> None: assert not quantity_of_tests_critic(candidate_result) - test_results = [test_1, test_2, test_3, test_5] + test_results = [test_1, test_2, test_3, test_5, test_1, test_1, test_1, test_1] candidate_result = OptimizedCandidateResult( max_loop_count=5,