From 856fe60d53ea3723d73e5232e27cd65d4aba74fc Mon Sep 17 00:00:00 2001 From: Saga4 Date: Fri, 12 Sep 2025 06:01:31 +0530 Subject: [PATCH 1/4] LSP reduce no of candidates --- codeflash/code_utils/config_consts.py | 23 ++++++++++ codeflash/code_utils/git_utils.py | 4 +- codeflash/optimization/function_optimizer.py | 47 ++++++++++++-------- codeflash/verification/test_runner.py | 8 ++-- 4 files changed, 57 insertions(+), 25 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 50b4bce16..b1c783a32 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -11,3 +11,26 @@ MIN_TESTCASE_PASSED_THRESHOLD = 6 REPEAT_OPTIMIZATION_PROBABILITY = 0.1 DEFAULT_IMPORTANCE_THRESHOLD = 0.001 + +# LSP-specific +N_CANDIDATES_LSP = 3 +N_TESTS_TO_GENERATE_LSP = 1 +TOTAL_LOOPING_TIME_LSP = 5.0 + + +def get_n_candidates() -> int: + from codeflash.lsp.helpers import is_LSP_enabled + + return N_CANDIDATES_LSP if is_LSP_enabled() else N_CANDIDATES + + +def get_n_tests_to_generate() -> int: + from codeflash.lsp.helpers import is_LSP_enabled + + return N_TESTS_TO_GENERATE_LSP if is_LSP_enabled() else N_TESTS_TO_GENERATE + + +def get_total_looping_time() -> float: + from codeflash.lsp.helpers import is_LSP_enabled + + return TOTAL_LOOPING_TIME_LSP if is_LSP_enabled() else TOTAL_LOOPING_TIME diff --git a/codeflash/code_utils/git_utils.py b/codeflash/code_utils/git_utils.py index a445576e0..dbff075ba 100644 --- a/codeflash/code_utils/git_utils.py +++ b/codeflash/code_utils/git_utils.py @@ -16,7 +16,7 @@ from unidiff import PatchSet from codeflash.cli_cmds.console import logger -from codeflash.code_utils.config_consts import N_CANDIDATES +from codeflash.code_utils.config_consts import get_n_candidates if TYPE_CHECKING: from git import Repo @@ -164,7 +164,7 @@ def create_git_worktrees( ) -> tuple[Path | None, list[Path]]: if git_root and worktree_root_dir: worktree_root = 
Path(tempfile.mkdtemp(dir=worktree_root_dir)) - worktrees = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(N_CANDIDATES + 1)] + worktrees = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(get_n_candidates() + 1)] for worktree in worktrees: subprocess.run(["git", "worktree", "add", "-d", worktree], cwd=module_root, check=True) else: diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index c523dcbce..c24302e43 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -42,10 +42,10 @@ ) from codeflash.code_utils.config_consts import ( INDIVIDUAL_TESTCASE_TIMEOUT, - N_CANDIDATES, - N_TESTS_TO_GENERATE, REPEAT_OPTIMIZATION_PROBABILITY, - TOTAL_LOOPING_TIME, + get_n_candidates, + get_n_tests_to_generate, + get_total_looping_time, ) from codeflash.code_utils.edit_generated_tests import ( add_runtime_comments_to_generated_tests, @@ -227,8 +227,9 @@ def __init__( self.generate_and_instrument_tests_results: ( tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet] | None ) = None + n_tests = get_n_tests_to_generate() self.executor = concurrent.futures.ThreadPoolExecutor( - max_workers=N_TESTS_TO_GENERATE + 2 if self.experiment_id is None else N_TESTS_TO_GENERATE + 3 + max_workers=n_tests + 2 if self.experiment_id is None else n_tests + 3 ) def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]: @@ -278,17 +279,18 @@ def generate_and_instrument_tests( ] ]: """Generate and instrument tests, returning all necessary data for optimization.""" + n_tests = get_n_tests_to_generate() generated_test_paths = [ get_test_file_path( self.test_cfg.tests_root, self.function_to_optimize.function_name, test_index, test_type="unit" ) - for test_index in range(N_TESTS_TO_GENERATE) + for test_index in range(n_tests) ] generated_perf_test_paths = [ get_test_file_path( self.test_cfg.tests_root, 
self.function_to_optimize.function_name, test_index, test_type="perf" ) - for test_index in range(N_TESTS_TO_GENERATE) + for test_index in range(n_tests) ] with progress_bar( @@ -971,7 +973,8 @@ def generate_tests_and_optimizations( generated_perf_test_paths: list[Path], run_experiment: bool = False, # noqa: FBT001, FBT002 ) -> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet], str]: - assert len(generated_test_paths) == N_TESTS_TO_GENERATE + n_tests = get_n_tests_to_generate() + assert len(generated_test_paths) == n_tests console.rule() # Submit the test generation task as future future_tests = self.submit_test_generation_tasks( @@ -981,12 +984,13 @@ def generate_tests_and_optimizations( generated_test_paths, generated_perf_test_paths, ) + n_candidates = get_n_candidates() future_optimization_candidates = self.executor.submit( self.aiservice_client.optimize_python_code, read_writable_code.markdown, read_only_context_code, self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id, - N_CANDIDATES, + n_candidates, ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None, ) future_candidates_exp = None @@ -1001,7 +1005,7 @@ def generate_tests_and_optimizations( read_writable_code.markdown, read_only_context_code, self.function_trace_id[:-4] + "EXP1", - N_CANDIDATES, + n_candidates, ExperimentMetadata(id=self.experiment_id, group="experiment"), ) futures.append(future_candidates_exp) @@ -1375,12 +1379,13 @@ def establish_original_code_baseline( instrument_codeflash_capture( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) + total_looping_time = get_total_looping_time() behavioral_results, coverage_results = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=self.test_files, optimization_iteration=0, - testing_time=TOTAL_LOOPING_TIME, + testing_time=total_looping_time, enable_coverage=test_framework == 
"pytest", code_context=code_context, ) @@ -1407,7 +1412,7 @@ def establish_original_code_baseline( test_env=test_env, test_files=self.test_files, optimization_iteration=0, - testing_time=TOTAL_LOOPING_TIME, + testing_time=total_looping_time, enable_coverage=False, code_context=code_context, ) @@ -1415,7 +1420,7 @@ def establish_original_code_baseline( benchmarking_results = TestResults() start_time: float = time.time() for i in range(100): - if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME * 1.5: + if i >= 5 and time.time() - start_time >= total_looping_time * 1.5: # * 1.5 to give unittest a bit more time to run break test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1) @@ -1424,7 +1429,7 @@ def establish_original_code_baseline( test_env=test_env, test_files=self.test_files, optimization_iteration=0, - testing_time=TOTAL_LOOPING_TIME, + testing_time=total_looping_time, enable_coverage=False, code_context=code_context, unittest_loop_index=i + 1, @@ -1514,12 +1519,13 @@ def run_optimized_candidate( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) + total_looping_time = get_total_looping_time() candidate_behavior_results, _ = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=self.test_files, optimization_iteration=optimization_candidate_index, - testing_time=TOTAL_LOOPING_TIME, + testing_time=total_looping_time, enable_coverage=False, ) # Remove instrumentation @@ -1548,7 +1554,7 @@ def run_optimized_candidate( test_env=test_env, test_files=self.test_files, optimization_iteration=optimization_candidate_index, - testing_time=TOTAL_LOOPING_TIME, + testing_time=total_looping_time, enable_coverage=False, ) loop_count = ( @@ -1566,7 +1572,7 @@ def run_optimized_candidate( start_time: float = time.time() loop_count = 0 for i in range(100): - if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME * 1.5: + if i >= 5 and time.time() - start_time >= get_total_looping_time() * 1.5: # * 1.5 to 
give unittest a bit more time to run break test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1) @@ -1575,7 +1581,7 @@ def run_optimized_candidate( test_env=test_env, test_files=self.test_files, optimization_iteration=optimization_candidate_index, - testing_time=TOTAL_LOOPING_TIME, + testing_time=get_total_looping_time(), unittest_loop_index=i + 1, ) loop_count = i + 1 @@ -1614,7 +1620,7 @@ def run_and_parse_tests( test_env: dict[str, str], test_files: TestFiles, optimization_iteration: int, - testing_time: float = TOTAL_LOOPING_TIME, + testing_time: float = get_total_looping_time(), *, enable_coverage: bool = False, pytest_min_loops: int = 5, @@ -1753,6 +1759,9 @@ def line_profiler_step( self, code_context: CodeOptimizationContext, original_helper_code: dict[Path, str], candidate_index: int ) -> dict: try: + logger.info("Running line profiling to identify performance bottlenecks…") + console.rule() + test_env = self.get_test_env( codeflash_loop_index=0, codeflash_test_iteration=candidate_index, codeflash_tracer_disable=1 ) @@ -1762,7 +1771,7 @@ def line_profiler_step( test_env=test_env, test_files=self.test_files, optimization_iteration=0, - testing_time=TOTAL_LOOPING_TIME, + testing_time=get_total_looping_time(), enable_coverage=False, code_context=code_context, line_profiler_output_file=line_profiler_output_file, diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py index 85e347641..a0ad8fd66 100644 --- a/codeflash/verification/test_runner.py +++ b/codeflash/verification/test_runner.py @@ -8,7 +8,7 @@ from codeflash.cli_cmds.console import logger from codeflash.code_utils.code_utils import custom_addopts, get_run_tmp_file from codeflash.code_utils.compat import IS_POSIX, SAFE_SYS_EXECUTABLE -from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME +from codeflash.code_utils.config_consts import get_total_looping_time from codeflash.code_utils.coverage_utils import prepare_coverage_files from codeflash.models.models import 
TestFiles, TestType @@ -37,7 +37,7 @@ def run_behavioral_tests( pytest_timeout: int | None = None, pytest_cmd: str = "pytest", verbose: bool = False, - pytest_target_runtime_seconds: int = TOTAL_LOOPING_TIME, + pytest_target_runtime_seconds: int = get_total_looping_time(), enable_coverage: bool = False, ) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]: if test_framework == "pytest": @@ -151,7 +151,7 @@ def run_line_profile_tests( cwd: Path, test_framework: str, *, - pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME, + pytest_target_runtime_seconds: float = get_total_looping_time(), verbose: bool = False, pytest_timeout: int | None = None, pytest_min_loops: int = 5, # noqa: ARG001 @@ -237,7 +237,7 @@ def run_benchmarking_tests( cwd: Path, test_framework: str, *, - pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME, + pytest_target_runtime_seconds: float = get_total_looping_time(), verbose: bool = False, pytest_timeout: int | None = None, pytest_min_loops: int = 5, From 64bdb066473dfcf55d2125c740925893c6d808ab Mon Sep 17 00:00:00 2001 From: Saga4 Date: Sat, 13 Sep 2025 04:36:44 +0530 Subject: [PATCH 2/4] config revert --- codeflash/code_utils/config_consts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index b1c783a32..a9fe49bd7 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -15,7 +15,7 @@ # LSP-specific N_CANDIDATES_LSP = 3 N_TESTS_TO_GENERATE_LSP = 1 -TOTAL_LOOPING_TIME_LSP = 5.0 +TOTAL_LOOPING_TIME_LSP = 10.0 # Kept same timing for LSP mode to avoid an increase in performance reporting def get_n_candidates() -> int: From 520d87c8393a0a5d08bf799bf0d417f5299165ae Mon Sep 17 00:00:00 2001 From: saga4 Date: Mon, 22 Sep 2025 14:57:42 -0700 Subject: [PATCH 3/4] pass reference values to aiservices --- codeflash/api/aiservice.py | 3 +++ codeflash/code_utils/config_consts.py | 10
+++++++++- codeflash/optimization/function_optimizer.py | 3 ++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 0df9f25b2..023e477a1 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -10,6 +10,7 @@ from pydantic.json import pydantic_encoder from codeflash.cli_cmds.console import console, logger +from codeflash.code_utils.config_consts import get_n_candidates, get_n_candidates_lp from codeflash.code_utils.env_utils import get_codeflash_api_key from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name from codeflash.lsp.helpers import is_LSP_enabled @@ -131,6 +132,7 @@ def optimize_python_code( # noqa: D417 "current_username": get_last_commit_author_if_pr_exists(None), "repo_owner": git_repo_owner, "repo_name": git_repo_name, + "n_candidates": get_n_candidates(), } logger.info("Generating optimized candidates…") @@ -192,6 +194,7 @@ def optimize_python_code_line_profiler( # noqa: D417 "experiment_metadata": experiment_metadata, "codeflash_version": codeflash_version, "lsp_mode": is_LSP_enabled(), + "n_candidates_lp": get_n_candidates_lp(), } logger.info("Generating optimized candidates…") diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index a9fe49bd7..d5c4cdc48 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -11,11 +11,13 @@ MIN_TESTCASE_PASSED_THRESHOLD = 6 REPEAT_OPTIMIZATION_PROBABILITY = 0.1 DEFAULT_IMPORTANCE_THRESHOLD = 0.001 +N_CANDIDATES_LP = 6 # LSP-specific N_CANDIDATES_LSP = 3 -N_TESTS_TO_GENERATE_LSP = 1 +N_TESTS_TO_GENERATE_LSP = 2 TOTAL_LOOPING_TIME_LSP = 10.0 # Kept same timing for LSP mode to avoid an increase in performance reporting +N_CANDIDATES_LP_LSP = 3 def get_n_candidates() -> int: @@ -24,6 +26,12 @@ def get_n_candidates() -> int: return N_CANDIDATES_LSP if is_LSP_enabled() else N_CANDIDATES +def
get_n_candidates_lp() -> int: + from codeflash.lsp.helpers import is_LSP_enabled + + return N_CANDIDATES_LP_LSP if is_LSP_enabled() else N_CANDIDATES_LP + + def get_n_tests_to_generate() -> int: from codeflash.lsp.helpers import is_LSP_enabled diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 1e0a3af37..a95da65e5 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -45,6 +45,7 @@ INDIVIDUAL_TESTCASE_TIMEOUT, REPEAT_OPTIMIZATION_PROBABILITY, get_n_candidates, + get_n_candidates_lp, get_n_tests_to_generate, get_total_looping_time, ) @@ -476,7 +477,7 @@ def determine_best_candidate( dependency_code=code_context.read_only_context_code, trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, line_profiler_results=original_code_baseline.line_profile_results["str_out"], - num_candidates=10, + num_candidates=get_n_candidates_lp(), experiment_metadata=ExperimentMetadata( id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment" ) From 8f5423b482a0de0863ec58eea5b54145507c9975 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 23 Sep 2025 01:52:38 +0300 Subject: [PATCH 4/4] line profiling loading msg --- codeflash/optimization/function_optimizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index e1820005a..40c5cd8d4 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1509,7 +1509,7 @@ def establish_original_code_baseline( ) if test_framework == "pytest": - with progress_bar("Performing detailed line profiling..."): + with progress_bar("Running line profiling to identify performance bottlenecks..."): line_profile_results = self.line_profiler_step( code_context=code_context, original_helper_code=original_helper_code, 
candidate_index=0 )