diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index ca7d13425..ded1b64ee 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -10,6 +10,7 @@
 from pydantic.json import pydantic_encoder
 
 from codeflash.cli_cmds.console import console, logger
+from codeflash.code_utils.config_consts import get_n_candidates, get_n_candidates_lp
 from codeflash.code_utils.env_utils import get_codeflash_api_key
 from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
 from codeflash.lsp.helpers import is_LSP_enabled
@@ -131,6 +132,7 @@ def optimize_python_code(  # noqa: D417
         "current_username": get_last_commit_author_if_pr_exists(None),
         "repo_owner": git_repo_owner,
         "repo_name": git_repo_name,
+        "n_candidates": get_n_candidates(),
     }
 
     logger.info("!lsp|Generating optimized candidates…")
@@ -192,6 +194,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
         "experiment_metadata": experiment_metadata,
         "codeflash_version": codeflash_version,
         "lsp_mode": is_LSP_enabled(),
+        "n_candidates_lp": get_n_candidates_lp(),
     }
 
     console.rule()
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index 50b4bce16..d5c4cdc48 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -11,3 +11,34 @@
 MIN_TESTCASE_PASSED_THRESHOLD = 6
 REPEAT_OPTIMIZATION_PROBABILITY = 0.1
 DEFAULT_IMPORTANCE_THRESHOLD = 0.001
+N_CANDIDATES_LP = 6
+
+# LSP-specific
+N_CANDIDATES_LSP = 3
+N_TESTS_TO_GENERATE_LSP = 2
+TOTAL_LOOPING_TIME_LSP = 10.0  # Kept the same timing for LSP mode to avoid a change in performance reporting
+N_CANDIDATES_LP_LSP = 3
+
+
+def get_n_candidates() -> int:
+    from codeflash.lsp.helpers import is_LSP_enabled
+
+    return N_CANDIDATES_LSP if is_LSP_enabled() else N_CANDIDATES
+
+
+def get_n_candidates_lp() -> int:
+    from codeflash.lsp.helpers import is_LSP_enabled
+
+    return N_CANDIDATES_LP_LSP if is_LSP_enabled() else N_CANDIDATES_LP
+
+
+def get_n_tests_to_generate() -> int:
+    from codeflash.lsp.helpers import is_LSP_enabled
+
+    return N_TESTS_TO_GENERATE_LSP if is_LSP_enabled() else N_TESTS_TO_GENERATE
+
+
+def get_total_looping_time() -> float:
+    from codeflash.lsp.helpers import is_LSP_enabled
+
+    return TOTAL_LOOPING_TIME_LSP if is_LSP_enabled() else TOTAL_LOOPING_TIME
diff --git a/codeflash/code_utils/git_utils.py b/codeflash/code_utils/git_utils.py
index a445576e0..dbff075ba 100644
--- a/codeflash/code_utils/git_utils.py
+++ b/codeflash/code_utils/git_utils.py
@@ -16,7 +16,7 @@
 from unidiff import PatchSet
 
 from codeflash.cli_cmds.console import logger
-from codeflash.code_utils.config_consts import N_CANDIDATES
+from codeflash.code_utils.config_consts import get_n_candidates
 
 if TYPE_CHECKING:
     from git import Repo
@@ -164,7 +164,7 @@ def create_git_worktrees(
 ) -> tuple[Path | None, list[Path]]:
     if git_root and worktree_root_dir:
         worktree_root = Path(tempfile.mkdtemp(dir=worktree_root_dir))
-        worktrees = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(N_CANDIDATES + 1)]
+        worktrees = [Path(tempfile.mkdtemp(dir=worktree_root)) for _ in range(get_n_candidates() + 1)]
         for worktree in worktrees:
             subprocess.run(["git", "worktree", "add", "-d", worktree], cwd=module_root, check=True)
     else:
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 52a0b6d57..40c5cd8d4 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -44,10 +44,11 @@
 from codeflash.code_utils.config_consts import (
     COVERAGE_THRESHOLD,
     INDIVIDUAL_TESTCASE_TIMEOUT,
-    N_CANDIDATES,
-    N_TESTS_TO_GENERATE,
     REPEAT_OPTIMIZATION_PROBABILITY,
-    TOTAL_LOOPING_TIME,
+    get_n_candidates,
+    get_n_candidates_lp,
+    get_n_tests_to_generate,
+    get_total_looping_time,
 )
 from codeflash.code_utils.deduplicate_code import normalize_code
 from codeflash.code_utils.edit_generated_tests import (
@@ -236,8 +237,9 @@ def __init__(
         self.generate_and_instrument_tests_results: (
             tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet] | None
         ) = None
+        n_tests = get_n_tests_to_generate()
         self.executor = concurrent.futures.ThreadPoolExecutor(
-            max_workers=N_TESTS_TO_GENERATE + 2 if self.experiment_id is None else N_TESTS_TO_GENERATE + 3
+            max_workers=n_tests + 2 if self.experiment_id is None else n_tests + 3
         )
 
     def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
@@ -287,17 +289,18 @@ def generate_and_instrument_tests(
         ]
     ]:
         """Generate and instrument tests, returning all necessary data for optimization."""
+        n_tests = get_n_tests_to_generate()
         generated_test_paths = [
             get_test_file_path(
                 self.test_cfg.tests_root, self.function_to_optimize.function_name, test_index, test_type="unit"
             )
-            for test_index in range(N_TESTS_TO_GENERATE)
+            for test_index in range(n_tests)
         ]
         generated_perf_test_paths = [
             get_test_file_path(
                 self.test_cfg.tests_root, self.function_to_optimize.function_name, test_index, test_type="perf"
             )
-            for test_index in range(N_TESTS_TO_GENERATE)
+            for test_index in range(n_tests)
         ]
 
         with progress_bar(
@@ -484,7 +487,7 @@ def determine_best_candidate(
                     dependency_code=code_context.read_only_context_code,
                     trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
                     line_profiler_results=original_code_baseline.line_profile_results["str_out"],
-                    num_candidates=10,
+                    num_candidates=get_n_candidates_lp(),
                     experiment_metadata=ExperimentMetadata(
                         id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment"
                     )
@@ -1058,7 +1061,8 @@ def generate_tests_and_optimizations(
         generated_perf_test_paths: list[Path],
         run_experiment: bool = False,  # noqa: FBT001, FBT002
    ) -> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet], str]:
-        assert len(generated_test_paths) == N_TESTS_TO_GENERATE
+        n_tests = get_n_tests_to_generate()
+        assert len(generated_test_paths) == n_tests
         console.rule()
         # Submit the test generation task as future
         future_tests = self.submit_test_generation_tasks(
             test_env,
             code_context.testgen_context.markdown,
             [definition.full_name for definition in code_context.helper_functions],
             generated_test_paths,
             generated_perf_test_paths,
         )
+        n_candidates = get_n_candidates()
         future_optimization_candidates = self.executor.submit(
             self.aiservice_client.optimize_python_code,
             read_writable_code.markdown,
             read_only_context_code,
             self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id,
-            N_CANDIDATES,
+            n_candidates,
             ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
         )
         future_candidates_exp = None
@@ -1088,7 +1093,7 @@
                 read_writable_code.markdown,
                 read_only_context_code,
                 self.function_trace_id[:-4] + "EXP1",
-                N_CANDIDATES,
+                n_candidates,
                 ExperimentMetadata(id=self.experiment_id, group="experiment"),
             )
             futures.append(future_candidates_exp)
@@ -1477,12 +1482,13 @@ def establish_original_code_baseline(
            instrument_codeflash_capture(
                 self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
             )
+            total_looping_time = get_total_looping_time()
             behavioral_results, coverage_results = self.run_and_parse_tests(
                 testing_type=TestingMode.BEHAVIOR,
                 test_env=test_env,
                 test_files=self.test_files,
                 optimization_iteration=0,
-                testing_time=TOTAL_LOOPING_TIME,
+                testing_time=total_looping_time,
                 enable_coverage=test_framework == "pytest",
                 code_context=code_context,
             )
@@ -1503,7 +1509,7 @@
             )
 
             if test_framework == "pytest":
-                with progress_bar("Performing detailed line profiling..."):
+                with progress_bar("Running line profiling to identify performance bottlenecks..."):
                     line_profile_results = self.line_profiler_step(
                         code_context=code_context, original_helper_code=original_helper_code, candidate_index=0
                     )
@@ -1514,7 +1520,7 @@
                     test_env=test_env,
                     test_files=self.test_files,
                     optimization_iteration=0,
-                    testing_time=TOTAL_LOOPING_TIME,
+                    testing_time=total_looping_time,
                     enable_coverage=False,
                     code_context=code_context,
                 )
@@ -1522,7 +1528,7 @@
                 benchmarking_results = TestResults()
                 start_time: float = time.time()
                 for i in range(100):
-                    if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME * 1.5:
+                    if i >= 5 and time.time() - start_time >= total_looping_time * 1.5:
                         # * 1.5 to give unittest a bit more time to run
                         break
                     test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1)
@@ -1532,7 +1538,7 @@
                         test_env=test_env,
                         test_files=self.test_files,
                         optimization_iteration=0,
-                        testing_time=TOTAL_LOOPING_TIME,
+                        testing_time=total_looping_time,
                         enable_coverage=False,
                         code_context=code_context,
                         unittest_loop_index=i + 1,
@@ -1617,12 +1623,13 @@ def run_optimized_candidate(
                 self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
             )
 
+            total_looping_time = get_total_looping_time()
             candidate_behavior_results, _ = self.run_and_parse_tests(
                 testing_type=TestingMode.BEHAVIOR,
                 test_env=test_env,
                 test_files=self.test_files,
                 optimization_iteration=optimization_candidate_index,
-                testing_time=TOTAL_LOOPING_TIME,
+                testing_time=total_looping_time,
                 enable_coverage=False,
             )
             # Remove instrumentation
@@ -1653,7 +1660,7 @@
                     test_env=test_env,
                     test_files=self.test_files,
                     optimization_iteration=optimization_candidate_index,
-                    testing_time=TOTAL_LOOPING_TIME,
+                    testing_time=total_looping_time,
                     enable_coverage=False,
                 )
                 loop_count = (
@@ -1671,7 +1678,7 @@
                 start_time: float = time.time()
                 loop_count = 0
                 for i in range(100):
-                    if i >= 5 and time.time() - start_time >= TOTAL_LOOPING_TIME * 1.5:
+                    if i >= 5 and time.time() - start_time >= get_total_looping_time() * 1.5:
                         # * 1.5 to give unittest a bit more time to run
                         break
                     test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1)
@@ -1680,7 +1687,7 @@
                         test_env=test_env,
                         test_files=self.test_files,
                         optimization_iteration=optimization_candidate_index,
-                        testing_time=TOTAL_LOOPING_TIME,
+                        testing_time=get_total_looping_time(),
                         unittest_loop_index=i + 1,
                     )
                     loop_count = i + 1
@@ -1719,7 +1726,7 @@ def run_and_parse_tests(
         test_env: dict[str, str],
         test_files: TestFiles,
         optimization_iteration: int,
-        testing_time: float = TOTAL_LOOPING_TIME,
+        testing_time: float = get_total_looping_time(),
         *,
         enable_coverage: bool = False,
         pytest_min_loops: int = 5,
@@ -1858,6 +1865,9 @@ def line_profiler_step(
         self, code_context: CodeOptimizationContext, original_helper_code: dict[Path, str], candidate_index: int
     ) -> dict:
         try:
+            logger.info("Running line profiling to identify performance bottlenecks…")
+            console.rule()
+
             test_env = self.get_test_env(
                 codeflash_loop_index=0, codeflash_test_iteration=candidate_index, codeflash_tracer_disable=1
             )
@@ -1867,7 +1877,7 @@ def line_profiler_step(
                 test_env=test_env,
                 test_files=self.test_files,
                 optimization_iteration=0,
-                testing_time=TOTAL_LOOPING_TIME,
+                testing_time=get_total_looping_time(),
                 enable_coverage=False,
                 code_context=code_context,
                 line_profiler_output_file=line_profiler_output_file,
diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
index 85e347641..a0ad8fd66 100644
--- a/codeflash/verification/test_runner.py
+++ b/codeflash/verification/test_runner.py
@@ -8,7 +8,7 @@
 from codeflash.cli_cmds.console import logger
 from codeflash.code_utils.code_utils import custom_addopts, get_run_tmp_file
 from codeflash.code_utils.compat import IS_POSIX, SAFE_SYS_EXECUTABLE
-from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME
+from codeflash.code_utils.config_consts import get_total_looping_time
 from codeflash.code_utils.coverage_utils import prepare_coverage_files
 from codeflash.models.models import TestFiles, TestType
 
@@ -37,7 +37,7 @@ def run_behavioral_tests(
     pytest_timeout: int | None = None,
     pytest_cmd: str = "pytest",
     verbose: bool = False,
-    pytest_target_runtime_seconds: int = TOTAL_LOOPING_TIME,
+    pytest_target_runtime_seconds: int = get_total_looping_time(),
     enable_coverage: bool = False,
 ) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]:
     if test_framework == "pytest":
@@ -151,7 +151,7 @@ def run_line_profile_tests(
     cwd: Path,
     test_framework: str,
     *,
-    pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME,
+    pytest_target_runtime_seconds: float = get_total_looping_time(),
     verbose: bool = False,
     pytest_timeout: int | None = None,
     pytest_min_loops: int = 5,  # noqa: ARG001
@@ -237,7 +237,7 @@ def run_benchmarking_tests(
     cwd: Path,
     test_framework: str,
    *,
-    pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME,
+    pytest_target_runtime_seconds: float = get_total_looping_time(),
     verbose: bool = False,
     pytest_timeout: int | None = None,
     pytest_min_loops: int = 5,