From c14f6b257086bedf985c259d8aeafab0dfe3854b Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 7 Nov 2025 23:24:25 -0800 Subject: [PATCH 01/12] 5% --- codeflash/code_utils/config_consts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 6b2805fbf..1c7f292f0 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -3,7 +3,7 @@ MAX_FUNCTION_TEST_SECONDS = 60 N_CANDIDATES = 5 MIN_IMPROVEMENT_THRESHOLD = 0.05 -MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10 # 10% minimum improvement for async throughput +MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.05 # 5% minimum improvement for async throughput MAX_TEST_FUNCTION_RUNS = 50 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms N_TESTS_TO_GENERATE = 2 From 2f17942406b9831c96564679ce0a05c154c957e6 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 7 Nov 2025 23:42:22 -0800 Subject: [PATCH 02/12] better reporting for throughput --- codeflash/optimization/function_optimizer.py | 57 ++++++++++++-------- uv.lock | 11 ++++ 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 5f4ab8767..0bb7d2107 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -607,26 +607,32 @@ def determine_best_candidate( original_async_throughput=original_code_baseline.async_throughput, best_throughput_until_now=None, ) and quantity_of_tests_critic(candidate_result): - tree.add("This candidate is faster than the original code. πŸš€") # TODO: Change this description - tree.add(f"Original summed runtime: {humanize_runtime(original_code_baseline.runtime)}") - tree.add( - f"Best summed runtime: {humanize_runtime(candidate_result.best_test_runtime)} " - f"(measured over {candidate_result.max_loop_count} " - f"loop{'s' if candidate_result.max_loop_count > 1 else ''})" - ) - tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%") - tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X") - if ( + # For async functions, prioritize throughput metrics over runtime + is_async = ( original_code_baseline.async_throughput is not None and candidate_result.async_throughput is not None - ): + ) + + if is_async: throughput_gain_value = throughput_gain( original_throughput=original_code_baseline.async_throughput, optimized_throughput=candidate_result.async_throughput, ) + tree.add("This candidate has better async throughput than the original code. πŸš€") tree.add(f"Original async throughput: {original_code_baseline.async_throughput} executions") tree.add(f"Optimized async throughput: {candidate_result.async_throughput} executions") tree.add(f"Throughput improvement: {throughput_gain_value * 100:.1f}%") + tree.add(f"Throughput ratio: {throughput_gain_value + 1:.3f}X") + else: + tree.add("This candidate is faster than the original code. πŸš€") + tree.add(f"Original summed runtime: {humanize_runtime(original_code_baseline.runtime)}") + tree.add( + f"Best summed runtime: {humanize_runtime(candidate_result.best_test_runtime)} " + f"(measured over {candidate_result.max_loop_count} " + f"loop{'s' if candidate_result.max_loop_count > 1 else ''})" + ) + tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%") + tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X") line_profile_test_results = self.line_profiler_step( code_context=code_context, original_helper_code=original_helper_code, @@ -681,22 +687,31 @@ def determine_best_candidate( ) ) else: - tree.add( - f"Summed runtime: {humanize_runtime(best_test_runtime)} " - f"(measured over {candidate_result.max_loop_count} " - f"loop{'s' if candidate_result.max_loop_count > 1 else ''})" - ) - tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%") - tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X") - if ( + # For async functions, prioritize throughput metrics over runtime even for slow candidates + is_async = ( original_code_baseline.async_throughput is not None and candidate_result.async_throughput is not None - ): + ) + + if is_async: throughput_gain_value = throughput_gain( original_throughput=original_code_baseline.async_throughput, optimized_throughput=candidate_result.async_throughput, ) - tree.add(f"Throughput gain: {throughput_gain_value * 100:.1f}%") + tree.add(f"Async throughput: {candidate_result.async_throughput} executions") + tree.add(f"Throughput change: {throughput_gain_value * 100:.1f}%") + tree.add( + f"(Runtime for reference: {humanize_runtime(best_test_runtime)} over " + f"{candidate_result.max_loop_count} loop{'s' if candidate_result.max_loop_count > 1 else ''})" + ) + else: + tree.add( + f"Summed runtime: {humanize_runtime(best_test_runtime)} " + f"(measured over {candidate_result.max_loop_count} " + f"loop{'s' if candidate_result.max_loop_count > 1 else ''})" + ) + tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%") + tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X") if is_LSP_enabled(): lsp_log(LspMarkdownMessage(markdown=tree_to_markdown(tree))) diff --git a/uv.lock b/uv.lock index 0d99bdf15..96c6b9f98 100644 --- a/uv.lock +++ b/uv.lock @@ -372,6 +372,7 @@ dev = [ ] tests = [ { name = "black" }, + { name = "eval-type-backport" }, { name = "jax", version = "0.4.30", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "jax", version = "0.6.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, { name = "jax", version = "0.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -451,6 +452,7 @@ dev = [ ] tests = [ { name = "black", specifier = ">=25.9.0" }, + { name = "eval-type-backport" }, { name = "jax", specifier = ">=0.4.30" }, { name = "numpy", specifier = ">=2.0.2" }, { name = "pandas", specifier = ">=2.3.3" }, @@ -699,6 +701,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1b/c2/4bc8cd09b14e28ce3f406a8b05761bed0d785d1ca8c2a5c6684d884c66a2/editor-1.6.6-py3-none-any.whl", hash = "sha256:e818e6913f26c2a81eadef503a2741d7cca7f235d20e217274a009ecd5a74abf", size = 4017, upload-time = "2024-01-25T10:44:58.66Z" }, ] +[[package]] +name = "eval-type-backport" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/ea/8b0ac4469d4c347c6a385ff09dc3c048c2d021696664e26c7ee6791631b5/eval_type_backport-0.2.2.tar.gz", hash = "sha256:f0576b4cf01ebb5bd358d02314d31846af5e07678387486e2c798af0e7d849c1", size = 9079, upload-time = "2024-12-21T20:09:46.005Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830, upload-time = "2024-12-21T20:09:44.175Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" From 2d9086035be9280272b178a17c596510487df627 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sat, 8 Nov 2025 01:58:15 -0800 Subject: [PATCH 03/12] first pass --- codeflash/github/PrComment.py | 10 +++++++++- codeflash/optimization/function_optimizer.py | 9 +++++++-- codeflash/result/create_pr.py | 4 ++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/codeflash/github/PrComment.py b/codeflash/github/PrComment.py index 4de25230a..772a1e42f 100644 --- a/codeflash/github/PrComment.py +++ b/codeflash/github/PrComment.py @@ -21,6 +21,8 @@ class PrComment: winning_behavior_test_results: TestResults winning_benchmarking_test_results: TestResults benchmark_details: Optional[list[BenchmarkDetail]] = None + original_async_throughput: Optional[int] = None + best_async_throughput: Optional[int] = None def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str, Optional[list[BenchmarkDetail]]]]: report_table = { @@ -29,7 +31,7 @@ def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str, Option if test_type.to_name() } - return { + result = { "optimization_explanation": self.optimization_explanation, "best_runtime": humanize_runtime(self.best_runtime), "original_runtime": humanize_runtime(self.original_runtime), @@ -42,6 +44,12 @@ def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str, Option "benchmark_details": self.benchmark_details if self.benchmark_details else None, } + if self.original_async_throughput is not None and self.best_async_throughput is not None: + result["original_async_throughput"] = str(self.original_async_throughput) + result["best_async_throughput"] = str(self.best_async_throughput) + + return result + class FileDiffContent(BaseModel): oldContent: str # noqa: N815 diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 0bb7d2107..706172ba8 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1509,7 +1509,8 @@ def process_review( raise_pr = not self.args.no_pr staging_review = self.args.staging_review opt_review_response = "" - if raise_pr or staging_review: + # Skip optimization review for async functions for now + if (raise_pr or staging_review) and not self.function_to_optimize.is_async: data["root_dir"] = git_root_dir() try: opt_review_response = self.aiservice_client.get_optimization_review( @@ -1517,8 +1518,12 @@ def process_review( ) except Exception as e: logger.debug(f"optimization review response failed, investigate {e}") - data["optimization_review"] = opt_review_response + # Always set optimization_review in data (empty string for async functions) + data["optimization_review"] = opt_review_response if raise_pr and not staging_review and opt_review_response != "low": + # Ensure root_dir is set for PR creation (needed for async functions that skip opt_review) + if "root_dir" not in data: + data["root_dir"] = git_root_dir() data["git_remote"] = self.args.git_remote check_create_pr(**data) elif staging_review: diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 55f3713fd..f888f710a 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -220,6 +220,8 @@ def check_create_pr( winning_behavior_test_results=explanation.winning_behavior_test_results, winning_benchmarking_test_results=explanation.winning_benchmarking_test_results, benchmark_details=explanation.benchmark_details, + original_async_throughput=explanation.original_async_throughput, + best_async_throughput=explanation.best_async_throughput, ), existing_tests=existing_tests_source, generated_tests=generated_original_test_source, @@ -270,6 +272,8 @@ def check_create_pr( winning_behavior_test_results=explanation.winning_behavior_test_results, winning_benchmarking_test_results=explanation.winning_benchmarking_test_results, benchmark_details=explanation.benchmark_details, + original_async_throughput=explanation.original_async_throughput, + best_async_throughput=explanation.best_async_throughput, ), existing_tests=existing_tests_source, generated_tests=generated_original_test_source, From 77ea2ee084d52b694193a73a252ca150d7e857c4 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sat, 8 Nov 2025 02:15:39 -0800 Subject: [PATCH 04/12] improve reporting for async optimizations --- codeflash/result/explanation.py | 35 ++++++++++----------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/codeflash/result/explanation.py b/codeflash/result/explanation.py index 7b28f8ac1..23135a09a 100644 --- a/codeflash/result/explanation.py +++ b/codeflash/result/explanation.py @@ -30,8 +30,14 @@ class Explanation: @property def perf_improvement_line(self) -> str: - runtime_improvement = self.speedup + # speedup property already handles choosing between runtime and throughput + return f"{self.speedup_pct} improvement ({self.speedup_x} faster)." + @property + def speedup(self) -> float: + runtime_improvement = (self.original_runtime_ns / self.best_runtime_ns) - 1 + + # Use throughput improvement if we have async metrics and throughput is better if ( self.original_async_throughput is not None and self.best_async_throughput is not None @@ -43,15 +49,9 @@ def perf_improvement_line(self) -> str: # Use throughput metrics if throughput improvement is better or runtime got worse if throughput_improvement > runtime_improvement or runtime_improvement <= 0: - throughput_pct = f"{throughput_improvement * 100:,.0f}%" - throughput_x = f"{throughput_improvement + 1:,.2f}x" - return f"{throughput_pct} improvement ({throughput_x} faster)." - - return f"{self.speedup_pct} improvement ({self.speedup_x} faster)." + return throughput_improvement - @property - def speedup(self) -> float: - return (self.original_runtime_ns / self.best_runtime_ns) - 1 + return runtime_improvement @property def speedup_x(self) -> str: @@ -68,21 +68,6 @@ def __str__(self) -> str: best_runtime_human = humanize_runtime(self.best_runtime_ns) # Determine if we're showing throughput or runtime improvements - runtime_improvement = self.speedup - is_using_throughput_metric = False - - if ( - self.original_async_throughput is not None - and self.best_async_throughput is not None - and self.original_async_throughput > 0 - ): - throughput_improvement = throughput_gain( - original_throughput=self.original_async_throughput, optimized_throughput=self.best_async_throughput - ) - - if throughput_improvement > runtime_improvement or runtime_improvement <= 0: - is_using_throughput_metric = True - benchmark_info = "" if self.benchmark_details: @@ -123,7 +108,7 @@ def __str__(self) -> str: console.print(table) benchmark_info = cast("StringIO", console.file).getvalue() + "\n" # Cast for mypy - if is_using_throughput_metric: + if self.original_async_throughput is not None and self.best_async_throughput is not None: performance_description = ( f"Throughput improved from {self.original_async_throughput} to {self.best_async_throughput} operations/second " f"(runtime: {original_runtime_human} β†’ {best_runtime_human})\n\n" From 0038b20533f15fa3b1ac45d0f7dbd7798bafeb83 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sat, 8 Nov 2025 04:10:46 -0800 Subject: [PATCH 05/12] remove some deduplication --- .../code_utils/instrument_existing_tests.py | 49 +++++++++---------- codeflash/optimization/function_optimizer.py | 22 +++------ tests/test_async_run_and_parse_tests.py | 16 +++--- tests/test_instrument_async_tests.py | 16 +++--- 4 files changed, 46 insertions(+), 57 deletions(-) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index a776c4d45..2f48fae33 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -685,25 +685,6 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: ) -def instrument_source_module_with_async_decorators( - source_path: Path, function_to_optimize: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR -) -> tuple[bool, str | None]: - if not function_to_optimize.is_async: - return False, None - - try: - with source_path.open(encoding="utf8") as f: - source_code = f.read() - - modified_code, decorator_added = add_async_decorator_to_function(source_code, function_to_optimize, mode) - - if decorator_added: - return True, modified_code - - except Exception: - return False, None - else: - return False, None def inject_async_profiling_into_existing_test( @@ -1289,25 +1270,35 @@ def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> c def add_async_decorator_to_function( - source_code: str, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR -) -> tuple[str, bool]: + source: str | Path, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR +) -> tuple[str, bool] | tuple[bool, str | None]: """Add async decorator to an async function definition. Args: ---- - source_code: The source code to modify. + source: Either source code string or path to source file. function: The FunctionToOptimize object representing the target async function. mode: The testing mode to determine which decorator to apply. Returns: ------- - Tuple of (modified_source_code, was_decorator_added). + For string input: Tuple of (modified_source_code, was_decorator_added). + For Path input: Tuple of (was_decorator_added, modified_source_code or None). """ + is_path_input = isinstance(source, Path) + if not function.is_async: - return source_code, False + return (False, None) if is_path_input else (source if isinstance(source, str) else "", False) try: + # Read source code if path provided + if is_path_input: + with source.open(encoding="utf8") as f: + source_code = f.read() + else: + source_code = source + module = cst.parse_module(source_code) # Add the decorator to the function @@ -1319,11 +1310,15 @@ def add_async_decorator_to_function( import_transformer = AsyncDecoratorImportAdder(mode) module = module.visit(import_transformer) - return sort_imports(code=module.code, float_to_top=True), decorator_transformer.added_decorator + modified_code = sort_imports(code=module.code, float_to_top=True) except Exception as e: logger.exception(f"Error adding async decorator to function {function.qualified_name}: {e}") - return source_code, False - + return (False, None) if is_path_input else (source_code, False) + else: + # Success - return based on whether decorator was added and input type + if decorator_transformer.added_decorator: + return (True, modified_code) if is_path_input else (modified_code, True) + return (False, None) if is_path_input else (modified_code, False) def create_instrumented_source_module_path(source_path: Path, temp_dir: Path) -> Path: instrumented_filename = f"instrumented_{source_path.name}" diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 706172ba8..32b35be77 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1591,9 +1591,9 @@ def establish_original_code_baseline( test_env = self.get_test_env(codeflash_loop_index=0, codeflash_test_iteration=0, codeflash_tracer_disable=1) if self.function_to_optimize.is_async: - from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators + from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - success, instrumented_source = instrument_source_module_with_async_decorators( + success, instrumented_source = add_async_decorator_to_function( self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR ) if success and instrumented_source: @@ -1644,11 +1644,9 @@ def establish_original_code_baseline( console.rule() with progress_bar("Running performance benchmarks..."): if self.function_to_optimize.is_async: - from codeflash.code_utils.instrument_existing_tests import ( - instrument_source_module_with_async_decorators, - ) + from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - success, instrumented_source = instrument_source_module_with_async_decorators( + success, instrumented_source = add_async_decorator_to_function( self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.PERFORMANCE ) if success and instrumented_source: @@ -1779,11 +1777,9 @@ def run_optimized_candidate( for module_abspath in original_helper_code: candidate_helper_code[module_abspath] = Path(module_abspath).read_text("utf-8") if self.function_to_optimize.is_async: - from codeflash.code_utils.instrument_existing_tests import ( - instrument_source_module_with_async_decorators, - ) + from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - success, instrumented_source = instrument_source_module_with_async_decorators( + success, instrumented_source = add_async_decorator_to_function( self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR ) if success and instrumented_source: @@ -1832,11 +1828,9 @@ def run_optimized_candidate( if test_framework == "pytest": # For async functions, instrument at definition site for performance benchmarking if self.function_to_optimize.is_async: - from codeflash.code_utils.instrument_existing_tests import ( - instrument_source_module_with_async_decorators, - ) + from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - success, instrumented_source = instrument_source_module_with_async_decorators( + success, instrumented_source = add_async_decorator_to_function( self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.PERFORMANCE ) if success and instrumented_source: diff --git a/tests/test_async_run_and_parse_tests.py b/tests/test_async_run_and_parse_tests.py index 9a97c4570..c97ec4ff8 100644 --- a/tests/test_async_run_and_parse_tests.py +++ b/tests/test_async_run_and_parse_tests.py @@ -11,7 +11,7 @@ from codeflash.models.models import CodePosition, FunctionParent, TestFile, TestFiles, TestingMode, TestType from codeflash.optimization.optimizer import Optimizer from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture -from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators, inject_profiling_into_existing_test +from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function, inject_profiling_into_existing_test @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") def test_async_bubble_sort_behavior_results() -> None: @@ -51,7 +51,7 @@ async def test_async_sort(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) # For async functions, instrument the source module directly with decorators - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) @@ -179,7 +179,7 @@ async def test_async_class_sort(): is_async=True, ) - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) @@ -293,7 +293,7 @@ async def test_async_perf(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) # Instrument the source module with async performance decorators - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( fto_path, func, TestingMode.PERFORMANCE ) @@ -397,7 +397,7 @@ async def async_error_function(lst): func = FunctionToOptimize(function_name="async_error_function", parents=[], file_path=Path(fto_path), is_async=True) - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) @@ -554,7 +554,7 @@ async def test_async_multi(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) @@ -670,7 +670,7 @@ async def test_async_edge_cases(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) @@ -973,7 +973,7 @@ async def test_mixed_sorting(): async_func = FunctionToOptimize(function_name="async_merge_sort", parents=[], file_path=Path(mixed_fto_path), is_async=True) - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( mixed_fto_path, async_func, TestingMode.BEHAVIOR ) diff --git a/tests/test_instrument_async_tests.py b/tests/test_instrument_async_tests.py index bd526ecc6..a89c8a2f9 100644 --- a/tests/test_instrument_async_tests.py +++ b/tests/test_instrument_async_tests.py @@ -247,9 +247,9 @@ async def test_async_function(): func = FunctionToOptimize(function_name="async_function", parents=[], file_path=Path("my_module.py"), is_async=True) # First instrument the source module - from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators + from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( source_file, func, TestingMode.BEHAVIOR ) @@ -304,9 +304,9 @@ async def test_async_function(): func = FunctionToOptimize(function_name="async_function", parents=[], file_path=Path("my_module.py"), is_async=True) # First instrument the source module - from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators + from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( source_file, func, TestingMode.PERFORMANCE ) @@ -371,9 +371,9 @@ async def test_mixed_functions(): function_name="async_function", parents=[], file_path=Path("my_module.py"), is_async=True ) - from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators + from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( source_file, async_func, TestingMode.BEHAVIOR ) @@ -545,9 +545,9 @@ async def test_multiple_calls(): ) # First instrument the source module with async decorators - from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators + from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_success, instrumented_source = add_async_decorator_to_function( source_file, func, TestingMode.BEHAVIOR ) From 6d63aa253f6f609b7e14f17c3b041d52adb3099e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sat, 8 Nov 2025 04:15:39 -0800 Subject: [PATCH 06/12] simplify usage --- .../code_utils/instrument_existing_tests.py | 23 ++++++--- codeflash/optimization/function_optimizer.py | 30 ++---------- tests/test_async_run_and_parse_tests.py | 47 +++++++++---------- tests/test_instrument_async_tests.py | 34 +++++++------- 4 files changed, 58 insertions(+), 76 deletions(-) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index 2f48fae33..6771284e0 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -1271,25 +1271,26 @@ def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> c def add_async_decorator_to_function( source: str | Path, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR -) -> tuple[str, bool] | tuple[bool, str | None]: +) -> tuple[str, bool] | bool: """Add async decorator to an async function definition. Args: ---- source: Either source code string or path to source file. + When Path is provided, the file is modified in-place. function: The FunctionToOptimize object representing the target async function. mode: The testing mode to determine which decorator to apply. Returns: ------- For string input: Tuple of (modified_source_code, was_decorator_added). - For Path input: Tuple of (was_decorator_added, modified_source_code or None). + For Path input: Boolean indicating success (file is written directly). """ is_path_input = isinstance(source, Path) if not function.is_async: - return (False, None) if is_path_input else (source if isinstance(source, str) else "", False) + return False if is_path_input else (source if isinstance(source, str) else "", False) try: # Read source code if path provided @@ -1313,12 +1314,20 @@ def add_async_decorator_to_function( modified_code = sort_imports(code=module.code, float_to_top=True) except Exception as e: logger.exception(f"Error adding async decorator to function {function.qualified_name}: {e}") - return (False, None) if is_path_input else (source_code, False) + return False if is_path_input else (source_code, False) else: - # Success - return based on whether decorator was added and input type + # Success - for Path input, write file and return bool; for string input, return code + bool + if is_path_input: + if decorator_transformer.added_decorator: + with source.open("w", encoding="utf8") as f: + f.write(modified_code) + logger.debug(f"Applied async {mode.value} instrumentation to {source}") + return True + return False + # String input: return modified code and whether decorator was added if decorator_transformer.added_decorator: - return (True, modified_code) if is_path_input else (modified_code, True) - return (False, None) if is_path_input else (modified_code, False) + return (modified_code, True) + return (modified_code, False) def create_instrumented_source_module_path(source_path: Path, temp_dir: Path) -> Path: instrumented_filename = f"instrumented_{source_path.name}" diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 32b35be77..05f15ef01 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1593,13 +1593,9 @@ def establish_original_code_baseline( if self.function_to_optimize.is_async: from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - success, instrumented_source = add_async_decorator_to_function( + success = add_async_decorator_to_function( self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR ) - if success and instrumented_source: - with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: - f.write(instrumented_source) - logger.debug(f"Applied async instrumentation to {self.function_to_optimize.file_path}") # Instrument codeflash capture with progress_bar("Running tests to establish original code behavior..."): @@ -1646,15 +1642,9 @@ def establish_original_code_baseline( if self.function_to_optimize.is_async: from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - success, instrumented_source = add_async_decorator_to_function( + add_async_decorator_to_function( self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.PERFORMANCE ) - if success and instrumented_source: - with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: - f.write(instrumented_source) - logger.debug( - f"Applied async performance instrumentation to {self.function_to_optimize.file_path}" - ) try: benchmarking_results, _ = self.run_and_parse_tests( @@ -1779,15 +1769,9 @@ def run_optimized_candidate( if self.function_to_optimize.is_async: from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - success, instrumented_source = add_async_decorator_to_function( + add_async_decorator_to_function( self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR ) - if success and instrumented_source: - with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: - f.write(instrumented_source) - logger.debug( - f"Applied async behavioral instrumentation to {self.function_to_optimize.file_path} for candidate {optimization_candidate_index}" - ) try: instrument_codeflash_capture( @@ -1830,15 +1814,9 @@ def run_optimized_candidate( if self.function_to_optimize.is_async: from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - success, instrumented_source = add_async_decorator_to_function( + add_async_decorator_to_function( self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.PERFORMANCE ) - if success and instrumented_source: - with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: - f.write(instrumented_source) - logger.debug( - f"Applied async performance instrumentation to {self.function_to_optimize.file_path} for candidate {optimization_candidate_index}" - ) try: candidate_benchmarking_results, _ = self.run_and_parse_tests( diff --git a/tests/test_async_run_and_parse_tests.py b/tests/test_async_run_and_parse_tests.py index c97ec4ff8..b273f135a 100644 --- a/tests/test_async_run_and_parse_tests.py +++ b/tests/test_async_run_and_parse_tests.py @@ -51,17 +51,16 @@ async def test_async_sort(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) # For async functions, instrument the source module directly with decorators - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) assert source_success - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = fto_path.read_text("utf-8") assert '''import asyncio\nfrom typing import List, Union\n\nfrom codeflash.code_utils.codeflash_wrap_decorator import \\\n codeflash_behavior_async\n\n\n@codeflash_behavior_async\nasync def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation for testing.\n """\n print("codeflash stdout: Async sorting list")\n \n await asyncio.sleep(0.01)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n print(f"result: {result}")\n return result\n\n\nclass AsyncBubbleSorter:\n """Class with async sorting method for testing."""\n \n async def sorter(self, lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation within a class.\n """\n print("codeflash stdout: AsyncBubbleSorter.sorter() called")\n \n # Add some async delay\n await asyncio.sleep(0.005)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n return result\n''' in instrumented_source - # Write the instrumented source back - fto_path.write_text(instrumented_source, "utf-8") - # Add codeflash capture instrument_codeflash_capture(func, {}, tests_root) @@ -179,16 +178,16 @@ async def test_async_class_sort(): is_async=True, ) - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) assert source_success - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = fto_path.read_text("utf-8") assert "@codeflash_behavior_async" in instrumented_source - fto_path.write_text(instrumented_source, "utf-8") - instrument_codeflash_capture(func, {}, tests_root) opt = Optimizer( @@ -293,16 +292,16 @@ async def test_async_perf(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) # Instrument the source module with async performance decorators - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( fto_path, func, TestingMode.PERFORMANCE ) assert source_success - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = fto_path.read_text("utf-8") assert '''import asyncio\nfrom typing import List, Union\n\nfrom codeflash.code_utils.codeflash_wrap_decorator import \\\n codeflash_performance_async\n\n\n@codeflash_performance_async\nasync def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation for testing.\n """\n print("codeflash stdout: Async sorting list")\n \n await asyncio.sleep(0.01)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n print(f"result: {result}")\n return result\n\n\nclass AsyncBubbleSorter:\n """Class with async sorting method for testing."""\n \n async def sorter(self, lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation within a class.\n """\n print("codeflash stdout: AsyncBubbleSorter.sorter() called")\n \n # Add some async delay\n await asyncio.sleep(0.005)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n return result\n''' == instrumented_source - fto_path.write_text(instrumented_source, "utf-8") - instrument_codeflash_capture(func, {}, tests_root) opt = Optimizer( @@ -397,12 +396,14 @@ async def async_error_function(lst): func = FunctionToOptimize(function_name="async_error_function", parents=[], file_path=Path(fto_path), is_async=True) - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) assert source_success - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = fto_path.read_text("utf-8") expected_instrumented_source = """import asyncio from typing import List, Union @@ -459,8 +460,6 @@ async def async_error_function(lst): raise ValueError("Test error") """ assert expected_instrumented_source == instrumented_source - - fto_path.write_text(instrumented_source, "utf-8") instrument_codeflash_capture(func, {}, tests_root) opt = Optimizer( @@ -554,12 +553,11 @@ async def test_async_multi(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) assert source_success - fto_path.write_text(instrumented_source, "utf-8") instrument_codeflash_capture(func, {}, tests_root) opt = Optimizer( @@ -670,12 +668,11 @@ async def test_async_edge_cases(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( fto_path, func, TestingMode.BEHAVIOR ) assert source_success - fto_path.write_text(instrumented_source, "utf-8") instrument_codeflash_capture(func, {}, tests_root) opt = Optimizer( @@ -973,17 +970,17 @@ async def test_mixed_sorting(): async_func = FunctionToOptimize(function_name="async_merge_sort", parents=[], file_path=Path(mixed_fto_path), is_async=True) - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( mixed_fto_path, async_func, TestingMode.BEHAVIOR ) assert source_success - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = mixed_fto_path.read_text("utf-8") assert "@codeflash_behavior_async" in instrumented_source assert "async def async_merge_sort" in instrumented_source assert "def sync_quick_sort" in instrumented_source # Should preserve sync function - - mixed_fto_path.write_text(instrumented_source, "utf-8") instrument_codeflash_capture(async_func, {}, tests_root) opt = Optimizer( diff --git a/tests/test_instrument_async_tests.py b/tests/test_instrument_async_tests.py index a89c8a2f9..0386c3650 100644 --- a/tests/test_instrument_async_tests.py +++ b/tests/test_instrument_async_tests.py @@ -249,18 +249,18 @@ async def test_async_function(): # First instrument the source module from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( source_file, func, TestingMode.BEHAVIOR ) assert source_success is True - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = source_file.read_text() assert "@codeflash_behavior_async" in instrumented_source assert "from codeflash.code_utils.codeflash_wrap_decorator import" in instrumented_source assert "codeflash_behavior_async" in instrumented_source - source_file.write_text(instrumented_source) - success, instrumented_test_code = inject_profiling_into_existing_test( test_file, [CodePosition(8, 18), CodePosition(11, 19)], func, temp_dir, "pytest", mode=TestingMode.BEHAVIOR ) @@ -306,20 +306,19 @@ async def test_async_function(): # First instrument the source module from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( source_file, func, TestingMode.PERFORMANCE ) assert source_success is True - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = source_file.read_text() assert "@codeflash_performance_async" in instrumented_source # Check for the import with line continuation formatting assert "from codeflash.code_utils.codeflash_wrap_decorator import" in instrumented_source assert "codeflash_performance_async" in instrumented_source - # Write the instrumented source back - source_file.write_text(instrumented_source) - # Now test the full pipeline with source module path success, instrumented_test_code = inject_profiling_into_existing_test( test_file, [CodePosition(8, 18)], func, temp_dir, "pytest", mode=TestingMode.PERFORMANCE @@ -373,21 +372,20 @@ async def test_mixed_functions(): from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( source_file, async_func, TestingMode.BEHAVIOR ) assert source_success - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = source_file.read_text() assert "@codeflash_behavior_async" in instrumented_source assert "from codeflash.code_utils.codeflash_wrap_decorator import" in instrumented_source assert "codeflash_behavior_async" in instrumented_source # Sync function should remain unchanged assert "def sync_function(x: int, y: int) -> int:" in instrumented_source - # Write instrumented source back - source_file.write_text(instrumented_source) - success, instrumented_test_code = inject_profiling_into_existing_test( test_file, [CodePosition(8, 18), CodePosition(11, 19)], @@ -547,16 +545,16 @@ async def test_multiple_calls(): # First instrument the source module with async decorators from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function - source_success, instrumented_source = add_async_decorator_to_function( + source_success = add_async_decorator_to_function( source_file, func, TestingMode.BEHAVIOR ) assert source_success - assert instrumented_source is not None + + # Verify the file was modified + instrumented_source = source_file.read_text() assert "@codeflash_behavior_async" in instrumented_source - source_file.write_text(instrumented_source) - import ast tree = ast.parse(test_code_multiple_calls) From c51e159cc38622a1f6638b08e8a64750cb981a73 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sat, 8 Nov 2025 04:19:24 -0800 Subject: [PATCH 07/12] refactor add_async_decorator_to_function --- .../code_utils/instrument_existing_tests.py | 46 ++++------ tests/test_instrument_async_tests.py | 87 +++++++++++-------- 2 files changed, 67 insertions(+), 66 deletions(-) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index 6771284e0..6c47d9e48 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -685,8 +685,6 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: ) - - def inject_async_profiling_into_existing_test( test_path: Path, call_positions: list[CodePosition], @@ -1270,35 +1268,28 @@ def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> c def add_async_decorator_to_function( - source: str | Path, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR -) -> tuple[str, bool] | bool: - """Add async decorator to an async function definition. + source_path: Path, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR +) -> bool: + """Add async decorator to an async function definition and write back to file. Args: ---- - source: Either source code string or path to source file. - When Path is provided, the file is modified in-place. + source_path: Path to the source file to modify in-place. function: The FunctionToOptimize object representing the target async function. mode: The testing mode to determine which decorator to apply. Returns: ------- - For string input: Tuple of (modified_source_code, was_decorator_added). - For Path input: Boolean indicating success (file is written directly). + Boolean indicating whether the decorator was successfully added. """ - is_path_input = isinstance(source, Path) - if not function.is_async: - return False if is_path_input else (source if isinstance(source, str) else "", False) + return False try: - # Read source code if path provided - if is_path_input: - with source.open(encoding="utf8") as f: - source_code = f.read() - else: - source_code = source + # Read source code + with source_path.open(encoding="utf8") as f: + source_code = f.read() module = cst.parse_module(source_code) @@ -1314,20 +1305,15 @@ def add_async_decorator_to_function( modified_code = sort_imports(code=module.code, float_to_top=True) except Exception as e: logger.exception(f"Error adding async decorator to function {function.qualified_name}: {e}") - return False if is_path_input else (source_code, False) + return False else: - # Success - for Path input, write file and return bool; for string input, return code + bool - if is_path_input: - if decorator_transformer.added_decorator: - with source.open("w", encoding="utf8") as f: - f.write(modified_code) - logger.debug(f"Applied async {mode.value} instrumentation to {source}") - return True - return False - # String input: return modified code and whether decorator was added if decorator_transformer.added_decorator: - return (modified_code, True) - return (modified_code, False) + with source_path.open("w", encoding="utf8") as f: + f.write(modified_code) + logger.debug(f"Applied async {mode.value} instrumentation to {source_path}") + return True + return False + def create_instrumented_source_module_path(source_path: Path, temp_dir: Path) -> Path: instrumented_filename = f"instrumented_{source_path.name}" diff --git a/tests/test_instrument_async_tests.py b/tests/test_instrument_async_tests.py index 0386c3650..16332ee3e 100644 --- a/tests/test_instrument_async_tests.py +++ b/tests/test_instrument_async_tests.py @@ -54,7 +54,7 @@ def temp_dir(): @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") -def test_async_decorator_application_behavior_mode(): +def test_async_decorator_application_behavior_mode(temp_dir): async_function_code = ''' import asyncio @@ -78,18 +78,22 @@ async def async_function(x: int, y: int) -> int: return x * y ''' + test_file = temp_dir / "test_async.py" + test_file.write_text(async_function_code) + func = FunctionToOptimize( - function_name="async_function", file_path=Path("test_async.py"), parents=[], is_async=True + function_name="async_function", file_path=test_file, parents=[], is_async=True ) - modified_code, decorator_added = add_async_decorator_to_function(async_function_code, func, TestingMode.BEHAVIOR) + decorator_added = add_async_decorator_to_function(test_file, func, TestingMode.BEHAVIOR) assert decorator_added + modified_code = test_file.read_text() assert modified_code.strip() == expected_decorated_code.strip() @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") -def test_async_decorator_application_performance_mode(): +def test_async_decorator_application_performance_mode(temp_dir): async_function_code = ''' import asyncio @@ -113,18 +117,22 @@ async def async_function(x: int, y: int) -> int: return x * y ''' + test_file = temp_dir / "test_async.py" + test_file.write_text(async_function_code) + func = FunctionToOptimize( - function_name="async_function", file_path=Path("test_async.py"), parents=[], is_async=True + function_name="async_function", file_path=test_file, parents=[], is_async=True ) - modified_code, decorator_added = add_async_decorator_to_function(async_function_code, func, TestingMode.PERFORMANCE) + decorator_added = add_async_decorator_to_function(test_file, func, TestingMode.PERFORMANCE) assert decorator_added + modified_code = test_file.read_text() assert modified_code.strip() == expected_decorated_code.strip() @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") -def test_async_class_method_decorator_application(): +def test_async_class_method_decorator_application(temp_dir): async_class_code = ''' import asyncio @@ -162,21 +170,25 @@ def sync_method(self, a: int, b: int) -> int: return a - b ''' + test_file = temp_dir / "test_async.py" + test_file.write_text(async_class_code) + func = FunctionToOptimize( function_name="async_method", - file_path=Path("test_async.py"), + file_path=test_file, parents=[{"name": "Calculator", "type": "ClassDef"}], is_async=True, ) - modified_code, decorator_added = add_async_decorator_to_function(async_class_code, func, TestingMode.BEHAVIOR) + decorator_added = add_async_decorator_to_function(test_file, func, TestingMode.BEHAVIOR) assert decorator_added + modified_code = test_file.read_text() assert modified_code.strip() == expected_decorated_code.strip() @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") -def test_async_decorator_no_duplicate_application(): +def test_async_decorator_no_duplicate_application(temp_dir): already_decorated_code = ''' from codeflash.code_utils.codeflash_wrap_decorator import codeflash_behavior_async import asyncio @@ -188,28 +200,17 @@ async def async_function(x: int, y: int) -> int: return x * y ''' - expected_reformatted_code = ''' -import asyncio - -from codeflash.code_utils.codeflash_wrap_decorator import \\ - codeflash_behavior_async - - -@codeflash_behavior_async -async def async_function(x: int, y: int) -> int: - """Already decorated async function.""" - await asyncio.sleep(0.01) - return x * y -''' + test_file = temp_dir / "test_async.py" + test_file.write_text(already_decorated_code) func = FunctionToOptimize( - function_name="async_function", file_path=Path("test_async.py"), parents=[], is_async=True + function_name="async_function", file_path=test_file, parents=[], is_async=True ) - modified_code, decorator_added = add_async_decorator_to_function(already_decorated_code, func, TestingMode.BEHAVIOR) + decorator_added = add_async_decorator_to_function(test_file, func, TestingMode.BEHAVIOR) + # Should not add duplicate decorator assert not decorator_added - assert modified_code.strip() == expected_reformatted_code.strip() @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") @@ -401,7 +402,7 @@ async def test_mixed_functions(): @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") -def test_async_function_qualified_name_handling(): +def test_async_function_qualified_name_handling(temp_dir): nested_async_code = ''' import asyncio @@ -413,14 +414,17 @@ async def nested_async_method(self, x: int) -> int: return x * 2 ''' + test_file = temp_dir / "test_nested.py" + test_file.write_text(nested_async_code) + func = FunctionToOptimize( function_name="nested_async_method", - file_path=Path("test_nested.py"), + file_path=test_file, parents=[{"name": "OuterClass", "type": "ClassDef"}, {"name": "InnerClass", "type": "ClassDef"}], is_async=True, ) - modified_code, decorator_added = add_async_decorator_to_function(nested_async_code, func, TestingMode.BEHAVIOR) + decorator_added = add_async_decorator_to_function(test_file, func, TestingMode.BEHAVIOR) expected_output = ( """import asyncio @@ -439,11 +443,13 @@ async def nested_async_method(self, x: int) -> int: """ ) + assert decorator_added + modified_code = test_file.read_text() assert modified_code.strip() == expected_output.strip() @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") -def test_async_decorator_with_existing_decorators(): +def test_async_decorator_with_existing_decorators(temp_dir): """Test async decorator application when function already has other decorators.""" decorated_async_code = ''' import asyncio @@ -462,13 +468,17 @@ async def async_function(x: int, y: int) -> int: return x * y ''' + test_file = temp_dir / "test_async.py" + test_file.write_text(decorated_async_code) + func = FunctionToOptimize( - function_name="async_function", file_path=Path("test_async.py"), parents=[], is_async=True + function_name="async_function", file_path=test_file, parents=[], is_async=True ) - modified_code, decorator_added = add_async_decorator_to_function(decorated_async_code, func, TestingMode.BEHAVIOR) + decorator_added = add_async_decorator_to_function(test_file, func, TestingMode.BEHAVIOR) assert decorator_added + modified_code = test_file.read_text() # Should add codeflash decorator above existing decorators assert "@codeflash_behavior_async" in modified_code assert "@my_decorator" in modified_code @@ -479,25 +489,30 @@ async def async_function(x: int, y: int) -> int: @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") -def test_sync_function_not_affected_by_async_logic(): +def test_sync_function_not_affected_by_async_logic(temp_dir): sync_function_code = ''' def sync_function(x: int, y: int) -> int: """Regular sync function.""" return x + y ''' + test_file = temp_dir / "test_sync.py" + test_file.write_text(sync_function_code) + sync_func = FunctionToOptimize( function_name="sync_function", - file_path=Path("test_sync.py"), + file_path=test_file, parents=[], is_async=False, ) - modified_code, decorator_added = add_async_decorator_to_function( - sync_function_code, sync_func, TestingMode.BEHAVIOR + decorator_added = add_async_decorator_to_function( + test_file, sync_func, TestingMode.BEHAVIOR ) assert not decorator_added + # File should not be modified for sync functions + modified_code = test_file.read_text() assert modified_code == sync_function_code @pytest.mark.skipif(sys.platform == "win32", reason="pending support for asyncio on windows") From cec82a7b0ae4a35922bebc7fc46d92652106d368 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sat, 8 Nov 2025 04:31:11 -0800 Subject: [PATCH 08/12] doesn't even work --- codeflash/verification/concolic_testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/verification/concolic_testing.py b/codeflash/verification/concolic_testing.py index 8f30a1562..83e9aed46 100644 --- a/codeflash/verification/concolic_testing.py +++ b/codeflash/verification/concolic_testing.py @@ -36,7 +36,7 @@ def generate_concolic_tests( if ( test_cfg.concolic_test_root_dir - and isinstance(function_to_optimize_ast, (ast.FunctionDef, ast.AsyncFunctionDef)) + and isinstance(function_to_optimize_ast, ast.FunctionDef) and has_typed_parameters(function_to_optimize_ast, function_to_optimize.parents) ): logger.info("Generating concolic opcode coverage tests for the original code…") From 2e639a1b1a2fed7829e0b5c855edf88c5abe918e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 10 Nov 2025 17:08:41 -0800 Subject: [PATCH 09/12] fix mypy complants --- codeflash/github/PrComment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codeflash/github/PrComment.py b/codeflash/github/PrComment.py index 772a1e42f..fe0ff095e 100644 --- a/codeflash/github/PrComment.py +++ b/codeflash/github/PrComment.py @@ -24,14 +24,14 @@ class PrComment: original_async_throughput: Optional[int] = None best_async_throughput: Optional[int] = None - def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str, Optional[list[BenchmarkDetail]]]]: + def to_json(self) -> dict[str, Union[str, int, dict[str, dict[str, int]], list[BenchmarkDetail], None]]: report_table = { test_type.to_name(): result for test_type, result in self.winning_behavior_test_results.get_test_pass_fail_report_by_type().items() if test_type.to_name() } - result = { + result: dict[str, Union[str, int, dict[str, dict[str, int]], list[BenchmarkDetail], None]] = { "optimization_explanation": self.optimization_explanation, "best_runtime": humanize_runtime(self.best_runtime), "original_runtime": humanize_runtime(self.original_runtime), From 5ba74576e7bb1d5984b6f3dc849c1435675d17ac Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 11 Nov 2025 12:16:33 -0800 Subject: [PATCH 10/12] handle libcst exception --- .../context/unused_definition_remover.py | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/codeflash/context/unused_definition_remover.py b/codeflash/context/unused_definition_remover.py index 7fdee53c0..3ce7ceffe 100644 --- a/codeflash/context/unused_definition_remover.py +++ b/codeflash/context/unused_definition_remover.py @@ -469,22 +469,32 @@ def remove_unused_definitions_by_function_names(code: str, qualified_function_na qualified_function_names: Set of function names to keep. For methods, use format 'classname.methodname' """ - module = cst.parse_module(code) - # Collect all definitions (top level classes, variables or function) - definitions = collect_top_level_definitions(module) + try: + module = cst.parse_module(code) + except Exception as e: + logger.debug(f"Failed to parse code with libcst: {type(e).__name__}: {e}") + return code - # Collect dependencies between definitions using the visitor pattern - dependency_collector = DependencyCollector(definitions) - module.visit(dependency_collector) + try: + # Collect all definitions (top level classes, variables or function) + definitions = collect_top_level_definitions(module) - # Mark definitions used by specified functions, and their dependencies recursively - usage_marker = QualifiedFunctionUsageMarker(definitions, qualified_function_names) - usage_marker.mark_used_definitions() + # Collect dependencies between definitions using the visitor pattern + dependency_collector = DependencyCollector(definitions) + module.visit(dependency_collector) - # Apply the recursive removal transformation - modified_module, _ = remove_unused_definitions_recursively(module, definitions) + # Mark definitions used by specified functions, and their dependencies recursively + usage_marker = QualifiedFunctionUsageMarker(definitions, qualified_function_names) + usage_marker.mark_used_definitions() - return modified_module.code if modified_module else "" + # Apply the recursive removal transformation + modified_module, _ = remove_unused_definitions_recursively(module, definitions) + + return modified_module.code if modified_module else "" + except Exception as e: + # If any other error occurs during processing, return the original code + logger.debug(f"Error processing code to remove unused definitions: {type(e).__name__}: {e}") + return code def print_definitions(definitions: dict[str, UsageInfo]) -> None: From 52e4ca047733311b8bb4a3a269304c2de9498f39 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 11 Nov 2025 12:22:36 -0800 Subject: [PATCH 11/12] fix type checking --- codeflash/context/unused_definition_remover.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/context/unused_definition_remover.py b/codeflash/context/unused_definition_remover.py index 3ce7ceffe..2a288d861 100644 --- a/codeflash/context/unused_definition_remover.py +++ b/codeflash/context/unused_definition_remover.py @@ -490,7 +490,7 @@ def remove_unused_definitions_by_function_names(code: str, qualified_function_na # Apply the recursive removal transformation modified_module, _ = remove_unused_definitions_recursively(module, definitions) - return modified_module.code if modified_module else "" + return modified_module.code if modified_module else "" # noqa: TRY300 except Exception as e: # If any other error occurs during processing, return the original code logger.debug(f"Error processing code to remove unused definitions: {type(e).__name__}: {e}") From 81d19c0ac561ecf943aaf916b53662dc89ed7c9c Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 14 Nov 2025 14:39:57 -0500 Subject: [PATCH 12/12] revert config --- codeflash/code_utils/config_consts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 1c7f292f0..6b2805fbf 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -3,7 +3,7 @@ MAX_FUNCTION_TEST_SECONDS = 60 N_CANDIDATES = 5 MIN_IMPROVEMENT_THRESHOLD = 0.05 -MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.05 # 5% minimum improvement for async throughput +MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10 # 10% minimum improvement for async throughput MAX_TEST_FUNCTION_RUNS = 50 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms N_TESTS_TO_GENERATE = 2