From fec06275efeb6a0d0595455a88b0a209cf51d6b4 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Wed, 11 Jun 2025 22:04:24 -0700 Subject: [PATCH 01/16] first version that does not work correctly --- codeflash/optimization/function_optimizer.py | 13 +- codeflash/result/create_pr.py | 61 +- tests/test_existing_tests_source_for.py | 645 +++++++++++++++++++ 3 files changed, 711 insertions(+), 8 deletions(-) create mode 100644 tests/test_existing_tests_source_for.py diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index f6c7661b4..26d8ede73 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -341,12 +341,6 @@ def optimize_function(self) -> Result[BestOptimization, str]: # noqa: PLR0911 optimized_function=best_optimization.candidate.source_code, ) - existing_tests = existing_tests_source_for( - self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root), - function_to_all_tests, - tests_root=self.test_cfg.tests_root, - ) - original_code_combined = original_helper_code.copy() original_code_combined[explanation.file_path] = self.function_to_optimize_source_code new_code_combined = new_helper_code.copy() @@ -369,6 +363,13 @@ def optimize_function(self) -> Result[BestOptimization, str]: # noqa: PLR0911 generated_tests_str = "\n\n".join( [test.generated_original_test_source for test in generated_tests.generated_tests] ) + existing_tests = existing_tests_source_for( + self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root), + function_to_all_tests, + tests_root=self.test_cfg.tests_root, + original_test_results=original_code_baseline.benchmarking_test_results, + optimized_test_results=best_optimization.winning_benchmarking_test_results, + ) if concolic_test_str: generated_tests_str += "\n\n" + concolic_test_str diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index b9e05e660..a58fc63ea 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -19,7 +19,7 @@ from codeflash.github.PrComment import FileDiffContent, PrComment if TYPE_CHECKING: - from codeflash.models.models import FunctionCalledInTest + from codeflash.models.models import FunctionCalledInTest, TestResults from codeflash.result.explanation import Explanation @@ -27,12 +27,69 @@ def existing_tests_source_for( function_qualified_name_with_modules_from_root: str, function_to_tests: dict[str, set[FunctionCalledInTest]], tests_root: Path, + original_test_results: Optional[TestResults] = None, + optimized_test_results: Optional[TestResults] = None, ) -> str: test_files = function_to_tests.get(function_qualified_name_with_modules_from_root) existing_tests_unique = set() + if test_files: + # Group test cases by test file + test_files_grouped = {} for test_file in test_files: - existing_tests_unique.add("- " + str(Path(test_file.tests_in_file.test_file).relative_to(tests_root))) + file_path = Path(test_file.tests_in_file.test_file) + relative_path = str(file_path.relative_to(tests_root)) + + if relative_path not in test_files_grouped: + test_files_grouped[relative_path] = [] + test_files_grouped[relative_path].append(test_file) + + # Create detailed report for each test file + for relative_path, tests_in_file in sorted(test_files_grouped.items()): + file_line = f"- {relative_path}" + + # Add test case details with timing information if available + if original_test_results and optimized_test_results: + test_case_details = [] + + # Use the 
same pattern as add_runtime_comments_to_generated_tests + original_runtime_by_test = original_test_results.usable_runtime_data_by_test_case() + optimized_runtime_by_test = optimized_test_results.usable_runtime_data_by_test_case() + + # Collect test function names for this file + test_functions_in_file = {test_file.tests_in_file.test_function for test_file in tests_in_file} + + # Create timing report for each test function + for test_function_name in sorted(test_functions_in_file): + # Find matching runtime data + original_runtimes = [] + optimized_runtimes = [] + + for invocation_id, runtimes in original_runtime_by_test.items(): + if invocation_id.test_function_name == test_function_name: + original_runtimes.extend(runtimes) + + for invocation_id, runtimes in optimized_runtime_by_test.items(): + if invocation_id.test_function_name == test_function_name: + optimized_runtimes.extend(runtimes) + + if original_runtimes and optimized_runtimes: + # Use minimum timing like the generated tests function does + original_time = min(original_runtimes) + optimized_time = min(optimized_runtimes) + + from codeflash.code_utils.time_utils import format_time + + original_str = format_time(original_time) + optimized_str = format_time(optimized_time) + + test_case_details.append(f" - {test_function_name}: {original_str} -> {optimized_str}") + + if test_case_details: + file_line += "\n" + "\n".join(test_case_details) + + existing_tests_unique.add(file_line) + return "\n".join(sorted(existing_tests_unique)) diff --git a/tests/test_existing_tests_source_for.py b/tests/test_existing_tests_source_for.py new file mode 100644 index 000000000..1a00c47c0 --- /dev/null +++ b/tests/test_existing_tests_source_for.py @@ -0,0 +1,645 @@ +"""Tests for the existing_tests_source_for function in result/create_pr.py.""" + +from pathlib import Path +from unittest.mock import patch + +import pytest +from codeflash.models.models import ( + CodePosition, + FunctionCalledInTest, + FunctionTestInvocation, + InvocationId, + TestResults, + TestsInFile, + TestType, VerificationType, +) +from codeflash.result.create_pr import existing_tests_source_for + + +@pytest.fixture +def sample_tests_root(tmp_path: Path) -> Path: + """Create a temporary test root directory.""" + return tmp_path / "tests" + + +@pytest.fixture +def sample_function_to_tests(sample_tests_root: Path) -> dict[str, set[FunctionCalledInTest]]: + """Create sample function to tests mapping.""" + test_file_1 = sample_tests_root / "test_module1.py" + test_file_2 = sample_tests_root / "test_module2.py" + + return { + "my_module.my_function": { + FunctionCalledInTest( + tests_in_file=TestsInFile( + test_file=test_file_1, + test_class=None, + test_function="test_basic_functionality", + test_type=TestType.EXISTING_UNIT_TEST, + ), + position=CodePosition(line_no=10, col_no=4), + ), + FunctionCalledInTest( + tests_in_file=TestsInFile( + test_file=test_file_1, + test_class="TestMyFunction", + test_function="test_edge_cases", + test_type=TestType.EXISTING_UNIT_TEST, + ), + position=CodePosition(line_no=20, col_no=8), + ), + FunctionCalledInTest( + tests_in_file=TestsInFile( + test_file=test_file_2, + test_class=None, + test_function="test_performance", + test_type=TestType.EXISTING_UNIT_TEST, + ), + position=CodePosition(line_no=15, col_no=4), + ), + } + } + + +@pytest.fixture +def sample_original_test_results() -> TestResults: + """Create sample original test results with timing information.""" + results = TestResults() + + # Test case 1: test_basic_functionality with multiple 
function calls + results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_basic_functionality", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + runtime=1000, # 1000 ns + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_basic_functionality", + function_getting_tested="my_function", + iteration_id="2", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + runtime=500, # 500 ns + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + # Test case 2: test_edge_cases + results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name="TestMyFunction", + test_function_name="test_edge_cases", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + runtime=2000, # 2000 ns + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + # Test case 3: test_performance + results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module2", + test_class_name=None, + test_function_name="test_performance", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module2.py"), + did_pass=True, + runtime=3000, # 3000 ns + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + return results + + +@pytest.fixture +def sample_optimized_test_results() -> TestResults: + """Create sample optimized test results with improved timing information.""" + results = TestResults() + + # Test case 1: test_basic_functionality with multiple function calls (improved) + results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_basic_functionality", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + runtime=800, # 800 ns (improved from 1000 ns) + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_basic_functionality", + function_getting_tested="my_function", + iteration_id="2", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + runtime=400, # 400 ns (improved from 500 ns) + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + # Test case 2: test_edge_cases (improved) + results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name="TestMyFunction", + test_function_name="test_edge_cases", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + 
runtime=1500, # 1500 ns (improved from 2000 ns) + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + # Test case 3: test_performance (improved) + results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module2", + test_class_name=None, + test_function_name="test_performance", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module2.py"), + did_pass=True, + runtime=2100, # 2100 ns (improved from 3000 ns) + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + return results + + +def test_existing_tests_source_for_without_timing_info( + sample_function_to_tests: dict[str, set[FunctionCalledInTest]], sample_tests_root: Path +): + """Test the function works without timing information (backward compatibility).""" + result = existing_tests_source_for("my_module.my_function", sample_function_to_tests, sample_tests_root) + + expected_lines = ["- test_module1.py", "- test_module2.py"] + + for line in expected_lines: + assert line in result + + # Should not contain any timing information + assert "->" not in result + assert "ns" not in result + + +def test_existing_tests_source_for_with_timing_info( + sample_function_to_tests: dict[str, set[FunctionCalledInTest]], + sample_tests_root: Path, + sample_original_test_results: TestResults, + sample_optimized_test_results: TestResults, +): + """Test the function includes timing information when provided.""" + with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: + # Mock format_time to return predictable values + mock_format_time.side_effect = lambda x: f"{x} ns" + + result = existing_tests_source_for( + "my_module.my_function", + sample_function_to_tests, + sample_tests_root, + sample_original_test_results, + sample_optimized_test_results, + ) + + # Should contain file names + assert "- test_module1.py" in result + assert "- test_module2.py" in result + + # Should contain test function names with timing (using min values now) + assert "test_basic_functionality: 500 ns -> 400 ns" in result # min(1000,500) -> min(800,400) + assert "test_edge_cases: 2000 ns -> 1500 ns" in result + assert "test_performance: 3000 ns -> 2100 ns" in result + + +def test_existing_tests_source_for_aggregates_multiple_function_calls( + sample_function_to_tests: dict[str, set[FunctionCalledInTest]], + sample_tests_root: Path, + sample_original_test_results: TestResults, + sample_optimized_test_results: TestResults, +): + """Test that multiple function calls within a test case use minimum timing.""" + with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: + mock_format_time.side_effect = lambda x: f"{x} ns" + + result = existing_tests_source_for( + "my_module.my_function", + sample_function_to_tests, + sample_tests_root, + sample_original_test_results, + sample_optimized_test_results, + ) + + # test_basic_functionality should show minimum timing: min(1000,500) -> min(800,400) + assert "test_basic_functionality: 500 ns -> 400 ns" in result + + +def test_existing_tests_source_for_only_includes_passing_tests( + sample_function_to_tests: dict[str, set[FunctionCalledInTest]], sample_tests_root: Path +): + """Test that only passing tests with runtime data are included in timing report.""" + original_results = TestResults() + optimized_results = TestResults() + + # Add a passing test with runtime + 
original_results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_basic_functionality", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + runtime=1000, + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + optimized_results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_basic_functionality", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + runtime=800, + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + # Add a failing test (should be excluded) + original_results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name="TestMyFunction", + test_function_name="test_edge_cases", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=False, # Failing test + runtime=2000, + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + # Add a test without runtime (should be excluded) + original_results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module2", + test_class_name=None, + test_function_name="test_performance", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module2.py"), + did_pass=True, + runtime=None, # No runtime data + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: + mock_format_time.side_effect = lambda x: f"{x} ns" + + result = existing_tests_source_for( + "my_module.my_function", sample_function_to_tests, sample_tests_root, original_results, optimized_results + ) + + # Should only include the passing test with runtime data + assert "test_basic_functionality: 1000 ns -> 800 ns" in result + # Should not include failing test or test without runtime + assert "test_edge_cases" not in result + assert "test_performance" not in result + + +def test_existing_tests_source_for_with_empty_test_mapping(sample_tests_root: Path): + """Test behavior when there are no tests for the function.""" + result = existing_tests_source_for("nonexistent.function", {}, sample_tests_root) + + assert result == "" + + +def test_existing_tests_source_for_missing_optimized_results( + sample_function_to_tests: dict[str, set[FunctionCalledInTest]], + sample_tests_root: Path, + sample_original_test_results: TestResults, +): + """Test behavior when optimized results are missing for some test cases.""" + # Create optimized results that are missing some test cases + optimized_results = TestResults() + optimized_results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_basic_functionality", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/test_module1.py"), + did_pass=True, + runtime=800, + test_framework="pytest", + 
test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + # Note: Missing test_edge_cases and test_performance optimized results + + with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: + mock_format_time.side_effect = lambda x: f"{x} ns" + + result = existing_tests_source_for( + "my_module.my_function", + sample_function_to_tests, + sample_tests_root, + sample_original_test_results, + optimized_results, + ) + + # Should not include test cases without both original and optimized results + assert "test_basic_functionality" not in result # Missing second function call + assert "test_edge_cases" not in result + assert "test_performance" not in result + + # Should still show file names + assert "- test_module1.py" in result + assert "- test_module2.py" in result + + +def test_existing_tests_source_for_sorted_output(sample_tests_root: Path): + """Test that output is properly sorted by file name and test function name.""" + # Create a more complex test mapping with multiple files and functions + test_file_a = sample_tests_root / "a_test_module.py" + test_file_z = sample_tests_root / "z_test_module.py" + + function_to_tests = { + "my_module.my_function": { + FunctionCalledInTest( + tests_in_file=TestsInFile( + test_file=test_file_z, + test_class=None, + test_function="z_test_function", + test_type=TestType.EXISTING_UNIT_TEST, + ), + position=CodePosition(line_no=10, col_no=4), + ), + FunctionCalledInTest( + tests_in_file=TestsInFile( + test_file=test_file_a, + test_class=None, + test_function="a_test_function", + test_type=TestType.EXISTING_UNIT_TEST, + ), + position=CodePosition(line_no=20, col_no=8), + ), + FunctionCalledInTest( + tests_in_file=TestsInFile( + test_file=test_file_a, + test_class=None, + test_function="b_test_function", + test_type=TestType.EXISTING_UNIT_TEST, + ), + position=CodePosition(line_no=30, col_no=8), + ), + } + } + + original_results = TestResults() + optimized_results = TestResults() + + # Add test results for all functions + for test_func in ["a_test_function", "b_test_function"]: + original_results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.a_test_module", + test_class_name=None, + test_function_name=test_func, + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/a_test_module.py"), + did_pass=True, + runtime=1000, + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + optimized_results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.a_test_module", + test_class_name=None, + test_function_name=test_func, + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/a_test_module.py"), + did_pass=True, + runtime=800, + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + original_results.add( + FunctionTestInvocation( + id=InvocationId( + test_module_path="tests.z_test_module", + test_class_name=None, + test_function_name="z_test_function", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/z_test_module.py"), + did_pass=True, + runtime=1000, + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + optimized_results.add( + FunctionTestInvocation( + id=InvocationId( + 
test_module_path="tests.z_test_module", + test_class_name=None, + test_function_name="z_test_function", + function_getting_tested="my_function", + iteration_id="1", + ), + file_name=Path("/tmp/tests/z_test_module.py"), + did_pass=True, + runtime=800, + test_framework="pytest", + test_type=TestType.EXISTING_UNIT_TEST, + return_value=None, + timed_out=False, + loop_index=1, + ) + ) + + with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: + mock_format_time.side_effect = lambda x: f"{x} ns" + + result = existing_tests_source_for( + "my_module.my_function", function_to_tests, sample_tests_root, original_results, optimized_results + ) + + lines = result.split("\n") + + # Files should be sorted alphabetically + a_file_index = next(i for i, line in enumerate(lines) if "a_test_module.py" in line) + z_file_index = next(i for i, line in enumerate(lines) if "z_test_module.py" in line) + assert a_file_index < z_file_index + + # Test functions within a file should be sorted alphabetically + a_func_index = next(i for i, line in enumerate(lines) if "a_test_function" in line) + b_func_index = next(i for i, line in enumerate(lines) if "b_test_function" in line) + assert a_func_index < b_func_index + + + +def test_existing_tests_source_for_format_time_called_correctly( + sample_function_to_tests: dict[str, set[FunctionCalledInTest]], + sample_tests_root: Path, + sample_original_test_results: TestResults, + sample_optimized_test_results: TestResults, +): + """Test that format_time is called with correct values (min of runtime lists).""" + with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: + mock_format_time.side_effect = lambda x: f"{x} ns" + + existing_tests_source_for( + "my_module.my_function", + sample_function_to_tests, + sample_tests_root, + sample_original_test_results, + sample_optimized_test_results, + ) + + # Check that format_time was called with the minimum values + call_args = [call[0][0] for call in mock_format_time.call_args_list] + + # Should include minimum values (not aggregated) + assert 500 in call_args # test_basic_functionality original: min(1000, 500) + assert 400 in call_args # test_basic_functionality optimized: min(800, 400) + assert 2000 in call_args # test_edge_cases original + assert 1500 in call_args # test_edge_cases optimized + assert 3000 in call_args # test_performance original + assert 2100 in call_args # test_performance optimized \ No newline at end of file From 3f1cbefcbc8f8abd8472c036063f05fed3ce9419 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 12 Jun 2025 16:41:36 -0700 Subject: [PATCH 02/16] wip need to do in a single loop --- codeflash/models/models.py | 1 + codeflash/result/create_pr.py | 112 +++++++++++++++++----------------- 2 files changed, 56 insertions(+), 57 deletions(-) diff --git a/codeflash/models/models.py b/codeflash/models/models.py index 02db2d0b6..bd4556965 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -557,6 +557,7 @@ def report_to_tree(report: dict[TestType, dict[str, int]], title: str) -> Tree: def usable_runtime_data_by_test_case(self) -> dict[InvocationId, list[int]]: # Efficient single traversal, directly accumulating into a dict. 
+ # can track mins here and only sums can be return in total_passed_runtime by_id: dict[InvocationId, list[int]] = {} for result in self.test_results: if result.did_pass: diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index a58fc63ea..18207837d 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -31,64 +31,62 @@ def existing_tests_source_for( optimized_test_results: Optional[TestResults] = None, ) -> str: test_files = function_to_tests.get(function_qualified_name_with_modules_from_root) + if not test_files: + return "" existing_tests_unique = set() - - if test_files: - # Group test cases by test file - test_files_grouped = {} - for test_file in test_files: - file_path = Path(test_file.tests_in_file.test_file) - relative_path = str(file_path.relative_to(tests_root)) - - if relative_path not in test_files_grouped: - test_files_grouped[relative_path] = [] - test_files_grouped[relative_path].append(test_file) - - # Create detailed report for each test file - for relative_path, tests_in_file in sorted(test_files_grouped.items()): - file_line = f"- {relative_path}" - - # Add test case details with timing information if available - if original_test_results and optimized_test_results: - test_case_details = [] - - # Use the same pattern as add_runtime_comments_to_generated_tests - original_runtime_by_test = original_test_results.usable_runtime_data_by_test_case() - optimized_runtime_by_test = optimized_test_results.usable_runtime_data_by_test_case() - - # Collect test function names for this file - test_functions_in_file = {test_file.tests_in_file.test_function for test_file in tests_in_file} - - # Create timing report for each test function - for test_function_name in sorted(test_functions_in_file): - # Find matching runtime data - original_runtimes = [] - optimized_runtimes = [] - - for invocation_id, runtimes in original_runtime_by_test.items(): - if invocation_id.test_function_name == test_function_name: - original_runtimes.extend(runtimes) - - for invocation_id, runtimes in optimized_runtime_by_test.items(): - if invocation_id.test_function_name == test_function_name: - optimized_runtimes.extend(runtimes) - - if original_runtimes and optimized_runtimes: - # Use minimum timing like the generated tests function does - original_time = min(original_runtimes) - optimized_time = min(optimized_runtimes) - - from codeflash.code_utils.time_utils import format_time - - original_str = format_time(original_time) - optimized_str = format_time(optimized_time) - - test_case_details.append(f" - {test_function_name}: {original_str} -> {optimized_str}") - - if test_case_details: - file_line += "\n" + "\n".join(test_case_details) - - existing_tests_unique.add(file_line) + # a lot of loops, need to do in a single loop + #original_runtime_by_test = original_test_results.usable_runtime_data_by_test_case() + #optimized_runtime_by_test = optimized_test_results.usable_runtime_data_by_test_case() + # Group test cases by test file + test_files_grouped = {} + for test_file in test_files: + file_path = Path(test_file.tests_in_file.test_file) + relative_path = str(file_path.relative_to(tests_root)) + + if relative_path not in test_files_grouped: + test_files_grouped[relative_path] = [] + test_files_grouped.setdefault(relative_path,[]).append(test_file) + + # Create detailed report for each test file + # for relative_path, tests_in_file in sorted(test_files_grouped.items()): + file_line = f"- {relative_path}" + + # Add test case details with timing information if 
available + #if original_test_results and optimized_test_results: + test_case_details = [] + # Collect test function names for this file + test_functions_in_file = {test_file.tests_in_file.test_function for test_file in tests_in_file} + + # Create timing report for each test function + for test_function_name in sorted(test_functions_in_file): + # Find matching runtime data + original_runtimes = [] + optimized_runtimes = [] + + for invocation_id, runtimes in original_runtime_by_test.items(): + if invocation_id.test_function_name == test_function_name: + original_runtimes.extend(runtimes) + + for invocation_id, runtimes in optimized_runtime_by_test.items(): + if invocation_id.test_function_name == test_function_name: + optimized_runtimes.extend(runtimes) + + if original_runtimes and optimized_runtimes: + # Use minimum timing like the generated tests function does + original_time = min(original_runtimes) + optimized_time = min(optimized_runtimes) + + from codeflash.code_utils.time_utils import format_time + + original_str = format_time(original_time) + optimized_str = format_time(optimized_time) + + test_case_details.append(f" - {test_function_name}: {original_str} -> {optimized_str}") + + if test_case_details: + file_line += "\n" + "\n".join(test_case_details) + + existing_tests_unique.add(file_line) return "\n".join(sorted(existing_tests_unique)) From a09d11cca459d5a4d8ae2300740192b553777a31 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Mon, 16 Jun 2025 18:57:22 -0700 Subject: [PATCH 03/16] works --- codeflash/code_utils/edit_generated_tests.py | 11 +- codeflash/optimization/function_optimizer.py | 16 ++- codeflash/result/create_pr.py | 118 +++++++++---------- 3 files changed, 72 insertions(+), 73 deletions(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index 4e6e31072..65b518ac3 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -4,7 +4,7 @@ from codeflash.cli_cmds.console import logger from codeflash.code_utils.time_utils import format_time -from codeflash.models.models import GeneratedTests, GeneratedTestsList, TestResults +from codeflash.models.models import GeneratedTests, GeneratedTestsList def remove_functions_from_generated_tests( @@ -33,12 +33,9 @@ def remove_functions_from_generated_tests( def add_runtime_comments_to_generated_tests( - generated_tests: GeneratedTestsList, original_test_results: TestResults, optimized_test_results: TestResults + generated_tests: GeneratedTestsList, original_runtimes: dict, optimized_runtimes: dict ) -> GeneratedTestsList: """Add runtime performance comments to function calls in generated tests.""" - # Create dictionaries for fast lookup of runtime data - original_runtime_by_test = original_test_results.usable_runtime_data_by_test_case() - optimized_runtime_by_test = optimized_test_results.usable_runtime_data_by_test_case() class RuntimeCommentTransformer(cst.CSTTransformer): def __init__(self) -> None: @@ -84,11 +81,11 @@ def leave_SimpleStatementLine( matching_original_times = [] matching_optimized_times = [] - for invocation_id, runtimes in original_runtime_by_test.items(): + for invocation_id, runtimes in original_runtimes.items(): if invocation_id.test_function_name == self.current_test_name: matching_original_times.extend(runtimes) - for invocation_id, runtimes in optimized_runtime_by_test.items(): + for invocation_id, runtimes in optimized_runtimes.items(): if invocation_id.test_function_name == self.current_test_name: 
matching_optimized_times.extend(runtimes) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 26d8ede73..4d5dc6f5b 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -354,11 +354,15 @@ def optimize_function(self) -> Result[BestOptimization, str]: # noqa: PLR0911 generated_tests = remove_functions_from_generated_tests( generated_tests=generated_tests, test_functions_to_remove=test_functions_to_remove ) + original_runtime_by_test = ( + original_code_baseline.benchmarking_test_results.usable_runtime_data_by_test_case() + ) + optimized_runtime_by_test = ( + best_optimization.winning_benchmarking_test_results.usable_runtime_data_by_test_case() + ) # Add runtime comments to generated tests before creating the PR generated_tests = add_runtime_comments_to_generated_tests( - generated_tests, - original_code_baseline.benchmarking_test_results, - best_optimization.winning_benchmarking_test_results, + generated_tests, original_runtime_by_test, optimized_runtime_by_test ) generated_tests_str = "\n\n".join( [test.generated_original_test_source for test in generated_tests.generated_tests] @@ -366,9 +370,9 @@ def optimize_function(self) -> Result[BestOptimization, str]: # noqa: PLR0911 existing_tests = existing_tests_source_for( self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root), function_to_all_tests, - tests_root=self.test_cfg.tests_root, - original_test_results=original_code_baseline.benchmarking_test_results, - optimized_test_results=best_optimization.winning_benchmarking_test_results, + test_cfg=self.test_cfg, + original_runtimes_all=original_runtime_by_test, + optimized_runtimes_all=optimized_runtime_by_test, ) if concolic_test_str: generated_tests_str += "\n\n" + concolic_test_str diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 18207837d..4830c0080 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from pathlib import Path from typing import TYPE_CHECKING, Optional @@ -16,79 +17,76 @@ git_root_dir, ) from codeflash.code_utils.github_utils import github_pr_url +from codeflash.code_utils.time_utils import format_time from codeflash.github.PrComment import FileDiffContent, PrComment if TYPE_CHECKING: - from codeflash.models.models import FunctionCalledInTest, TestResults + from codeflash.models.models import FunctionCalledInTest from codeflash.result.explanation import Explanation + from codeflash.verification.verification_utils import TestConfig def existing_tests_source_for( function_qualified_name_with_modules_from_root: str, function_to_tests: dict[str, set[FunctionCalledInTest]], - tests_root: Path, - original_test_results: Optional[TestResults] = None, - optimized_test_results: Optional[TestResults] = None, + test_cfg: TestConfig, + original_runtimes_all: dict, + optimized_runtimes_all: dict, ) -> str: test_files = function_to_tests.get(function_qualified_name_with_modules_from_root) if not test_files: return "" - existing_tests_unique = set() - # a lot of loops, need to do in a single loop - #original_runtime_by_test = original_test_results.usable_runtime_data_by_test_case() - #optimized_runtime_by_test = optimized_test_results.usable_runtime_data_by_test_case() - # Group test cases by test file - test_files_grouped = {} - for test_file in test_files: - file_path = Path(test_file.tests_in_file.test_file) - relative_path = 
str(file_path.relative_to(tests_root)) - - if relative_path not in test_files_grouped: - test_files_grouped[relative_path] = [] - test_files_grouped.setdefault(relative_path,[]).append(test_file) - - # Create detailed report for each test file - # for relative_path, tests_in_file in sorted(test_files_grouped.items()): - file_line = f"- {relative_path}" - - # Add test case details with timing information if available - #if original_test_results and optimized_test_results: - test_case_details = [] - # Collect test function names for this file - test_functions_in_file = {test_file.tests_in_file.test_function for test_file in tests_in_file} - - # Create timing report for each test function - for test_function_name in sorted(test_functions_in_file): - # Find matching runtime data - original_runtimes = [] - optimized_runtimes = [] - - for invocation_id, runtimes in original_runtime_by_test.items(): - if invocation_id.test_function_name == test_function_name: - original_runtimes.extend(runtimes) - - for invocation_id, runtimes in optimized_runtime_by_test.items(): - if invocation_id.test_function_name == test_function_name: - optimized_runtimes.extend(runtimes) - - if original_runtimes and optimized_runtimes: - # Use minimum timing like the generated tests function does - original_time = min(original_runtimes) - optimized_time = min(optimized_runtimes) - - from codeflash.code_utils.time_utils import format_time - - original_str = format_time(original_time) - optimized_str = format_time(optimized_time) - - test_case_details.append(f" - {test_function_name}: {original_str} -> {optimized_str}") - - if test_case_details: - file_line += "\n" + "\n".join(test_case_details) - - existing_tests_unique.add(file_line) - - return "\n".join(sorted(existing_tests_unique)) + output = "" + tests_root = test_cfg.tests_root + module_root = test_cfg.project_root_path + rel_tests_root = tests_root.relative_to(module_root) + original_tests_to_runtimes = {} + optimized_tests_to_runtimes = {} + # TODO confirm that original and optimized have the same keys + all_invocation_ids = original_runtimes_all.keys() | optimized_runtimes_all.keys() + for invocation_id in all_invocation_ids: + rel_path = ( + Path(invocation_id.test_module_path.replace(".", os.sep)).with_suffix(".py").relative_to(rel_tests_root) + ) + if rel_path not in original_tests_to_runtimes: + original_tests_to_runtimes[rel_path] = {} + if rel_path not in optimized_tests_to_runtimes: + optimized_tests_to_runtimes[rel_path] = {} + qualified_name = ( + invocation_id.test_class_name + "." 
+ invocation_id.test_function_name + if invocation_id.test_class_name + else invocation_id.test_function_name + ) + if qualified_name not in original_tests_to_runtimes[rel_path]: + original_tests_to_runtimes[rel_path][qualified_name] = 0 + if qualified_name not in optimized_tests_to_runtimes[rel_path]: + optimized_tests_to_runtimes[rel_path][qualified_name] = 0 + if invocation_id in original_runtimes_all: + original_tests_to_runtimes[rel_path][qualified_name] += min(original_runtimes_all[invocation_id]) + if invocation_id in optimized_runtimes_all: + optimized_tests_to_runtimes[rel_path][qualified_name] += min(optimized_runtimes_all[invocation_id]) + # parse into string + all_rel_paths = ( + original_tests_to_runtimes.keys() + ) # both will have the same keys as some default values are assigned in the previous loop + for filename in sorted(all_rel_paths): + output += f"- {filename}\n" + all_qualified_names = original_tests_to_runtimes[ + filename + ].keys() # both will have the same keys as some default values are assigned in the previous loop + for qualified_name in sorted(all_qualified_names): + # if not present in optimized output nan + if optimized_tests_to_runtimes[filename][qualified_name] == 0: + print_optimized_runtime = "NaN" + else: + print_optimized_runtime = format_time(optimized_tests_to_runtimes[filename][qualified_name]) + if original_tests_to_runtimes[filename][qualified_name] == 0: + print_original_runtime = "NaN" + else: + print_original_runtime = format_time(original_tests_to_runtimes[filename][qualified_name]) + output += f" - {qualified_name}: {print_original_runtime} -> {print_optimized_runtime}\n" + output += "\n" + return output def check_create_pr( From 30259c0ad893111be5031d9719915d8f4fe9d562 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Mon, 16 Jun 2025 20:44:49 -0700 Subject: [PATCH 04/16] improve runtimecomments --- codeflash/code_utils/edit_generated_tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index 65b518ac3..2d713c1a3 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -37,6 +37,7 @@ def add_runtime_comments_to_generated_tests( ) -> GeneratedTestsList: """Add runtime performance comments to function calls in generated tests.""" + # TODO: reduce for loops to one class RuntimeCommentTransformer(cst.CSTTransformer): def __init__(self) -> None: self.in_test_function = False @@ -80,7 +81,7 @@ def leave_SimpleStatementLine( # Find matching test cases by looking for this test function name in the test results matching_original_times = [] matching_optimized_times = [] - + # TODO : will not work if there are multiple test cases with the same name, match filename + test class + test function name for invocation_id, runtimes in original_runtimes.items(): if invocation_id.test_function_name == self.current_test_name: matching_original_times.extend(runtimes) From 0d566bf4cb2689b9de6319d7e6ab20c9167762e9 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Mon, 16 Jun 2025 22:32:25 -0700 Subject: [PATCH 05/16] text highlight doesnt work --- codeflash/result/create_pr.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 4830c0080..b0ee64b87 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -42,12 +42,17 @@ def existing_tests_source_for( rel_tests_root = 
tests_root.relative_to(module_root) original_tests_to_runtimes = {} optimized_tests_to_runtimes = {} + non_generated_tests = set() + for test_file in test_files: + non_generated_tests.add(Path(test_file.tests_in_file.test_file).relative_to(tests_root)) # TODO confirm that original and optimized have the same keys all_invocation_ids = original_runtimes_all.keys() | optimized_runtimes_all.keys() for invocation_id in all_invocation_ids: rel_path = ( Path(invocation_id.test_module_path.replace(".", os.sep)).with_suffix(".py").relative_to(rel_tests_root) ) + if rel_path not in non_generated_tests: + continue if rel_path not in original_tests_to_runtimes: original_tests_to_runtimes[rel_path] = {} if rel_path not in optimized_tests_to_runtimes: @@ -84,7 +89,23 @@ def existing_tests_source_for( print_original_runtime = "NaN" else: print_original_runtime = format_time(original_tests_to_runtimes[filename][qualified_name]) - output += f" - {qualified_name}: {print_original_runtime} -> {print_optimized_runtime}\n" + arrow = "\\rightarrow" + if ( + original_tests_to_runtimes[filename][qualified_name] != 0 + and optimized_tests_to_runtimes[filename][qualified_name] != 0 + ): + greater = ( + optimized_tests_to_runtimes[filename][qualified_name] + > original_tests_to_runtimes[filename][qualified_name] + ) + if greater: + output += f" - $$\\color{{red}}{qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}$$\n" + else: + output += f" - $$\\color{{green}}{qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}$$\n" + else: + # one of them is NaN + output += f" - $$\\color{{blue}}{qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}$$\n" + # output += f"$$\\colorbox{{pink}}\{{ - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}}}$$\n" output += "\n" return output From 30410ff60512aeaedcd47de6f7b7dbd41f09320e Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 17 Jun 2025 14:42:27 -0700 Subject: [PATCH 06/16] works --- codeflash/code_utils/edit_generated_tests.py | 76 ++++++++++++++------ codeflash/optimization/function_optimizer.py | 2 +- 2 files changed, 56 insertions(+), 22 deletions(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index 2d713c1a3..0a149fb97 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -1,10 +1,13 @@ +import os import re +from pathlib import Path import libcst as cst from codeflash.cli_cmds.console import logger from codeflash.code_utils.time_utils import format_time from codeflash.models.models import GeneratedTests, GeneratedTestsList +from codeflash.verification.verification_utils import TestConfig def remove_functions_from_generated_tests( @@ -33,28 +36,36 @@ def remove_functions_from_generated_tests( def add_runtime_comments_to_generated_tests( - generated_tests: GeneratedTestsList, original_runtimes: dict, optimized_runtimes: dict + test_cfg: TestConfig, generated_tests: GeneratedTestsList, original_runtimes: dict, optimized_runtimes: dict ) -> GeneratedTestsList: """Add runtime performance comments to function calls in generated tests.""" + tests_root = test_cfg.tests_root + module_root = test_cfg.project_root_path + rel_tests_root = tests_root.relative_to(module_root) # TODO: reduce for loops to one class RuntimeCommentTransformer(cst.CSTTransformer): - def __init__(self) -> None: - self.in_test_function = False - self.current_test_name: str | None = None + def 
__init__(self, test: GeneratedTests, tests_root: Path, rel_tests_root: Path) -> None: + self.test = test + self.context_stack = [] + self.tests_root = tests_root + self.rel_tests_root = rel_tests_root + + def visit_ClassDef(self, node: cst.ClassDef) -> None: + # Track when we enter a class + self.context_stack.append(node.name.value) + + def leave_ClassDef(self, original_node: cst.ClassDef, updated_node: cst.ClassDef) -> cst.ClassDef: # noqa: ARG002 + # Pop the context when we leave a class + self.context_stack.pop() + return updated_node def visit_FunctionDef(self, node: cst.FunctionDef) -> None: - if node.name.value.startswith("test_"): - self.in_test_function = True - self.current_test_name = node.name.value - else: - self.in_test_function = False - self.current_test_name = None - - def leave_FunctionDef(self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef) -> cst.FunctionDef: - if original_node.name.value.startswith("test_"): - self.in_test_function = False - self.current_test_name = None + self.context_stack.append(node.name.value) + + def leave_FunctionDef(self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef) -> cst.FunctionDef: # noqa: ARG002 + # Pop the context when we leave a function + self.context_stack.pop() return updated_node def leave_SimpleStatementLine( @@ -62,9 +73,6 @@ def leave_SimpleStatementLine( original_node: cst.SimpleStatementLine, # noqa: ARG002 updated_node: cst.SimpleStatementLine, ) -> cst.SimpleStatementLine: - if not self.in_test_function or not self.current_test_name: - return updated_node - # Look for assignment statements that assign to codeflash_output # Handle both single statements and multiple statements on one line codeflash_assignment_found = False @@ -83,11 +91,37 @@ def leave_SimpleStatementLine( matching_optimized_times = [] # TODO : will not work if there are multiple test cases with the same name, match filename + test class + test function name for invocation_id, runtimes in original_runtimes.items(): - if invocation_id.test_function_name == self.current_test_name: + qualified_name = ( + invocation_id.test_class_name + "." + invocation_id.test_function_name + if invocation_id.test_class_name + else invocation_id.test_function_name + ) + rel_path = ( + Path(invocation_id.test_module_path.replace(".", os.sep)) + .with_suffix(".py") + .relative_to(self.rel_tests_root) + ) + if qualified_name == ".".join(self.context_stack) and rel_path in [ + self.test.behavior_file_path.relative_to(self.tests_root), + self.test.perf_file_path.relative_to(self.tests_root), + ]: matching_original_times.extend(runtimes) for invocation_id, runtimes in optimized_runtimes.items(): - if invocation_id.test_function_name == self.current_test_name: + qualified_name = ( + invocation_id.test_class_name + "." 
+ invocation_id.test_function_name + if invocation_id.test_class_name + else invocation_id.test_function_name + ) + rel_path = ( + Path(invocation_id.test_module_path.replace(".", os.sep)) + .with_suffix(".py") + .relative_to(self.rel_tests_root) + ) + if qualified_name == ".".join(self.context_stack) and rel_path in [ + self.test.behavior_file_path.relative_to(self.tests_root), + self.test.perf_file_path.relative_to(self.tests_root), + ]: matching_optimized_times.extend(runtimes) if matching_original_times and matching_optimized_times: @@ -116,7 +150,7 @@ def leave_SimpleStatementLine( tree = cst.parse_module(test.generated_original_test_source) # Transform the tree to add runtime comments - transformer = RuntimeCommentTransformer() + transformer = RuntimeCommentTransformer(test, tests_root, rel_tests_root) modified_tree = tree.visit(transformer) # Convert back to source code diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 4d5dc6f5b..c94759369 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -362,7 +362,7 @@ def optimize_function(self) -> Result[BestOptimization, str]: # noqa: PLR0911 ) # Add runtime comments to generated tests before creating the PR generated_tests = add_runtime_comments_to_generated_tests( - generated_tests, original_runtime_by_test, optimized_runtime_by_test + self.test_cfg, generated_tests, original_runtime_by_test, optimized_runtime_by_test ) generated_tests_str = "\n\n".join( [test.generated_original_test_source for test in generated_tests.generated_tests] From 4a68aa0854b1856f94d8e1e6473348a86d2c1fc9 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 17 Jun 2025 15:35:40 -0700 Subject: [PATCH 07/16] minor fixes --- codeflash/code_utils/edit_generated_tests.py | 5 +++-- codeflash/result/create_pr.py | 13 +++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index 0a149fb97..03e9ad2e1 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -7,6 +7,7 @@ from codeflash.cli_cmds.console import logger from codeflash.code_utils.time_utils import format_time from codeflash.models.models import GeneratedTests, GeneratedTestsList +from codeflash.result.critic import performance_gain from codeflash.verification.verification_utils import TestConfig @@ -127,9 +128,9 @@ def leave_SimpleStatementLine( if matching_original_times and matching_optimized_times: original_time = min(matching_original_times) optimized_time = min(matching_optimized_times) - + perf_gain = performance_gain(original_runtime_ns=original_time, optimized_runtime_ns=optimized_time) # Create the runtime comment - comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)}" + comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)} ({perf_gain:.2f}%)" # Add comment to the trailing whitespace new_trailing_whitespace = cst.TrailingWhitespace( diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index b0ee64b87..81afd303a 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -19,6 +19,7 @@ from codeflash.code_utils.github_utils import github_pr_url from codeflash.code_utils.time_utils import format_time from codeflash.github.PrComment import FileDiffContent, PrComment +from codeflash.result.critic import performance_gain if 
TYPE_CHECKING: from codeflash.models.models import FunctionCalledInTest @@ -89,7 +90,7 @@ def existing_tests_source_for( print_original_runtime = "NaN" else: print_original_runtime = format_time(original_tests_to_runtimes[filename][qualified_name]) - arrow = "\\rightarrow" + arrow = "->" if ( original_tests_to_runtimes[filename][qualified_name] != 0 and optimized_tests_to_runtimes[filename][qualified_name] != 0 @@ -98,13 +99,17 @@ def existing_tests_source_for( optimized_tests_to_runtimes[filename][qualified_name] > original_tests_to_runtimes[filename][qualified_name] ) + perf_gain = performance_gain( + original_runtime_ns=original_tests_to_runtimes[filename][qualified_name], + optimized_runtime_ns=optimized_tests_to_runtimes[filename][qualified_name], + ) if greater: - output += f" - $$\\color{{red}}{qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}$$\n" + output += f" - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime} $$\\color{{red}}({perf_gain:.2f}\\\\%)$$\n" else: - output += f" - $$\\color{{green}}{qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}$$\n" + output += f" - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime} $$\\color{{green}}({perf_gain:.2f}\\\\%)$$\n" else: # one of them is NaN - output += f" - $$\\color{{blue}}{qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}$$\n" + output += f" - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}\n" # output += f"$$\\colorbox{{pink}}\{{ - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}}}$$\n" output += "\n" return output From e202289686e033e5a819d5c32a66528ac52c282f Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 17 Jun 2025 16:01:25 -0700 Subject: [PATCH 08/16] minor fixes --- codeflash/code_utils/edit_generated_tests.py | 13 ++++++++----- codeflash/result/create_pr.py | 20 ++++++++++---------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index 03e9ad2e1..ee50af0bd 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -6,7 +6,7 @@ from codeflash.cli_cmds.console import logger from codeflash.code_utils.time_utils import format_time -from codeflash.models.models import GeneratedTests, GeneratedTestsList +from codeflash.models.models import GeneratedTests, GeneratedTestsList, InvocationId from codeflash.result.critic import performance_gain from codeflash.verification.verification_utils import TestConfig @@ -37,7 +37,10 @@ def remove_functions_from_generated_tests( def add_runtime_comments_to_generated_tests( - test_cfg: TestConfig, generated_tests: GeneratedTestsList, original_runtimes: dict, optimized_runtimes: dict + test_cfg: TestConfig, + generated_tests: GeneratedTestsList, + original_runtimes: dict[InvocationId, list[int]], + optimized_runtimes: dict[InvocationId, list[int]], ) -> GeneratedTestsList: """Add runtime performance comments to function calls in generated tests.""" tests_root = test_cfg.tests_root @@ -48,7 +51,7 @@ def add_runtime_comments_to_generated_tests( class RuntimeCommentTransformer(cst.CSTTransformer): def __init__(self, test: GeneratedTests, tests_root: Path, rel_tests_root: Path) -> None: self.test = test - self.context_stack = [] + self.context_stack: list[str] = [] self.tests_root = tests_root self.rel_tests_root = rel_tests_root @@ -93,7 +96,7 @@ def 
leave_SimpleStatementLine( # TODO : will not work if there are multiple test cases with the same name, match filename + test class + test function name for invocation_id, runtimes in original_runtimes.items(): qualified_name = ( - invocation_id.test_class_name + "." + invocation_id.test_function_name + invocation_id.test_class_name + "." + invocation_id.test_function_name # type: ignore[operator] if invocation_id.test_class_name else invocation_id.test_function_name ) @@ -110,7 +113,7 @@ def leave_SimpleStatementLine( for invocation_id, runtimes in optimized_runtimes.items(): qualified_name = ( - invocation_id.test_class_name + "." + invocation_id.test_function_name + invocation_id.test_class_name + "." + invocation_id.test_function_name # type: ignore[operator] if invocation_id.test_class_name else invocation_id.test_function_name ) diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 81afd303a..8b184e5cb 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -22,7 +22,7 @@ from codeflash.result.critic import performance_gain if TYPE_CHECKING: - from codeflash.models.models import FunctionCalledInTest + from codeflash.models.models import FunctionCalledInTest, InvocationId from codeflash.result.explanation import Explanation from codeflash.verification.verification_utils import TestConfig @@ -31,8 +31,8 @@ def existing_tests_source_for( function_qualified_name_with_modules_from_root: str, function_to_tests: dict[str, set[FunctionCalledInTest]], test_cfg: TestConfig, - original_runtimes_all: dict, - optimized_runtimes_all: dict, + original_runtimes_all: dict[InvocationId, list[int]], + optimized_runtimes_all: dict[InvocationId, list[int]], ) -> str: test_files = function_to_tests.get(function_qualified_name_with_modules_from_root) if not test_files: @@ -41,8 +41,8 @@ def existing_tests_source_for( tests_root = test_cfg.tests_root module_root = test_cfg.project_root_path rel_tests_root = tests_root.relative_to(module_root) - original_tests_to_runtimes = {} - optimized_tests_to_runtimes = {} + original_tests_to_runtimes: dict[Path, dict[str, int]] = {} + optimized_tests_to_runtimes: dict[Path, dict[str, int]] = {} non_generated_tests = set() for test_file in test_files: non_generated_tests.add(Path(test_file.tests_in_file.test_file).relative_to(tests_root)) @@ -59,18 +59,18 @@ def existing_tests_source_for( if rel_path not in optimized_tests_to_runtimes: optimized_tests_to_runtimes[rel_path] = {} qualified_name = ( - invocation_id.test_class_name + "." + invocation_id.test_function_name + invocation_id.test_class_name + "." 
+ invocation_id.test_function_name # type: ignore[operator] if invocation_id.test_class_name else invocation_id.test_function_name ) if qualified_name not in original_tests_to_runtimes[rel_path]: - original_tests_to_runtimes[rel_path][qualified_name] = 0 + original_tests_to_runtimes[rel_path][qualified_name] = 0 # type: ignore[index] if qualified_name not in optimized_tests_to_runtimes[rel_path]: - optimized_tests_to_runtimes[rel_path][qualified_name] = 0 + optimized_tests_to_runtimes[rel_path][qualified_name] = 0 # type: ignore[index] if invocation_id in original_runtimes_all: - original_tests_to_runtimes[rel_path][qualified_name] += min(original_runtimes_all[invocation_id]) + original_tests_to_runtimes[rel_path][qualified_name] += min(original_runtimes_all[invocation_id]) # type: ignore[index] if invocation_id in optimized_runtimes_all: - optimized_tests_to_runtimes[rel_path][qualified_name] += min(optimized_runtimes_all[invocation_id]) + optimized_tests_to_runtimes[rel_path][qualified_name] += min(optimized_runtimes_all[invocation_id]) # type: ignore[index] # parse into string all_rel_paths = ( original_tests_to_runtimes.keys() From 7b4bdd053addca49e5491a2ada13b36dd322ea54 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 17 Jun 2025 16:29:54 -0700 Subject: [PATCH 09/16] fix tests, correct perf calc --- codeflash/code_utils/edit_generated_tests.py | 4 +++- codeflash/result/create_pr.py | 9 ++++++--- tests/test_add_runtime_comments.py | 13 +++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index ee50af0bd..94c18ab5c 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -131,7 +131,9 @@ def leave_SimpleStatementLine( if matching_original_times and matching_optimized_times: original_time = min(matching_original_times) optimized_time = min(matching_optimized_times) - perf_gain = performance_gain(original_runtime_ns=original_time, optimized_runtime_ns=optimized_time) + perf_gain = ( + performance_gain(original_runtime_ns=original_time, optimized_runtime_ns=optimized_time) * 100 + ) # Create the runtime comment comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)} ({perf_gain:.2f}%)" diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 8b184e5cb..72ef6d244 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -99,9 +99,12 @@ def existing_tests_source_for( optimized_tests_to_runtimes[filename][qualified_name] > original_tests_to_runtimes[filename][qualified_name] ) - perf_gain = performance_gain( - original_runtime_ns=original_tests_to_runtimes[filename][qualified_name], - optimized_runtime_ns=optimized_tests_to_runtimes[filename][qualified_name], + perf_gain = ( + performance_gain( + original_runtime_ns=original_tests_to_runtimes[filename][qualified_name], + optimized_runtime_ns=optimized_tests_to_runtimes[filename][qualified_name], + ) + * 100 ) if greater: output += f" - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime} $$\\color{{red}}({perf_gain:.2f}\\\\%)$$\n" diff --git a/tests/test_add_runtime_comments.py b/tests/test_add_runtime_comments.py index 51c1ef052..6de1b461c 100644 --- a/tests/test_add_runtime_comments.py +++ b/tests/test_add_runtime_comments.py @@ -12,6 +12,7 @@ TestType, VerificationType, ) +from codeflash.verification.verification_utils import TestConfig class TestAddRuntimeComments: @@ 
-48,6 +49,18 @@ def test_basic_runtime_comment_addition(self): assert codeflash_output == [1, 2, 3] """ + """test_cfg: TestConfig, + generated_tests: GeneratedTestsList, + original_runtimes: dict[InvocationId, list[int]], + optimized_runtimes: dict[InvocationId, list[int]]""" + project_root_path = file_path.parent.resolve() + test_config = TestConfig( + tests_root="tests", + tests_project_rootdir=Path.cwd(), + project_root_path=project_root_path, + test_framework="pytest", + pytest_cmd="pytest", + ) generated_test = GeneratedTests( generated_original_test_source=test_source, instrumented_behavior_test_source="", From 2ad102972a1aa2d2dd8586bee0c71849e6ed5d36 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 17 Jun 2025 17:00:25 -0700 Subject: [PATCH 10/16] tests wip --- tests/test_add_runtime_comments.py | 589 ++++++---------- tests/test_existing_tests_source_for.py | 901 +++++++++--------------- 2 files changed, 548 insertions(+), 942 deletions(-) diff --git a/tests/test_add_runtime_comments.py b/tests/test_add_runtime_comments.py index 6de1b461c..bbb833751 100644 --- a/tests/test_add_runtime_comments.py +++ b/tests/test_add_runtime_comments.py @@ -1,477 +1,338 @@ -"""Tests for the add_runtime_comments_to_generated_tests functionality.""" - +import os from pathlib import Path +from unittest.mock import Mock + +import pytest from codeflash.code_utils.edit_generated_tests import add_runtime_comments_to_generated_tests -from codeflash.models.models import ( - FunctionTestInvocation, - GeneratedTests, - GeneratedTestsList, - InvocationId, - TestResults, - TestType, - VerificationType, -) +from codeflash.models.models import GeneratedTests, GeneratedTestsList, InvocationId from codeflash.verification.verification_utils import TestConfig -class TestAddRuntimeComments: - """Test cases for add_runtime_comments_to_generated_tests method.""" - - def create_test_invocation( - self, test_function_name: str, runtime: int, loop_index: int = 1, iteration_id: str = "1", did_pass: bool = True - ) -> FunctionTestInvocation: - """Helper to create test invocation objects.""" - return FunctionTestInvocation( - loop_index=loop_index, - id=InvocationId( - test_module_path="test_module", - test_class_name=None, - test_function_name=test_function_name, - function_getting_tested="test_function", - iteration_id=iteration_id, - ), - file_name=Path("test.py"), - did_pass=did_pass, - runtime=runtime, - test_framework="pytest", - test_type=TestType.GENERATED_REGRESSION, - return_value=None, - timed_out=False, - verification_type=VerificationType.FUNCTION_CALL, - ) - - def test_basic_runtime_comment_addition(self): - """Test basic functionality of adding runtime comments.""" - # Create test source code - test_source = """def test_bubble_sort(): - codeflash_output = bubble_sort([3, 1, 2]) - assert codeflash_output == [1, 2, 3] -""" - - """test_cfg: TestConfig, - generated_tests: GeneratedTestsList, - original_runtimes: dict[InvocationId, list[int]], - optimized_runtimes: dict[InvocationId, list[int]]""" - project_root_path = file_path.parent.resolve() - test_config = TestConfig( - tests_root="tests", - tests_project_rootdir=Path.cwd(), - project_root_path=project_root_path, - test_framework="pytest", - pytest_cmd="pytest", - ) - generated_test = GeneratedTests( - generated_original_test_source=test_source, - instrumented_behavior_test_source="", - instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), - ) - - generated_tests = 
GeneratedTestsList(generated_tests=[generated_test]) - - # Create test results - original_test_results = TestResults() - optimized_test_results = TestResults() - - # Add test invocations with different runtimes - original_invocation = self.create_test_invocation("test_bubble_sort", 500_000) # 500μs - optimized_invocation = self.create_test_invocation("test_bubble_sort", 300_000) # 300μs +@pytest.fixture +def test_config(): + """Create a mock TestConfig for testing.""" + config = Mock(spec=TestConfig) + config.project_root_path = Path("/project") + config.tests_root = Path("/project/tests") + return config - original_test_results.add(original_invocation) - optimized_test_results.add(optimized_invocation) - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) +@pytest.fixture +def sample_invocation_id(): + """Create a sample InvocationId for testing.""" + return InvocationId( + test_module_path="tests.test_module", + test_class_name="TestClass", + test_function_name="test_function", + ) - # Check that comments were added - modified_source = result.generated_tests[0].generated_original_test_source - assert "# 500μs -> 300μs" in modified_source - assert "codeflash_output = bubble_sort([3, 1, 2]) # 500μs -> 300μs" in modified_source - def test_multiple_test_functions(self): - """Test handling multiple test functions in the same file.""" - test_source = """def test_bubble_sort(): - codeflash_output = bubble_sort([3, 1, 2]) - assert codeflash_output == [1, 2, 3] +@pytest.fixture +def sample_invocation_id_no_class(): + """Create a sample InvocationId without class for testing.""" + return InvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_function", + ) -def test_quick_sort(): - codeflash_output = quick_sort([5, 2, 8]) - assert codeflash_output == [2, 5, 8] -def helper_function(): - return "not a test" -""" +class TestAddRuntimeCommentsToGeneratedTests: + def test_add_runtime_comments_simple_function(self, test_config): + """Test adding runtime comments to a simple test function.""" + test_source = '''def test_function(): + codeflash_output = some_function() + assert codeflash_output == expected +''' generated_test = GeneratedTests( generated_original_test_source=test_source, instrumented_behavior_test_source="", instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py"), ) generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - # Create test results for both functions - original_test_results = TestResults() - optimized_test_results = TestResults() - - # Add test invocations for both test functions - original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) - original_test_results.add(self.create_test_invocation("test_quick_sort", 800_000)) - - optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) - optimized_test_results.add(self.create_test_invocation("test_quick_sort", 600_000)) - - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) - - modified_source = result.generated_tests[0].generated_original_test_source - - # Check that comments were added to both test functions - assert "# 500μs -> 300μs" in modified_source - 
assert "# 800μs -> 600μs" in modified_source - # Helper function should not have comments - assert ( - "helper_function():" in modified_source - and "# " not in modified_source.split("helper_function():")[1].split("\n")[0] + invocation_id = InvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_function", ) - def test_different_time_formats(self): - """Test that different time ranges are formatted correctly with new precision rules.""" - test_cases = [ - (999, 500, "999ns -> 500ns"), # nanoseconds - (25_000, 18_000, "25.0μs -> 18.0μs"), # microseconds with precision - (500_000, 300_000, "500μs -> 300μs"), # microseconds full integers - (1_500_000, 800_000, "1.50ms -> 800μs"), # milliseconds with precision - (365_000_000, 290_000_000, "365ms -> 290ms"), # milliseconds full integers - (2_000_000_000, 1_500_000_000, "2.00s -> 1.50s"), # seconds with precision - ] - - for original_time, optimized_time, expected_comment in test_cases: - test_source = """def test_function(): - codeflash_output = some_function() - assert codeflash_output is not None -""" - - generated_test = GeneratedTests( - generated_original_test_source=test_source, - instrumented_behavior_test_source="", - instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), - ) - - generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - - # Create test results - original_test_results = TestResults() - optimized_test_results = TestResults() + original_runtimes = {invocation_id: [1000000000, 1200000000]} # 1s, 1.2s in nanoseconds + optimized_runtimes = {invocation_id: [500000000, 600000000]} # 0.5s, 0.6s in nanoseconds - original_test_results.add(self.create_test_invocation("test_function", original_time)) - optimized_test_results.add(self.create_test_invocation("test_function", optimized_time)) + result = add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + ) - # Test the functionality - result = add_runtime_comments_to_generated_tests( - generated_tests, original_test_results, optimized_test_results - ) + expected_source = '''def test_function(): + codeflash_output = some_function() # 1.00s -> 500.00ms (50.00%) + assert codeflash_output == expected +''' - modified_source = result.generated_tests[0].generated_original_test_source - assert f"# {expected_comment}" in modified_source + assert len(result.generated_tests) == 1 + assert result.generated_tests[0].generated_original_test_source == expected_source - def test_missing_test_results(self): - """Test behavior when test results are missing for a test function.""" - test_source = """def test_bubble_sort(): - codeflash_output = bubble_sort([3, 1, 2]) - assert codeflash_output == [1, 2, 3] -""" + def test_add_runtime_comments_class_method(self, test_config): + """Test adding runtime comments to a test method within a class.""" + test_source = '''class TestClass: + def test_function(self): + codeflash_output = some_function() + assert codeflash_output == expected +''' generated_test = GeneratedTests( generated_original_test_source=test_source, instrumented_behavior_test_source="", instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py"), ) generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - # Create 
empty test results - original_test_results = TestResults() - optimized_test_results = TestResults() - - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) - - # Check that no comments were added - modified_source = result.generated_tests[0].generated_original_test_source - assert modified_source == test_source # Should be unchanged - - def test_partial_test_results(self): - """Test behavior when only one set of test results is available.""" - test_source = """def test_bubble_sort(): - codeflash_output = bubble_sort([3, 1, 2]) - assert codeflash_output == [1, 2, 3] -""" - - generated_test = GeneratedTests( - generated_original_test_source=test_source, - instrumented_behavior_test_source="", - instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), + invocation_id = InvocationId( + test_module_path="tests.test_module", + test_class_name="TestClass", + test_function_name="test_function", ) - generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - - # Create test results with only original data - original_test_results = TestResults() - optimized_test_results = TestResults() + original_runtimes = {invocation_id: [2000000000]} # 2s in nanoseconds + optimized_runtimes = {invocation_id: [1000000000]} # 1s in nanoseconds - original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) - # No optimized results + result = add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + ) - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) + expected_source = '''class TestClass: + def test_function(self): + codeflash_output = some_function() # 2.00s -> 1.00s (50.00%) + assert codeflash_output == expected +''' - # Check that no comments were added - modified_source = result.generated_tests[0].generated_original_test_source - assert modified_source == test_source # Should be unchanged + assert len(result.generated_tests) == 1 + assert result.generated_tests[0].generated_original_test_source == expected_source - def test_multiple_runtimes_uses_minimum(self): - """Test that when multiple runtimes exist, the minimum is used.""" - test_source = """def test_bubble_sort(): - codeflash_output = bubble_sort([3, 1, 2]) - assert codeflash_output == [1, 2, 3] -""" + def test_add_runtime_comments_multiple_assignments(self, test_config): + """Test adding runtime comments when there are multiple codeflash_output assignments.""" + test_source = '''def test_function(): + setup_data = prepare_test() + codeflash_output = some_function() + assert codeflash_output == expected + codeflash_output = another_function() + assert codeflash_output == expected2 +''' generated_test = GeneratedTests( generated_original_test_source=test_source, instrumented_behavior_test_source="", instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py"), ) generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - # Create test results with multiple loop iterations - original_test_results = TestResults() - optimized_test_results = TestResults() + invocation_id = InvocationId( + test_module_path="tests.test_module", + 
test_class_name=None, + test_function_name="test_function", + ) - # Add multiple runs with different runtimes - original_test_results.add(self.create_test_invocation("test_bubble_sort", 600_000, loop_index=1)) - original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000, loop_index=2)) - original_test_results.add(self.create_test_invocation("test_bubble_sort", 550_000, loop_index=3)) + original_runtimes = {invocation_id: [1500000000]} # 1.5s in nanoseconds + optimized_runtimes = {invocation_id: [750000000]} # 0.75s in nanoseconds - optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 350_000, loop_index=1)) - optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000, loop_index=2)) - optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 320_000, loop_index=3)) + result = add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + ) - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) + expected_source = '''def test_function(): + setup_data = prepare_test() + codeflash_output = some_function() # 1.50s -> 750.00ms (50.00%) + assert codeflash_output == expected + codeflash_output = another_function() # 1.50s -> 750.00ms (50.00%) + assert codeflash_output == expected2 +''' - # Check that minimum times were used (500μs -> 300μs) - modified_source = result.generated_tests[0].generated_original_test_source - assert "# 500μs -> 300μs" in modified_source + assert len(result.generated_tests) == 1 + assert result.generated_tests[0].generated_original_test_source == expected_source - def test_no_codeflash_output_assignment(self): - """Test behavior when test doesn't have codeflash_output assignment.""" - test_source = """def test_bubble_sort(): - result = bubble_sort([3, 1, 2]) - assert result == [1, 2, 3] -""" + def test_add_runtime_comments_no_matching_runtimes(self, test_config): + """Test that source remains unchanged when no matching runtimes are found.""" + test_source = '''def test_function(): + codeflash_output = some_function() + assert codeflash_output == expected +''' generated_test = GeneratedTests( generated_original_test_source=test_source, instrumented_behavior_test_source="", instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py"), ) generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - # Create test results - original_test_results = TestResults() - optimized_test_results = TestResults() + # Different invocation ID that won't match + invocation_id = InvocationId( + test_module_path="tests.other_module", + test_class_name=None, + test_function_name="other_function", + ) - original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) - optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) + original_runtimes = {invocation_id: [1000000000]} + optimized_runtimes = {invocation_id: [500000000]} - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) + result = add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + ) - # Check that no comments were added (no 
codeflash_output assignment) - modified_source = result.generated_tests[0].generated_original_test_source - assert modified_source == test_source # Should be unchanged + # Source should remain unchanged + assert len(result.generated_tests) == 1 + assert result.generated_tests[0].generated_original_test_source == test_source - def test_invalid_python_code_handling(self): - """Test behavior when test source code is invalid Python.""" - test_source = """def test_bubble_sort(: - codeflash_output = bubble_sort([3, 1, 2]) - assert codeflash_output == [1, 2, 3] -""" # Invalid syntax: extra colon + def test_add_runtime_comments_no_codeflash_output(self, test_config): + """Test that source remains unchanged when there's no codeflash_output assignment.""" + test_source = '''def test_function(): + result = some_function() + assert result == expected +''' generated_test = GeneratedTests( generated_original_test_source=test_source, instrumented_behavior_test_source="", instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py"), ) generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - # Create test results - original_test_results = TestResults() - optimized_test_results = TestResults() + invocation_id = InvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_function", + ) - original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) - optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) + original_runtimes = {invocation_id: [1000000000]} + optimized_runtimes = {invocation_id: [500000000]} - # Test the functionality - should handle parse error gracefully - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) + result = add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + ) - # Check that original test is preserved when parsing fails - modified_source = result.generated_tests[0].generated_original_test_source - assert modified_source == test_source # Should be unchanged due to parse error + # Source should remain unchanged + assert len(result.generated_tests) == 1 + assert result.generated_tests[0].generated_original_test_source == test_source - def test_multiple_generated_tests(self): - """Test handling multiple generated test objects.""" - test_source_1 = """def test_bubble_sort(): - codeflash_output = bubble_sort([3, 1, 2]) - assert codeflash_output == [1, 2, 3] -""" + def test_add_runtime_comments_multiple_tests(self, test_config): + """Test adding runtime comments to multiple generated tests.""" + test_source1 = '''def test_function1(): + codeflash_output = some_function() + assert codeflash_output == expected +''' - test_source_2 = """def test_quick_sort(): - codeflash_output = quick_sort([5, 2, 8]) - assert codeflash_output == [2, 5, 8] -""" + test_source2 = '''def test_function2(): + codeflash_output = another_function() + assert codeflash_output == expected +''' - generated_test_1 = GeneratedTests( - generated_original_test_source=test_source_1, + generated_test1 = GeneratedTests( + generated_original_test_source=test_source1, instrumented_behavior_test_source="", instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior_1.py"), - 
perf_file_path=Path("test_perf_1.py"), + behavior_file_path=Path("/project/tests/test_module1.py"), + perf_file_path=Path("/project/tests/test_module1_perf.py"), ) - generated_test_2 = GeneratedTests( - generated_original_test_source=test_source_2, + generated_test2 = GeneratedTests( + generated_original_test_source=test_source2, instrumented_behavior_test_source="", instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior_2.py"), - perf_file_path=Path("test_perf_2.py"), + behavior_file_path=Path("/project/tests/test_module2.py"), + perf_file_path=Path("/project/tests/test_module2_perf.py"), ) - generated_tests = GeneratedTestsList(generated_tests=[generated_test_1, generated_test_2]) - - # Create test results - original_test_results = TestResults() - optimized_test_results = TestResults() - - original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) - original_test_results.add(self.create_test_invocation("test_quick_sort", 800_000)) - - optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) - optimized_test_results.add(self.create_test_invocation("test_quick_sort", 600_000)) + generated_tests = GeneratedTestsList(generated_tests=[generated_test1, generated_test2]) - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) - - # Check that comments were added to both test files - modified_source_1 = result.generated_tests[0].generated_original_test_source - modified_source_2 = result.generated_tests[1].generated_original_test_source - - assert "# 500μs -> 300μs" in modified_source_1 - assert "# 800μs -> 600μs" in modified_source_2 - - def test_preserved_test_attributes(self): - """Test that other test attributes are preserved during modification.""" - test_source = """def test_bubble_sort(): - codeflash_output = bubble_sort([3, 1, 2]) - assert codeflash_output == [1, 2, 3] -""" - - original_behavior_source = "behavior test source" - original_perf_source = "perf test source" - original_behavior_path = Path("test_behavior.py") - original_perf_path = Path("test_perf.py") - - generated_test = GeneratedTests( - generated_original_test_source=test_source, - instrumented_behavior_test_source=original_behavior_source, - instrumented_perf_test_source=original_perf_source, - behavior_file_path=original_behavior_path, - perf_file_path=original_perf_path, + invocation_id1 = InvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_function1", ) - generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - - # Create test results - original_test_results = TestResults() - optimized_test_results = TestResults() + invocation_id2 = InvocationId( + test_module_path="tests.test_module2", + test_class_name=None, + test_function_name="test_function2", + ) - original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) - optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) + original_runtimes = { + invocation_id1: [1000000000], # 1s + invocation_id2: [2000000000], # 2s + } + optimized_runtimes = { + invocation_id1: [500000000], # 0.5s + invocation_id2: [800000000], # 0.8s + } + + result = add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + ) - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) + 
expected_source1 = '''def test_function1(): + codeflash_output = some_function() # 1.00s -> 500.00ms (50.00%) + assert codeflash_output == expected +''' - # Check that other attributes are preserved - modified_test = result.generated_tests[0] - assert modified_test.instrumented_behavior_test_source == original_behavior_source - assert modified_test.instrumented_perf_test_source == original_perf_source - assert modified_test.behavior_file_path == original_behavior_path - assert modified_test.perf_file_path == original_perf_path + expected_source2 = '''def test_function2(): + codeflash_output = another_function() # 2.00s -> 800.00ms (60.00%) + assert codeflash_output == expected +''' - # Check that only the generated_original_test_source was modified - assert "# 500μs -> 300μs" in modified_test.generated_original_test_source + assert len(result.generated_tests) == 2 + assert result.generated_tests[0].generated_original_test_source == expected_source1 + assert result.generated_tests[1].generated_original_test_source == expected_source2 - def test_multistatement_line_handling(self): - """Test that runtime comments work correctly with multiple statements on one line.""" - test_source = """def test_mutation_of_input(): - # Test that the input list is mutated in-place and returned - arr = [3, 1, 2] - codeflash_output = sorter(arr); result = codeflash_output - assert result == [1, 2, 3] - assert arr == [1, 2, 3] # Input should be mutated -""" + def test_add_runtime_comments_performance_regression(self, test_config): + """Test adding runtime comments when optimized version is slower (negative performance gain).""" + test_source = '''def test_function(): + codeflash_output = some_function() + assert codeflash_output == expected +''' generated_test = GeneratedTests( generated_original_test_source=test_source, instrumented_behavior_test_source="", instrumented_perf_test_source="", - behavior_file_path=Path("test_behavior.py"), - perf_file_path=Path("test_perf.py"), + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py"), ) generated_tests = GeneratedTestsList(generated_tests=[generated_test]) - # Create test results - original_test_results = TestResults() - optimized_test_results = TestResults() - - original_test_results.add(self.create_test_invocation("test_mutation_of_input", 19_000)) # 19μs - optimized_test_results.add(self.create_test_invocation("test_mutation_of_input", 14_000)) # 14μs + invocation_id = InvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_function", + ) - # Test the functionality - result = add_runtime_comments_to_generated_tests(generated_tests, original_test_results, optimized_test_results) + original_runtimes = {invocation_id: [1000000000]} # 1s + optimized_runtimes = {invocation_id: [1500000000]} # 1.5s (slower!) 
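# --- Illustrative aside (not part of the patch): the "(X.XX%)" suffix asserted in these
# expected strings, and the improvement column in the PR table, are assumed to follow the
# performance_gain(...) * 100 convention introduced earlier in this series, i.e. relative
# speedup measured against the optimized runtime. A minimal, self-contained sketch with a
# hypothetical helper name:
def relative_gain_percent(original_ns: int, optimized_ns: int) -> float:
    """Return (original - optimized) / optimized, expressed as a percentage."""
    return (original_ns - optimized_ns) / optimized_ns * 100

# 1.00s -> 500ms gives 100.00%, matching the comments asserted in the corrected tests;
# an optimization that is slower than the original yields a negative percentage.
assert round(relative_gain_percent(1_000_000_000, 500_000_000), 2) == 100.00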
- # Check that comments were added to the correct line - modified_source = result.generated_tests[0].generated_original_test_source - assert "# 19.0μs -> 14.0μs" in modified_source + result = add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + ) - # Verify the comment is on the line with codeflash_output assignment - lines = modified_source.split("\n") - codeflash_line = None - for line in lines: - if "codeflash_output = sorter(arr)" in line: - codeflash_line = line - break + expected_source = '''def test_function(): + codeflash_output = some_function() # 1.00s -> 1.50s (-50.00%) + assert codeflash_output == expected +''' - assert codeflash_line is not None, "Could not find codeflash_output assignment line" - assert "# 19.0μs -> 14.0μs" in codeflash_line, f"Comment not found in the correct line: {codeflash_line}" + assert len(result.generated_tests) == 1 + assert result.generated_tests[0].generated_original_test_source == expected_source diff --git a/tests/test_existing_tests_source_for.py b/tests/test_existing_tests_source_for.py index 1a00c47c0..27495939c 100644 --- a/tests/test_existing_tests_source_for.py +++ b/tests/test_existing_tests_source_for.py @@ -1,645 +1,390 @@ -"""Tests for the existing_tests_source_for function in result/create_pr.py.""" +from __future__ import annotations +import os from pathlib import Path -from unittest.mock import patch +from typing import NamedTuple import pytest -from codeflash.models.models import ( - CodePosition, - FunctionCalledInTest, - FunctionTestInvocation, - InvocationId, - TestResults, - TestsInFile, - TestType, VerificationType, -) + from codeflash.result.create_pr import existing_tests_source_for -@pytest.fixture -def sample_tests_root(tmp_path: Path) -> Path: - """Create a temporary test root directory.""" - return tmp_path / "tests" +class MockInvocationId(NamedTuple): + test_module_path: str + test_class_name: str | None + test_function_name: str -@pytest.fixture -def sample_function_to_tests(sample_tests_root: Path) -> dict[str, set[FunctionCalledInTest]]: - """Create sample function to tests mapping.""" - test_file_1 = sample_tests_root / "test_module1.py" - test_file_2 = sample_tests_root / "test_module2.py" +class MockTestsInFile(NamedTuple): + test_file: str - return { - "my_module.my_function": { - FunctionCalledInTest( - tests_in_file=TestsInFile( - test_file=test_file_1, - test_class=None, - test_function="test_basic_functionality", - test_type=TestType.EXISTING_UNIT_TEST, - ), - position=CodePosition(line_no=10, col_no=4), - ), - FunctionCalledInTest( - tests_in_file=TestsInFile( - test_file=test_file_1, - test_class="TestMyFunction", - test_function="test_edge_cases", - test_type=TestType.EXISTING_UNIT_TEST, - ), - position=CodePosition(line_no=20, col_no=8), - ), - FunctionCalledInTest( - tests_in_file=TestsInFile( - test_file=test_file_2, - test_class=None, - test_function="test_performance", - test_type=TestType.EXISTING_UNIT_TEST, - ), - position=CodePosition(line_no=15, col_no=4), - ), - } - } - - -@pytest.fixture -def sample_original_test_results() -> TestResults: - """Create sample original test results with timing information.""" - results = TestResults() - - # Test case 1: test_basic_functionality with multiple function calls - results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name=None, - test_function_name="test_basic_functionality", - function_getting_tested="my_function", - iteration_id="1", - 
), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=1000, # 1000 ns - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + +class MockFunctionCalledInTest(NamedTuple): + tests_in_file: MockTestsInFile + + +class MockTestConfig(NamedTuple): + tests_root: Path + project_root_path: Path + + +class TestExistingTestsSourceFor: + """Test cases for existing_tests_source_for function.""" + + def test_no_test_files_found(self): + """Test when no test files are found for the function.""" + function_qualified_name = "module.function_name" + function_to_tests = {} + test_cfg = MockTestConfig( + tests_root=Path("/project/tests"), + project_root_path=Path("/project") ) - ) - - results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name=None, - test_function_name="test_basic_functionality", - function_getting_tested="my_function", - iteration_id="2", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=500, # 500 ns - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + original_runtimes = {} + optimized_runtimes = {} + + result = existing_tests_source_for( + function_qualified_name, + function_to_tests, + test_cfg, + original_runtimes, + optimized_runtimes ) - ) - - # Test case 2: test_edge_cases - results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name="TestMyFunction", - test_function_name="test_edge_cases", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=2000, # 2000 ns - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + assert result == "" + + def test_single_test_file_with_function_test(self): + """Test with a single test file containing one test function.""" + function_qualified_name = "module.function_name" + test_file_path = "/project/tests/test_module.py" + + function_to_tests = { + function_qualified_name: { + MockFunctionCalledInTest( + tests_in_file=MockTestsInFile(test_file=test_file_path) + ) + } + } + + test_cfg = MockTestConfig( + tests_root=Path("/project/tests"), + project_root_path=Path("/project") ) - ) - - # Test case 3: test_performance - results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module2", - test_class_name=None, - test_function_name="test_performance", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module2.py"), - did_pass=True, - runtime=3000, # 3000 ns - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + invocation_id = MockInvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_function" ) - ) - - return results - - -@pytest.fixture -def sample_optimized_test_results() -> TestResults: - """Create sample optimized test results with improved timing information.""" - results = TestResults() - - # Test case 1: test_basic_functionality with multiple function calls (improved) - results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name=None, - test_function_name="test_basic_functionality", - function_getting_tested="my_function", - 
iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=800, # 800 ns (improved from 1000 ns) - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + original_runtimes = {invocation_id: [1000000, 1100000, 900000]} # 1ms, 1.1ms, 0.9ms + optimized_runtimes = {invocation_id: [500000, 600000, 400000]} # 0.5ms, 0.6ms, 0.4ms + + result = existing_tests_source_for( + function_qualified_name, + function_to_tests, + test_cfg, + original_runtimes, + optimized_runtimes ) - ) - - results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name=None, - test_function_name="test_basic_functionality", - function_getting_tested="my_function", - iteration_id="2", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=400, # 400 ns (improved from 500 ns) - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + expected = """- test_module.py + - test_function: 900μs -> 400μs $\\color{green}(55.56\\%)$ + +""" + assert result == expected + + def test_single_test_file_with_class_test(self): + """Test with a single test file containing a test method in a class.""" + function_qualified_name = "module.function_name" + test_file_path = "/project/tests/test_module.py" + + function_to_tests = { + function_qualified_name: { + MockFunctionCalledInTest( + tests_in_file=MockTestsInFile(test_file=test_file_path) + ) + } + } + + test_cfg = MockTestConfig( + tests_root=Path("/project/tests"), + project_root_path=Path("/project") ) - ) - - # Test case 2: test_edge_cases (improved) - results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name="TestMyFunction", - test_function_name="test_edge_cases", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=1500, # 1500 ns (improved from 2000 ns) - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + invocation_id = MockInvocationId( + test_module_path="tests.test_module", + test_class_name="TestClass", + test_function_name="test_method" ) - ) - - # Test case 3: test_performance (improved) - results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module2", - test_class_name=None, - test_function_name="test_performance", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module2.py"), - did_pass=True, - runtime=2100, # 2100 ns (improved from 3000 ns) - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + original_runtimes = {invocation_id: [2000000]} # 2ms + optimized_runtimes = {invocation_id: [3000000]} # 3ms (slower) + + result = existing_tests_source_for( + function_qualified_name, + function_to_tests, + test_cfg, + original_runtimes, + optimized_runtimes ) - ) - return results + expected = """- test_module.py + - TestClass.test_method: 2.00ms -> 3.00ms $\\color{red}(-50.00\\%)$ +""" + assert result == expected -def test_existing_tests_source_for_without_timing_info( - sample_function_to_tests: dict[str, set[FunctionCalledInTest]], sample_tests_root: Path -): - """Test the function works without timing information (backward compatibility).""" - result = 
existing_tests_source_for("my_module.my_function", sample_function_to_tests, sample_tests_root) + def test_multiple_test_files_and_methods(self): + """Test with multiple test files and multiple test methods.""" + function_qualified_name = "module.function_name" + test_file_path1 = "/project/tests/test_module1.py" + test_file_path2 = "/project/tests/test_module2.py" - expected_lines = ["- test_module1.py", "- test_module2.py"] + function_to_tests = { + function_qualified_name: { + MockFunctionCalledInTest( + tests_in_file=MockTestsInFile(test_file=test_file_path1) + ), + MockFunctionCalledInTest( + tests_in_file=MockTestsInFile(test_file=test_file_path2) + ) + } + } + + test_cfg = MockTestConfig( + tests_root=Path("/project/tests"), + project_root_path=Path("/project") + ) - for line in expected_lines: - assert line in result + invocation_id1 = MockInvocationId( + test_module_path="tests.test_module1", + test_class_name=None, + test_function_name="test_function1" + ) - # Should not contain any timing information - assert "->" not in result - assert "ns" not in result + invocation_id2 = MockInvocationId( + test_module_path="tests.test_module1", + test_class_name="TestClass", + test_function_name="test_method1" + ) + invocation_id3 = MockInvocationId( + test_module_path="tests.test_module2", + test_class_name=None, + test_function_name="test_function2" + ) -def test_existing_tests_source_for_with_timing_info( - sample_function_to_tests: dict[str, set[FunctionCalledInTest]], - sample_tests_root: Path, - sample_original_test_results: TestResults, - sample_optimized_test_results: TestResults, -): - """Test the function includes timing information when provided.""" - with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: - # Mock format_time to return predictable values - mock_format_time.side_effect = lambda x: f"{x} ns" + original_runtimes = { + invocation_id1: [1000000], # 1ms + invocation_id2: [2000000], # 2ms + invocation_id3: [500000] # 0.5ms + } + optimized_runtimes = { + invocation_id1: [800000], # 0.8ms + invocation_id2: [1500000], # 1.5ms + invocation_id3: [400000] # 0.4ms + } result = existing_tests_source_for( - "my_module.my_function", - sample_function_to_tests, - sample_tests_root, - sample_original_test_results, - sample_optimized_test_results, + function_qualified_name, + function_to_tests, + test_cfg, + original_runtimes, + optimized_runtimes ) - # Should contain file names - assert "- test_module1.py" in result - assert "- test_module2.py" in result + expected = """- test_module1.py + - TestClass.test_method1: 2.00ms -> 1.50ms $\\color{green}(25.00\\%)$ + - test_function1: 1.00ms -> 800μs $\\color{green}(20.00\\%)$ - # Should contain test function names with timing (using min values now) - assert "test_basic_functionality: 500 ns -> 400 ns" in result # min(1000,500) -> min(800,400) - assert "test_edge_cases: 2000 ns -> 1500 ns" in result - assert "test_performance: 3000 ns -> 2100 ns" in result +- test_module2.py + - test_function2: 500μs -> 400μs $\\color{green}(20.00\\%)$ +""" + assert result == expected -def test_existing_tests_source_for_aggregates_multiple_function_calls( - sample_function_to_tests: dict[str, set[FunctionCalledInTest]], - sample_tests_root: Path, - sample_original_test_results: TestResults, - sample_optimized_test_results: TestResults, -): - """Test that multiple function calls within a test case use minimum timing.""" - with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: - mock_format_time.side_effect = 
lambda x: f"{x} ns" + def test_missing_runtime_data(self): + """Test when runtime data is missing for some tests.""" + function_qualified_name = "module.function_name" + test_file_path = "/project/tests/test_module.py" - result = existing_tests_source_for( - "my_module.my_function", - sample_function_to_tests, - sample_tests_root, - sample_original_test_results, - sample_optimized_test_results, - ) + function_to_tests = { + function_qualified_name: { + MockFunctionCalledInTest( + tests_in_file=MockTestsInFile(test_file=test_file_path) + ) + } + } - # test_basic_functionality should show minimum timing: min(1000,500) -> min(800,400) - assert "test_basic_functionality: 500 ns -> 400 ns" in result - - -def test_existing_tests_source_for_only_includes_passing_tests( - sample_function_to_tests: dict[str, set[FunctionCalledInTest]], sample_tests_root: Path -): - """Test that only passing tests with runtime data are included in timing report.""" - original_results = TestResults() - optimized_results = TestResults() - - # Add a passing test with runtime - original_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name=None, - test_function_name="test_basic_functionality", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=1000, - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, - ) - ) - - optimized_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name=None, - test_function_name="test_basic_functionality", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=800, - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + test_cfg = MockTestConfig( + tests_root=Path("/project/tests"), + project_root_path=Path("/project") ) - ) - - # Add a failing test (should be excluded) - original_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name="TestMyFunction", - test_function_name="test_edge_cases", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=False, # Failing test - runtime=2000, - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + invocation_id1 = MockInvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_with_original_only" ) - ) - - # Add a test without runtime (should be excluded) - original_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module2", - test_class_name=None, - test_function_name="test_performance", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module2.py"), - did_pass=True, - runtime=None, # No runtime data - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + invocation_id2 = MockInvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_with_optimized_only" ) - ) - with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: - mock_format_time.side_effect = 
lambda x: f"{x} ns" + original_runtimes = {invocation_id1: [1000000]} # Only original + optimized_runtimes = {invocation_id2: [500000]} # Only optimized result = existing_tests_source_for( - "my_module.my_function", sample_function_to_tests, sample_tests_root, original_results, optimized_results + function_qualified_name, + function_to_tests, + test_cfg, + original_runtimes, + optimized_runtimes + ) + + expected = """- test_module.py + - test_with_optimized_only: NaN -> 500μs + - test_with_original_only: 1.00ms -> NaN + +""" + assert result == expected + + def test_nested_test_directory(self): + """Test with nested test directories.""" + function_qualified_name = "module.function_name" + test_file_path = "/project/tests/unit/test_module.py" + + function_to_tests = { + function_qualified_name: { + MockFunctionCalledInTest( + tests_in_file=MockTestsInFile(test_file=test_file_path) + ) + } + } + + test_cfg = MockTestConfig( + tests_root=Path("/project/tests"), + project_root_path=Path("/project") ) - # Should only include the passing test with runtime data - assert "test_basic_functionality: 1000 ns -> 800 ns" in result - # Should not include failing test or test without runtime - assert "test_edge_cases" not in result - assert "test_performance" not in result - - -def test_existing_tests_source_for_with_empty_test_mapping(sample_tests_root: Path): - """Test behavior when there are no tests for the function.""" - result = existing_tests_source_for("nonexistent.function", {}, sample_tests_root) - - assert result == "" - - -def test_existing_tests_source_for_missing_optimized_results( - sample_function_to_tests: dict[str, set[FunctionCalledInTest]], - sample_tests_root: Path, - sample_original_test_results: TestResults, -): - """Test behavior when optimized results are missing for some test cases.""" - # Create optimized results that are missing some test cases - optimized_results = TestResults() - optimized_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.test_module1", - test_class_name=None, - test_function_name="test_basic_functionality", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/test_module1.py"), - did_pass=True, - runtime=800, - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + invocation_id = MockInvocationId( + test_module_path="tests.unit.test_module", + test_class_name=None, + test_function_name="test_function" ) - ) - # Note: Missing test_edge_cases and test_performance optimized results - with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: - mock_format_time.side_effect = lambda x: f"{x} ns" + original_runtimes = {invocation_id: [1000000]} + optimized_runtimes = {invocation_id: [800000]} result = existing_tests_source_for( - "my_module.my_function", - sample_function_to_tests, - sample_tests_root, - sample_original_test_results, - optimized_results, + function_qualified_name, + function_to_tests, + test_cfg, + original_runtimes, + optimized_runtimes ) - # Should not include test cases without both original and optimized results - assert "test_basic_functionality" not in result # Missing second function call - assert "test_edge_cases" not in result - assert "test_performance" not in result - - # Should still show file names - assert "- test_module1.py" in result - assert "- test_module2.py" in result - - -def test_existing_tests_source_for_sorted_output(sample_tests_root: Path): - """Test that 
output is properly sorted by file name and test function name.""" - # Create a more complex test mapping with multiple files and functions - test_file_a = sample_tests_root / "a_test_module.py" - test_file_z = sample_tests_root / "z_test_module.py" - - function_to_tests = { - "my_module.my_function": { - FunctionCalledInTest( - tests_in_file=TestsInFile( - test_file=test_file_z, - test_class=None, - test_function="z_test_function", - test_type=TestType.EXISTING_UNIT_TEST, - ), - position=CodePosition(line_no=10, col_no=4), - ), - FunctionCalledInTest( - tests_in_file=TestsInFile( - test_file=test_file_a, - test_class=None, - test_function="a_test_function", - test_type=TestType.EXISTING_UNIT_TEST, - ), - position=CodePosition(line_no=20, col_no=8), - ), - FunctionCalledInTest( - tests_in_file=TestsInFile( - test_file=test_file_a, - test_class=None, - test_function="b_test_function", - test_type=TestType.EXISTING_UNIT_TEST, - ), - position=CodePosition(line_no=30, col_no=8), - ), + expected = """- unit/test_module.py + - test_function: 1.00ms -> 800μs $\\color{green}(20.00\\%)$ + +""" + assert result == expected + + def test_multiple_invocations_same_test(self): + """Test when the same test has multiple invocations (runtimes are summed).""" + function_qualified_name = "module.function_name" + test_file_path = "/project/tests/test_module.py" + + function_to_tests = { + function_qualified_name: { + MockFunctionCalledInTest( + tests_in_file=MockTestsInFile(test_file=test_file_path) + ) + } } - } - - original_results = TestResults() - optimized_results = TestResults() - - # Add test results for all functions - for test_func in ["a_test_function", "b_test_function"]: - original_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.a_test_module", - test_class_name=None, - test_function_name=test_func, - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/a_test_module.py"), - did_pass=True, - runtime=1000, - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, - ) - ) - optimized_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.a_test_module", - test_class_name=None, - test_function_name=test_func, - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/a_test_module.py"), - did_pass=True, - runtime=800, - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, - ) + test_cfg = MockTestConfig( + tests_root=Path("/project/tests"), + project_root_path=Path("/project") ) - original_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.z_test_module", - test_class_name=None, - test_function_name="z_test_function", - function_getting_tested="my_function", - iteration_id="1", - ), - file_name=Path("/tmp/tests/z_test_module.py"), - did_pass=True, - runtime=1000, - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + # Same test function with multiple invocations + invocation_id1 = MockInvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_function" ) - ) - - optimized_results.add( - FunctionTestInvocation( - id=InvocationId( - test_module_path="tests.z_test_module", - test_class_name=None, - test_function_name="z_test_function", - function_getting_tested="my_function", - 
iteration_id="1", - ), - file_name=Path("/tmp/tests/z_test_module.py"), - did_pass=True, - runtime=800, - test_framework="pytest", - test_type=TestType.EXISTING_UNIT_TEST, - return_value=None, - timed_out=False, - loop_index=1, + + invocation_id2 = MockInvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_function" ) - ) - with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: - mock_format_time.side_effect = lambda x: f"{x} ns" + original_runtimes = { + invocation_id1: [1000000, 1200000], # min: 1ms + invocation_id2: [800000, 900000] # min: 0.8ms + } + optimized_runtimes = { + invocation_id1: [600000, 700000], # min: 0.6ms + invocation_id2: [400000, 500000] # min: 0.4ms + } result = existing_tests_source_for( - "my_module.my_function", function_to_tests, sample_tests_root, original_results, optimized_results + function_qualified_name, + function_to_tests, + test_cfg, + original_runtimes, + optimized_runtimes ) - lines = result.split("\n") - - # Files should be sorted alphabetically - a_file_index = next(i for i, line in enumerate(lines) if "a_test_module.py" in line) - z_file_index = next(i for i, line in enumerate(lines) if "z_test_module.py" in line) - assert a_file_index < z_file_index - - # Test functions within a file should be sorted alphabetically - a_func_index = next(i for i, line in enumerate(lines) if "a_test_function" in line) - b_func_index = next(i for i, line in enumerate(lines) if "b_test_function" in line) - assert a_func_index < b_func_index + # Total original: 1ms + 0.8ms = 1.8ms + # Total optimized: 0.6ms + 0.4ms = 1ms + expected = """- test_module.py + - test_function: 1.80ms -> 1.00ms $\\color{green}(44.44\\%)$ + +""" + assert result == expected + + def test_zero_runtime_values(self): + """Test handling of zero runtime values.""" + function_qualified_name = "module.function_name" + test_file_path = "/project/tests/test_module.py" + + function_to_tests = { + function_qualified_name: { + MockFunctionCalledInTest( + tests_in_file=MockTestsInFile(test_file=test_file_path) + ) + } + } + test_cfg = MockTestConfig( + tests_root=Path("/project/tests"), + project_root_path=Path("/project") + ) + invocation_id = MockInvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name="test_function" + ) -def test_existing_tests_source_for_format_time_called_correctly( - sample_function_to_tests: dict[str, set[FunctionCalledInTest]], - sample_tests_root: Path, - sample_original_test_results: TestResults, - sample_optimized_test_results: TestResults, -): - """Test that format_time is called with correct values (min of runtime lists).""" - with patch("codeflash.code_utils.time_utils.format_time") as mock_format_time: - mock_format_time.side_effect = lambda x: f"{x} ns" + original_runtimes = {invocation_id: [0]} + optimized_runtimes = {invocation_id: [0]} - existing_tests_source_for( - "my_module.my_function", - sample_function_to_tests, - sample_tests_root, - sample_original_test_results, - sample_optimized_test_results, + result = existing_tests_source_for( + function_qualified_name, + function_to_tests, + test_cfg, + original_runtimes, + optimized_runtimes ) - # Check that format_time was called with the minimum values - call_args = [call[0][0] for call in mock_format_time.call_args_list] + expected = """- test_module.py + - test_function: NaN -> NaN - # Should include minimum values (not aggregated) - assert 500 in call_args # test_basic_functionality original: min(1000, 
500) - assert 400 in call_args # test_basic_functionality optimized: min(800, 400) - assert 2000 in call_args # test_edge_cases original - assert 1500 in call_args # test_edge_cases optimized - assert 3000 in call_args # test_performance original - assert 2100 in call_args # test_performance optimized \ No newline at end of file +""" + assert result == expected From d2289e54a80d0ac86f8083f995a658f29f96a662 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 17 Jun 2025 20:15:24 -0700 Subject: [PATCH 11/16] tests work now, ready to merge --- codeflash/result/create_pr.py | 33 +- tests/test_add_runtime_comments.py | 73 ++-- tests/test_existing_tests_source_for.py | 479 +++++++++++------------- 3 files changed, 295 insertions(+), 290 deletions(-) diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 72ef6d244..518781a9e 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -17,6 +17,7 @@ git_root_dir, ) from codeflash.code_utils.github_utils import github_pr_url +from codeflash.code_utils.tabulate import tabulate from codeflash.code_utils.time_utils import format_time from codeflash.github.PrComment import FileDiffContent, PrComment from codeflash.result.critic import performance_gain @@ -38,6 +39,8 @@ def existing_tests_source_for( if not test_files: return "" output = "" + rows = [] + headers = ["Test File::Test Function", "Original ⏱️", "Optimized ⏱️", "Improvement"] tests_root = test_cfg.tests_root module_root = test_cfg.project_root_path rel_tests_root = tests_root.relative_to(module_root) @@ -76,7 +79,6 @@ def existing_tests_source_for( original_tests_to_runtimes.keys() ) # both will have the same keys as some default values are assigned in the previous loop for filename in sorted(all_rel_paths): - output += f"- {filename}\n" all_qualified_names = original_tests_to_runtimes[ filename ].keys() # both will have the same keys as some default values are assigned in the previous loop @@ -90,7 +92,6 @@ def existing_tests_source_for( print_original_runtime = "NaN" else: print_original_runtime = format_time(original_tests_to_runtimes[filename][qualified_name]) - arrow = "->" if ( original_tests_to_runtimes[filename][qualified_name] != 0 and optimized_tests_to_runtimes[filename][qualified_name] != 0 @@ -107,14 +108,32 @@ def existing_tests_source_for( * 100 ) if greater: - output += f" - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime} $$\\color{{red}}({perf_gain:.2f}\\\\%)$$\n" + rows.append( + [ + f"`{filename}::{qualified_name}`", + f"{print_original_runtime}", + f"{print_optimized_runtime}", + f"⚠️{perf_gain:.2f}%", + ] + ) else: - output += f" - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime} $$\\color{{green}}({perf_gain:.2f}\\\\%)$$\n" + rows.append( + [ + f"`{filename}::{qualified_name}`", + f"{print_original_runtime}", + f"{print_optimized_runtime}", + f"✅{perf_gain:.2f}%", + ] + ) else: # one of them is NaN - output += f" - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}\n" - # output += f"$$\\colorbox{{pink}}\{{ - {qualified_name}: {print_original_runtime} {arrow} {print_optimized_runtime}}}$$\n" - output += "\n" + rows.append( + [f"`{filename}::{qualified_name}`", f"{print_original_runtime}", f"{print_optimized_runtime}", "❌"] + ) + output += tabulate( + headers=headers, tabular_data=rows, tablefmt="pipe", colglobalalign=None, preserve_whitespace=True + ) + output += "\n" return output diff --git a/tests/test_add_runtime_comments.py 
b/tests/test_add_runtime_comments.py index bbb833751..c553845e8 100644 --- a/tests/test_add_runtime_comments.py +++ b/tests/test_add_runtime_comments.py @@ -14,28 +14,34 @@ def test_config(): """Create a mock TestConfig for testing.""" config = Mock(spec=TestConfig) config.project_root_path = Path("/project") + config.test_framework= "pytest" + config.tests_project_rootdir = Path("/project/tests") config.tests_root = Path("/project/tests") return config -@pytest.fixture -def sample_invocation_id(): - """Create a sample InvocationId for testing.""" - return InvocationId( - test_module_path="tests.test_module", - test_class_name="TestClass", - test_function_name="test_function", - ) - - -@pytest.fixture -def sample_invocation_id_no_class(): - """Create a sample InvocationId without class for testing.""" - return InvocationId( - test_module_path="tests.test_module", - test_class_name=None, - test_function_name="test_function", - ) +# @pytest.fixture +# def sample_invocation_id(): +# """Create a sample InvocationId for testing.""" +# return InvocationId( +# test_module_path="test_module_path", +# test_class_name="test_class_name", +# test_function_name="test_function_name", +# function_getting_tested="function_getting_tested", +# iteration_id="0", +# ) +# +# +# @pytest.fixture +# def sample_invocation_id_no_class(): +# """Create a sample InvocationId without class for testing.""" +# return InvocationId( +# test_module_path="test_module_path", +# test_class_name=None, +# test_function_name="test_function_name", +# function_getting_tested="function_getting_tested", +# iteration_id="0", +# ) class TestAddRuntimeCommentsToGeneratedTests: @@ -60,6 +66,8 @@ def test_add_runtime_comments_simple_function(self, test_config): test_module_path="tests.test_module", test_class_name=None, test_function_name="test_function", + function_getting_tested="some_function", + iteration_id="0", ) original_runtimes = {invocation_id: [1000000000, 1200000000]} # 1s, 1.2s in nanoseconds @@ -70,7 +78,7 @@ def test_add_runtime_comments_simple_function(self, test_config): ) expected_source = '''def test_function(): - codeflash_output = some_function() # 1.00s -> 500.00ms (50.00%) + codeflash_output = some_function() # 1.00s -> 500ms (100.00%) assert codeflash_output == expected ''' @@ -99,6 +107,9 @@ def test_function(self): test_module_path="tests.test_module", test_class_name="TestClass", test_function_name="test_function", + function_getting_tested="some_function", + iteration_id="0", + ) original_runtimes = {invocation_id: [2000000000]} # 2s in nanoseconds @@ -110,7 +121,7 @@ def test_function(self): expected_source = '''class TestClass: def test_function(self): - codeflash_output = some_function() # 2.00s -> 1.00s (50.00%) + codeflash_output = some_function() # 2.00s -> 1.00s (100.00%) assert codeflash_output == expected ''' @@ -141,6 +152,8 @@ def test_add_runtime_comments_multiple_assignments(self, test_config): test_module_path="tests.test_module", test_class_name=None, test_function_name="test_function", + function_getting_tested="some_function", + iteration_id="0", ) original_runtimes = {invocation_id: [1500000000]} # 1.5s in nanoseconds @@ -152,9 +165,9 @@ def test_add_runtime_comments_multiple_assignments(self, test_config): expected_source = '''def test_function(): setup_data = prepare_test() - codeflash_output = some_function() # 1.50s -> 750.00ms (50.00%) + codeflash_output = some_function() # 1.50s -> 750ms (100.00%) assert codeflash_output == expected - codeflash_output = another_function() # 1.50s -> 
750.00ms (50.00%) + codeflash_output = another_function() # 1.50s -> 750ms (100.00%) assert codeflash_output == expected2 ''' @@ -183,6 +196,8 @@ def test_add_runtime_comments_no_matching_runtimes(self, test_config): test_module_path="tests.other_module", test_class_name=None, test_function_name="other_function", + function_getting_tested="some_other_function", + iteration_id="0", ) original_runtimes = {invocation_id: [1000000000]} @@ -217,6 +232,8 @@ def test_add_runtime_comments_no_codeflash_output(self, test_config): test_module_path="tests.test_module", test_class_name=None, test_function_name="test_function", + function_getting_tested="some_function", + iteration_id="0", ) original_runtimes = {invocation_id: [1000000000]} @@ -264,12 +281,16 @@ def test_add_runtime_comments_multiple_tests(self, test_config): test_module_path="tests.test_module1", test_class_name=None, test_function_name="test_function1", + function_getting_tested="some_function", + iteration_id="0", ) invocation_id2 = InvocationId( test_module_path="tests.test_module2", test_class_name=None, test_function_name="test_function2", + function_getting_tested="another_function", + iteration_id = "0", ) original_runtimes = { @@ -286,12 +307,12 @@ def test_add_runtime_comments_multiple_tests(self, test_config): ) expected_source1 = '''def test_function1(): - codeflash_output = some_function() # 1.00s -> 500.00ms (50.00%) + codeflash_output = some_function() # 1.00s -> 500ms (100.00%) assert codeflash_output == expected ''' expected_source2 = '''def test_function2(): - codeflash_output = another_function() # 2.00s -> 800.00ms (60.00%) + codeflash_output = another_function() # 2.00s -> 800ms (150.00%) assert codeflash_output == expected ''' @@ -320,6 +341,8 @@ def test_add_runtime_comments_performance_regression(self, test_config): test_module_path="tests.test_module", test_class_name=None, test_function_name="test_function", + function_getting_tested="some_function", + iteration_id="0", ) original_runtimes = {invocation_id: [1000000000]} # 1s @@ -330,7 +353,7 @@ def test_add_runtime_comments_performance_regression(self, test_config): ) expected_source = '''def test_function(): - codeflash_output = some_function() # 1.00s -> 1.50s (-50.00%) + codeflash_output = some_function() # 1.00s -> 1.50s (-33.33%) assert codeflash_output == expected ''' diff --git a/tests/test_existing_tests_source_for.py b/tests/test_existing_tests_source_for.py index 27495939c..945de6d84 100644 --- a/tests/test_existing_tests_source_for.py +++ b/tests/test_existing_tests_source_for.py @@ -1,390 +1,353 @@ -from __future__ import annotations - import os from pathlib import Path -from typing import NamedTuple +from unittest.mock import Mock import pytest from codeflash.result.create_pr import existing_tests_source_for -class MockInvocationId(NamedTuple): - test_module_path: str - test_class_name: str | None - test_function_name: str - - -class MockTestsInFile(NamedTuple): - test_file: str - - -class MockFunctionCalledInTest(NamedTuple): - tests_in_file: MockTestsInFile - - -class MockTestConfig(NamedTuple): - tests_root: Path - project_root_path: Path - - class TestExistingTestsSourceFor: """Test cases for existing_tests_source_for function.""" - def test_no_test_files_found(self): - """Test when no test files are found for the function.""" - function_qualified_name = "module.function_name" + def setup_method(self): + """Set up test fixtures.""" + # Mock test config + self.test_cfg = Mock() + self.test_cfg.tests_root = Path("/project/tests") + 
self.test_cfg.project_root_path = Path("/project") + + # Mock invocation ID + self.mock_invocation_id = Mock() + self.mock_invocation_id.test_module_path = "tests.test_module" + self.mock_invocation_id.test_class_name = "TestClass" + self.mock_invocation_id.test_function_name = "test_function" + + # Mock function called in test + self.mock_function_called_in_test = Mock() + self.mock_function_called_in_test.tests_in_file = Mock() + self.mock_function_called_in_test.tests_in_file.test_file = "/project/tests/test_module.py" + + def test_no_test_files_returns_empty_string(self): + """Test that function returns empty string when no test files exist.""" function_to_tests = {} - test_cfg = MockTestConfig( - tests_root=Path("/project/tests"), - project_root_path=Path("/project") - ) original_runtimes = {} optimized_runtimes = {} result = existing_tests_source_for( - function_qualified_name, + "module.function", function_to_tests, - test_cfg, + self.test_cfg, original_runtimes, optimized_runtimes ) assert result == "" - def test_single_test_file_with_function_test(self): - """Test with a single test file containing one test function.""" - function_qualified_name = "module.function_name" - test_file_path = "/project/tests/test_module.py" - + def test_single_test_with_improvement(self): + """Test single test showing performance improvement.""" function_to_tests = { - function_qualified_name: { - MockFunctionCalledInTest( - tests_in_file=MockTestsInFile(test_file=test_file_path) - ) - } + "module.function": {self.mock_function_called_in_test} + } + original_runtimes = { + self.mock_invocation_id: [1000000] # 1ms in nanoseconds + } + optimized_runtimes = { + self.mock_invocation_id: [500000] # 0.5ms in nanoseconds } - - test_cfg = MockTestConfig( - tests_root=Path("/project/tests"), - project_root_path=Path("/project") - ) - - invocation_id = MockInvocationId( - test_module_path="tests.test_module", - test_class_name=None, - test_function_name="test_function" - ) - - original_runtimes = {invocation_id: [1000000, 1100000, 900000]} # 1ms, 1.1ms, 0.9ms - optimized_runtimes = {invocation_id: [500000, 600000, 400000]} # 0.5ms, 0.6ms, 0.4ms result = existing_tests_source_for( - function_qualified_name, + "module.function", function_to_tests, - test_cfg, + self.test_cfg, original_runtimes, optimized_runtimes ) - expected = """- test_module.py - - test_function: 900μs -> 400μs $\\color{green}(55.56\\%)$ - + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:------------------------------------------|:--------------|:---------------|:--------------| +| `test_module.py::TestClass.test_function` | 1.00ms | 500μs | ✅100.00% | """ - assert result == expected - def test_single_test_file_with_class_test(self): - """Test with a single test file containing a test method in a class.""" - function_qualified_name = "module.function_name" - test_file_path = "/project/tests/test_module.py" + assert result == expected + def test_single_test_with_regression(self): + """Test single test showing performance regression.""" function_to_tests = { - function_qualified_name: { - MockFunctionCalledInTest( - tests_in_file=MockTestsInFile(test_file=test_file_path) - ) - } + "module.function": {self.mock_function_called_in_test} + } + original_runtimes = { + self.mock_invocation_id: [500000] # 0.5ms in nanoseconds + } + optimized_runtimes = { + self.mock_invocation_id: [1000000] # 1ms in nanoseconds } - - test_cfg = MockTestConfig( - tests_root=Path("/project/tests"), - 
project_root_path=Path("/project") - ) - - invocation_id = MockInvocationId( - test_module_path="tests.test_module", - test_class_name="TestClass", - test_function_name="test_method" - ) - - original_runtimes = {invocation_id: [2000000]} # 2ms - optimized_runtimes = {invocation_id: [3000000]} # 3ms (slower) result = existing_tests_source_for( - function_qualified_name, + "module.function", function_to_tests, - test_cfg, + self.test_cfg, original_runtimes, optimized_runtimes ) - expected = """- test_module.py - - TestClass.test_method: 2.00ms -> 3.00ms $\\color{red}(-50.00\\%)$ - + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:------------------------------------------|:--------------|:---------------|:--------------| +| `test_module.py::TestClass.test_function` | 500μs | 1.00ms | ⚠️-50.00% | """ + assert result == expected - def test_multiple_test_files_and_methods(self): - """Test with multiple test files and multiple test methods.""" - function_qualified_name = "module.function_name" - test_file_path1 = "/project/tests/test_module1.py" - test_file_path2 = "/project/tests/test_module2.py" + def test_test_without_class_name(self): + """Test function without class name (standalone test function).""" + mock_invocation_no_class = Mock() + mock_invocation_no_class.test_module_path = "tests.test_module" + mock_invocation_no_class.test_class_name = None + mock_invocation_no_class.test_function_name = "test_standalone" function_to_tests = { - function_qualified_name: { - MockFunctionCalledInTest( - tests_in_file=MockTestsInFile(test_file=test_file_path1) - ), - MockFunctionCalledInTest( - tests_in_file=MockTestsInFile(test_file=test_file_path2) - ) - } + "module.function": {self.mock_function_called_in_test} } - - test_cfg = MockTestConfig( - tests_root=Path("/project/tests"), - project_root_path=Path("/project") - ) - - invocation_id1 = MockInvocationId( - test_module_path="tests.test_module1", - test_class_name=None, - test_function_name="test_function1" - ) - - invocation_id2 = MockInvocationId( - test_module_path="tests.test_module1", - test_class_name="TestClass", - test_function_name="test_method1" - ) - - invocation_id3 = MockInvocationId( - test_module_path="tests.test_module2", - test_class_name=None, - test_function_name="test_function2" - ) - original_runtimes = { - invocation_id1: [1000000], # 1ms - invocation_id2: [2000000], # 2ms - invocation_id3: [500000] # 0.5ms + mock_invocation_no_class: [1000000] } optimized_runtimes = { - invocation_id1: [800000], # 0.8ms - invocation_id2: [1500000], # 1.5ms - invocation_id3: [400000] # 0.4ms + mock_invocation_no_class: [800000] } result = existing_tests_source_for( - function_qualified_name, + "module.function", function_to_tests, - test_cfg, + self.test_cfg, original_runtimes, optimized_runtimes ) - expected = """- test_module1.py - - TestClass.test_method1: 2.00ms -> 1.50ms $\\color{green}(25.00\\%)$ - - test_function1: 1.00ms -> 800μs $\\color{green}(20.00\\%)$ - -- test_module2.py - - test_function2: 500μs -> 400μs $\\color{green}(20.00\\%)$ - + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:----------------------------------|:--------------|:---------------|:--------------| +| `test_module.py::test_standalone` | 1.00ms | 800μs | ✅25.00% | """ - assert result == expected - def test_missing_runtime_data(self): - """Test when runtime data is missing for some tests.""" - function_qualified_name = "module.function_name" - test_file_path = 
"/project/tests/test_module.py" + assert result == expected + def test_missing_original_runtime(self): + """Test when original runtime is missing (shows NaN).""" function_to_tests = { - function_qualified_name: { - MockFunctionCalledInTest( - tests_in_file=MockTestsInFile(test_file=test_file_path) - ) - } + "module.function": {self.mock_function_called_in_test} + } + original_runtimes = {} + optimized_runtimes = { + self.mock_invocation_id: [500000] } - test_cfg = MockTestConfig( - tests_root=Path("/project/tests"), - project_root_path=Path("/project") + result = existing_tests_source_for( + "module.function", + function_to_tests, + self.test_cfg, + original_runtimes, + optimized_runtimes ) - invocation_id1 = MockInvocationId( - test_module_path="tests.test_module", - test_class_name=None, - test_function_name="test_with_original_only" - ) + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:------------------------------------------|--------------:|:---------------|:--------------| +| `test_module.py::TestClass.test_function` | nan | 500μs | ❌ | +""" - invocation_id2 = MockInvocationId( - test_module_path="tests.test_module", - test_class_name=None, - test_function_name="test_with_optimized_only" - ) + assert result == expected - original_runtimes = {invocation_id1: [1000000]} # Only original - optimized_runtimes = {invocation_id2: [500000]} # Only optimized + def test_missing_optimized_runtime(self): + """Test when optimized runtime is missing (shows NaN).""" + function_to_tests = { + "module.function": {self.mock_function_called_in_test} + } + original_runtimes = { + self.mock_invocation_id: [1000000] + } + optimized_runtimes = {} result = existing_tests_source_for( - function_qualified_name, + "module.function", function_to_tests, - test_cfg, + self.test_cfg, original_runtimes, optimized_runtimes ) - expected = """- test_module.py - - test_with_optimized_only: NaN -> 500μs - - test_with_original_only: 1.00ms -> NaN - + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:------------------------------------------|:--------------|---------------:|:--------------| +| `test_module.py::TestClass.test_function` | 1.00ms | nan | ❌ | """ + assert result == expected - def test_nested_test_directory(self): - """Test with nested test directories.""" - function_qualified_name = "module.function_name" - test_file_path = "/project/tests/unit/test_module.py" + def test_multiple_tests_sorted_output(self): + """Test multiple tests with sorted output by filename and function name.""" + # Create second test file + mock_function_called_2 = Mock() + mock_function_called_2.tests_in_file = Mock() + mock_function_called_2.tests_in_file.test_file = "/project/tests/test_another.py" + + mock_invocation_2 = Mock() + mock_invocation_2.test_module_path = "tests.test_another" + mock_invocation_2.test_class_name = "TestAnother" + mock_invocation_2.test_function_name = "test_another_function" function_to_tests = { - function_qualified_name: { - MockFunctionCalledInTest( - tests_in_file=MockTestsInFile(test_file=test_file_path) - ) - } + "module.function": {self.mock_function_called_in_test, mock_function_called_2} + } + original_runtimes = { + self.mock_invocation_id: [1000000], + mock_invocation_2: [2000000] + } + optimized_runtimes = { + self.mock_invocation_id: [800000], + mock_invocation_2: [1500000] } - - test_cfg = MockTestConfig( - tests_root=Path("/project/tests"), - project_root_path=Path("/project") - ) - - invocation_id = 
MockInvocationId( - test_module_path="tests.unit.test_module", - test_class_name=None, - test_function_name="test_function" - ) - - original_runtimes = {invocation_id: [1000000]} - optimized_runtimes = {invocation_id: [800000]} result = existing_tests_source_for( - function_qualified_name, + "module.function", function_to_tests, - test_cfg, + self.test_cfg, original_runtimes, optimized_runtimes ) - expected = """- unit/test_module.py - - test_function: 1.00ms -> 800μs $\\color{green}(20.00\\%)$ - + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:-----------------------------------------------------|:--------------|:---------------|:--------------| +| `test_another.py::TestAnother.test_another_function` | 2.00ms | 1.50ms | ✅33.33% | +| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | ✅25.00% | """ - assert result == expected - def test_multiple_invocations_same_test(self): - """Test when the same test has multiple invocations (runtimes are summed).""" - function_qualified_name = "module.function_name" - test_file_path = "/project/tests/test_module.py" + assert result == expected + def test_multiple_runtimes_uses_minimum(self): + """Test that function uses minimum runtime when multiple measurements exist.""" function_to_tests = { - function_qualified_name: { - MockFunctionCalledInTest( - tests_in_file=MockTestsInFile(test_file=test_file_path) - ) - } + "module.function": {self.mock_function_called_in_test} + } + original_runtimes = { + self.mock_invocation_id: [1000000, 1200000, 800000] # min: 800000 + } + optimized_runtimes = { + self.mock_invocation_id: [600000, 700000, 500000] # min: 500000 } - test_cfg = MockTestConfig( - tests_root=Path("/project/tests"), - project_root_path=Path("/project") + result = existing_tests_source_for( + "module.function", + function_to_tests, + self.test_cfg, + original_runtimes, + optimized_runtimes ) - # Same test function with multiple invocations - invocation_id1 = MockInvocationId( - test_module_path="tests.test_module", - test_class_name=None, - test_function_name="test_function" - ) + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:------------------------------------------|:--------------|:---------------|:--------------| +| `test_module.py::TestClass.test_function` | 800μs | 500μs | ✅60.00% | +""" - invocation_id2 = MockInvocationId( - test_module_path="tests.test_module", - test_class_name=None, - test_function_name="test_function" - ) + assert result == expected + + def test_complex_module_path_conversion(self): + """Test conversion of complex module paths to file paths.""" + mock_invocation_complex = Mock() + mock_invocation_complex.test_module_path = "tests.integration.test_complex_module" + mock_invocation_complex.test_class_name = "TestComplex" + mock_invocation_complex.test_function_name = "test_complex_function" + mock_function_complex = Mock() + mock_function_complex.tests_in_file = Mock() + mock_function_complex.tests_in_file.test_file = f"/project/tests/integration/test_complex_module.py" + + function_to_tests = { + "module.function": {mock_function_complex} + } original_runtimes = { - invocation_id1: [1000000, 1200000], # min: 1ms - invocation_id2: [800000, 900000] # min: 0.8ms + mock_invocation_complex: [1000000] } optimized_runtimes = { - invocation_id1: [600000, 700000], # min: 0.6ms - invocation_id2: [400000, 500000] # min: 0.4ms + mock_invocation_complex: [750000] } result = existing_tests_source_for( - function_qualified_name, + 
"module.function", function_to_tests, - test_cfg, + self.test_cfg, original_runtimes, optimized_runtimes ) - # Total original: 1ms + 0.8ms = 1.8ms - # Total optimized: 0.6ms + 0.4ms = 1ms - expected = """- test_module.py - - test_function: 1.80ms -> 1.00ms $\\color{green}(44.44\\%)$ - + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:------------------------------------------------------------------------|:--------------|:---------------|:--------------| +| `integration/test_complex_module.py::TestComplex.test_complex_function` | 1.00ms | 750μs | ✅33.33% | """ + assert result == expected def test_zero_runtime_values(self): """Test handling of zero runtime values.""" - function_qualified_name = "module.function_name" - test_file_path = "/project/tests/test_module.py" - function_to_tests = { - function_qualified_name: { - MockFunctionCalledInTest( - tests_in_file=MockTestsInFile(test_file=test_file_path) - ) - } + "module.function": {self.mock_function_called_in_test} + } + original_runtimes = { + self.mock_invocation_id: [0] + } + optimized_runtimes = { + self.mock_invocation_id: [0] } - test_cfg = MockTestConfig( - tests_root=Path("/project/tests"), - project_root_path=Path("/project") + result = existing_tests_source_for( + "module.function", + function_to_tests, + self.test_cfg, + original_runtimes, + optimized_runtimes ) - invocation_id = MockInvocationId( - test_module_path="tests.test_module", - test_class_name=None, - test_function_name="test_function" - ) + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:------------------------------------------|--------------:|---------------:|:--------------| +| `test_module.py::TestClass.test_function` | nan | nan | ❌ | +""" + + assert result == expected + + def test_filters_out_generated_tests(self): + """Test that generated tests are filtered out and only non-generated tests are included.""" + # Create a test that would be filtered out (not in non_generated_tests) + mock_generated_test = Mock() + mock_generated_test.tests_in_file = Mock() + mock_generated_test.tests_in_file.test_file = "/project/tests/generated_test.py" - original_runtimes = {invocation_id: [0]} - optimized_runtimes = {invocation_id: [0]} + mock_generated_invocation = Mock() + mock_generated_invocation.test_module_path = "tests.generated_test" + mock_generated_invocation.test_class_name = "TestGenerated" + mock_generated_invocation.test_function_name = "test_generated" + + function_to_tests = { + "module.function": {self.mock_function_called_in_test} + } + original_runtimes = { + self.mock_invocation_id: [1000000], + mock_generated_invocation: [500000] # This should be filtered out + } + optimized_runtimes = { + self.mock_invocation_id: [800000], + mock_generated_invocation: [400000] # This should be filtered out + } result = existing_tests_source_for( - function_qualified_name, + "module.function", function_to_tests, - test_cfg, + self.test_cfg, original_runtimes, optimized_runtimes ) - expected = """- test_module.py - - test_function: NaN -> NaN - + # Should only include the non-generated test + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | +|:------------------------------------------|:--------------|:---------------|:--------------| +| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | ✅25.00% | """ + assert result == expected + + From df6efe9dcb1358250336c29332a168cc0eb50269 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 17 Jun 2025 
20:20:12 -0700 Subject: [PATCH 12/16] mypy fix --- codeflash/result/create_pr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 518781a9e..3c828d744 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -130,11 +130,11 @@ def existing_tests_source_for( rows.append( [f"`{filename}::{qualified_name}`", f"{print_original_runtime}", f"{print_optimized_runtime}", "❌"] ) - output += tabulate( + output += tabulate( # type: ignore[no-untyped-call] headers=headers, tabular_data=rows, tablefmt="pipe", colglobalalign=None, preserve_whitespace=True ) output += "\n" - return output + return output # type: ignore[no-any-return] def check_create_pr( From 9ad40e24edc0579d96dd8bedd51264a2e65a8434 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Tue, 17 Jun 2025 20:23:48 -0700 Subject: [PATCH 13/16] mypy fix --- codeflash/result/create_pr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 3c828d744..190d20e80 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -38,7 +38,7 @@ def existing_tests_source_for( test_files = function_to_tests.get(function_qualified_name_with_modules_from_root) if not test_files: return "" - output = "" + output: str = "" rows = [] headers = ["Test File::Test Function", "Original ⏱️", "Optimized ⏱️", "Improvement"] tests_root = test_cfg.tests_root @@ -134,7 +134,7 @@ def existing_tests_source_for( headers=headers, tabular_data=rows, tablefmt="pipe", colglobalalign=None, preserve_whitespace=True ) output += "\n" - return output # type: ignore[no-any-return] + return output def check_create_pr( From 4806815e18d38275c8f9f05e2236d3015eea531b Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Wed, 18 Jun 2025 14:40:48 -0700 Subject: [PATCH 14/16] ready to merge --- codeflash/code_utils/edit_generated_tests.py | 6 +- codeflash/code_utils/time_utils.py | 12 + codeflash/result/create_pr.py | 25 +- tests/test_add_runtime_comments.py | 520 +++++++++++++++++-- tests/test_existing_tests_source_for.py | 59 +-- tests/test_humanize_time.py | 102 +++- 6 files changed, 637 insertions(+), 87 deletions(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index 94c18ab5c..afb33317c 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -5,7 +5,7 @@ import libcst as cst from codeflash.cli_cmds.console import logger -from codeflash.code_utils.time_utils import format_time +from codeflash.code_utils.time_utils import format_perf, format_time from codeflash.models.models import GeneratedTests, GeneratedTestsList, InvocationId from codeflash.result.critic import performance_gain from codeflash.verification.verification_utils import TestConfig @@ -131,11 +131,11 @@ def leave_SimpleStatementLine( if matching_original_times and matching_optimized_times: original_time = min(matching_original_times) optimized_time = min(matching_optimized_times) - perf_gain = ( + perf_gain = format_perf( performance_gain(original_runtime_ns=original_time, optimized_runtime_ns=optimized_time) * 100 ) # Create the runtime comment - comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)} ({perf_gain:.2f}%)" + comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)} ({perf_gain}%)" # Add comment to the trailing whitespace 
new_trailing_whitespace = cst.TrailingWhitespace( diff --git a/codeflash/code_utils/time_utils.py b/codeflash/code_utils/time_utils.py index 89273fe2d..4e32eedab 100644 --- a/codeflash/code_utils/time_utils.py +++ b/codeflash/code_utils/time_utils.py @@ -85,3 +85,15 @@ def format_time(nanoseconds: int) -> str: # This should never be reached, but included for completeness return f"{nanoseconds}ns" + + +def format_perf(percentage: float) -> str: + """Format percentage into a human-readable string with 3 significant digits when needed.""" + percentage_abs = abs(percentage) + if percentage_abs >= 100: + return f"{percentage:.0f}" + if percentage_abs >= 10: + return f"{percentage:.1f}" + if percentage_abs >= 1: + return f"{percentage:.2f}" + return f"{percentage:.3f}" diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py index 190d20e80..a08875a4f 100644 --- a/codeflash/result/create_pr.py +++ b/codeflash/result/create_pr.py @@ -18,7 +18,7 @@ ) from codeflash.code_utils.github_utils import github_pr_url from codeflash.code_utils.tabulate import tabulate -from codeflash.code_utils.time_utils import format_time +from codeflash.code_utils.time_utils import format_perf, format_time from codeflash.github.PrComment import FileDiffContent, PrComment from codeflash.result.critic import performance_gain @@ -40,7 +40,7 @@ def existing_tests_source_for( return "" output: str = "" rows = [] - headers = ["Test File::Test Function", "Original ⏱️", "Optimized ⏱️", "Improvement"] + headers = ["Test File::Test Function", "Original ⏱️", "Optimized ⏱️", "Speedup"] tests_root = test_cfg.tests_root module_root = test_cfg.project_root_path rel_tests_root = tests_root.relative_to(module_root) @@ -84,23 +84,17 @@ def existing_tests_source_for( ].keys() # both will have the same keys as some default values are assigned in the previous loop for qualified_name in sorted(all_qualified_names): # if not present in optimized output nan - if optimized_tests_to_runtimes[filename][qualified_name] == 0: - print_optimized_runtime = "NaN" - else: - print_optimized_runtime = format_time(optimized_tests_to_runtimes[filename][qualified_name]) - if original_tests_to_runtimes[filename][qualified_name] == 0: - print_original_runtime = "NaN" - else: - print_original_runtime = format_time(original_tests_to_runtimes[filename][qualified_name]) if ( original_tests_to_runtimes[filename][qualified_name] != 0 and optimized_tests_to_runtimes[filename][qualified_name] != 0 ): + print_optimized_runtime = format_time(optimized_tests_to_runtimes[filename][qualified_name]) + print_original_runtime = format_time(original_tests_to_runtimes[filename][qualified_name]) greater = ( optimized_tests_to_runtimes[filename][qualified_name] > original_tests_to_runtimes[filename][qualified_name] ) - perf_gain = ( + perf_gain = format_perf( performance_gain( original_runtime_ns=original_tests_to_runtimes[filename][qualified_name], optimized_runtime_ns=optimized_tests_to_runtimes[filename][qualified_name], @@ -113,7 +107,7 @@ def existing_tests_source_for( f"`{filename}::{qualified_name}`", f"{print_original_runtime}", f"{print_optimized_runtime}", - f"⚠️{perf_gain:.2f}%", + f"⚠️{perf_gain}%", ] ) else: @@ -122,14 +116,9 @@ def existing_tests_source_for( f"`{filename}::{qualified_name}`", f"{print_original_runtime}", f"{print_optimized_runtime}", - f"✅{perf_gain:.2f}%", + f"✅{perf_gain}%", ] ) - else: - # one of them is NaN - rows.append( - [f"`{filename}::{qualified_name}`", f"{print_original_runtime}", f"{print_optimized_runtime}", "❌"] - ) 
output += tabulate( # type: ignore[no-untyped-call] headers=headers, tabular_data=rows, tablefmt="pipe", colglobalalign=None, preserve_whitespace=True ) diff --git a/tests/test_add_runtime_comments.py b/tests/test_add_runtime_comments.py index c553845e8..6a579bb85 100644 --- a/tests/test_add_runtime_comments.py +++ b/tests/test_add_runtime_comments.py @@ -5,10 +5,10 @@ import pytest from codeflash.code_utils.edit_generated_tests import add_runtime_comments_to_generated_tests -from codeflash.models.models import GeneratedTests, GeneratedTestsList, InvocationId +from codeflash.models.models import GeneratedTests, GeneratedTestsList, InvocationId, FunctionTestInvocation, TestType, \ + VerificationType, TestResults from codeflash.verification.verification_utils import TestConfig - @pytest.fixture def test_config(): """Create a mock TestConfig for testing.""" @@ -19,32 +19,484 @@ def test_config(): config.tests_root = Path("/project/tests") return config +class TestAddRuntimeComments: + """Test cases for add_runtime_comments_to_generated_tests method.""" + + def create_test_invocation( + self, test_function_name: str, runtime: int, loop_index: int = 1, iteration_id: str = "1", did_pass: bool = True + ) -> FunctionTestInvocation: + """Helper to create test invocation objects.""" + return FunctionTestInvocation( + loop_index=loop_index, + id=InvocationId( + test_module_path="tests.test_module", + test_class_name=None, + test_function_name=test_function_name, + function_getting_tested="test_function", + iteration_id=iteration_id, + ), + file_name=Path("tests/test.py"), + did_pass=did_pass, + runtime=runtime, + test_framework="pytest", + test_type=TestType.GENERATED_REGRESSION, + return_value=None, + timed_out=False, + verification_type=VerificationType.FUNCTION_CALL, + ) + + def test_basic_runtime_comment_addition(self, test_config): + """Test basic functionality of adding runtime comments.""" + # Create test source code + test_source = """def test_bubble_sort(): + codeflash_output = bubble_sort([3, 1, 2]) + assert codeflash_output == [1, 2, 3] +""" + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py"), + ) + """add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + )""" + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results + original_test_results = TestResults() + optimized_test_results = TestResults() + + # Add test invocations with different runtimes + original_invocation = self.create_test_invocation("test_bubble_sort", 500_000) # 500μs + optimized_invocation = self.create_test_invocation("test_bubble_sort", 300_000) # 300μs + + original_test_results.add(original_invocation) + optimized_test_results.add(optimized_invocation) + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that comments were added + modified_source = result.generated_tests[0].generated_original_test_source + assert "# 500μs -> 300μs" in modified_source + assert "codeflash_output = bubble_sort([3, 1, 2]) # 500μs -> 300μs" in modified_source 
+ + def test_multiple_test_functions(self, test_config): + """Test handling multiple test functions in the same file.""" + test_source = """def test_bubble_sort(): + codeflash_output = bubble_sort([3, 1, 2]) + assert codeflash_output == [1, 2, 3] + +def test_quick_sort(): + codeflash_output = quick_sort([5, 2, 8]) + assert codeflash_output == [2, 5, 8] + +def helper_function(): + return "not a test" +""" + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results for both functions + original_test_results = TestResults() + optimized_test_results = TestResults() + + # Add test invocations for both test functions + original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) + original_test_results.add(self.create_test_invocation("test_quick_sort", 800_000)) + + optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) + optimized_test_results.add(self.create_test_invocation("test_quick_sort", 600_000)) + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + modified_source = result.generated_tests[0].generated_original_test_source + + # Check that comments were added to both test functions + assert "# 500μs -> 300μs" in modified_source + assert "# 800μs -> 600μs" in modified_source + # Helper function should not have comments + assert ( + "helper_function():" in modified_source + and "# " not in modified_source.split("helper_function():")[1].split("\n")[0] + ) + + def test_different_time_formats(self, test_config): + """Test that different time ranges are formatted correctly with new precision rules.""" + test_cases = [ + (999, 500, "999ns -> 500ns"), # nanoseconds + (25_000, 18_000, "25.0μs -> 18.0μs"), # microseconds with precision + (500_000, 300_000, "500μs -> 300μs"), # microseconds full integers + (1_500_000, 800_000, "1.50ms -> 800μs"), # milliseconds with precision + (365_000_000, 290_000_000, "365ms -> 290ms"), # milliseconds full integers + (2_000_000_000, 1_500_000_000, "2.00s -> 1.50s"), # seconds with precision + ] + + for original_time, optimized_time, expected_comment in test_cases: + test_source = """def test_function(): + codeflash_output = some_function() + assert codeflash_output is not None +""" + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results + original_test_results = TestResults() + optimized_test_results = TestResults() + + original_test_results.add(self.create_test_invocation("test_function", original_time)) + optimized_test_results.add(self.create_test_invocation("test_function", optimized_time)) + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = 
optimized_test_results.usable_runtime_data_by_test_case() + # Test the functionality + result = add_runtime_comments_to_generated_tests( + test_config, generated_tests, original_runtimes, optimized_runtimes + ) + + modified_source = result.generated_tests[0].generated_original_test_source + assert f"# {expected_comment}" in modified_source + + def test_missing_test_results(self, test_config): + """Test behavior when test results are missing for a test function.""" + test_source = """def test_bubble_sort(): + codeflash_output = bubble_sort([3, 1, 2]) + assert codeflash_output == [1, 2, 3] +""" + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create empty test results + original_test_results = TestResults() + optimized_test_results = TestResults() + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that no comments were added + modified_source = result.generated_tests[0].generated_original_test_source + assert modified_source == test_source # Should be unchanged + + def test_partial_test_results(self, test_config): + """Test behavior when only one set of test results is available.""" + test_source = """def test_bubble_sort(): + codeflash_output = bubble_sort([3, 1, 2]) + assert codeflash_output == [1, 2, 3] +""" + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results with only original data + original_test_results = TestResults() + optimized_test_results = TestResults() + + original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) + # No optimized results + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that no comments were added + modified_source = result.generated_tests[0].generated_original_test_source + assert modified_source == test_source # Should be unchanged + + def test_multiple_runtimes_uses_minimum(self, test_config): + """Test that when multiple runtimes exist, the minimum is used.""" + test_source = """def test_bubble_sort(): + codeflash_output = bubble_sort([3, 1, 2]) + assert codeflash_output == [1, 2, 3] +""" + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results 
with multiple loop iterations + original_test_results = TestResults() + optimized_test_results = TestResults() + + # Add multiple runs with different runtimes + original_test_results.add(self.create_test_invocation("test_bubble_sort", 600_000, loop_index=1)) + original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000, loop_index=2)) + original_test_results.add(self.create_test_invocation("test_bubble_sort", 550_000, loop_index=3)) + + optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 350_000, loop_index=1)) + optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000, loop_index=2)) + optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 320_000, loop_index=3)) + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that minimum times were used (500μs -> 300μs) + modified_source = result.generated_tests[0].generated_original_test_source + assert "# 500μs -> 300μs" in modified_source + + def test_no_codeflash_output_assignment(self, test_config): + """Test behavior when test doesn't have codeflash_output assignment.""" + test_source = """def test_bubble_sort(): + result = bubble_sort([3, 1, 2]) + assert result == [1, 2, 3] +""" + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results + original_test_results = TestResults() + optimized_test_results = TestResults() + + original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) + optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that no comments were added (no codeflash_output assignment) + modified_source = result.generated_tests[0].generated_original_test_source + assert modified_source == test_source # Should be unchanged + + def test_invalid_python_code_handling(self, test_config): + """Test behavior when test source code is invalid Python.""" + test_source = """def test_bubble_sort(: + codeflash_output = bubble_sort([3, 1, 2]) + assert codeflash_output == [1, 2, 3] +""" # Invalid syntax: extra colon + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results + original_test_results = TestResults() + optimized_test_results = TestResults() + + original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) + 
optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + + # Test the functionality - should handle parse error gracefully + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that original test is preserved when parsing fails + modified_source = result.generated_tests[0].generated_original_test_source + assert modified_source == test_source # Should be unchanged due to parse error + + def test_multiple_generated_tests(self, test_config): + """Test handling multiple generated test objects.""" + test_source_1 = """def test_bubble_sort(): + codeflash_output = bubble_sort([3, 1, 2]) + assert codeflash_output == [1, 2, 3] +""" + + test_source_2 = """def test_quick_sort(): + codeflash_output = quick_sort([5, 2, 8]) + assert codeflash_output == [2, 5, 8] +""" + + generated_test_1 = GeneratedTests( + generated_original_test_source=test_source_1, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_test_2 = GeneratedTests( + generated_original_test_source=test_source_2, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test_1, generated_test_2]) + + # Create test results + original_test_results = TestResults() + optimized_test_results = TestResults() + + original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) + original_test_results.add(self.create_test_invocation("test_quick_sort", 800_000)) + + optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) + optimized_test_results.add(self.create_test_invocation("test_quick_sort", 600_000)) + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that comments were added to both test files + modified_source_1 = result.generated_tests[0].generated_original_test_source + modified_source_2 = result.generated_tests[1].generated_original_test_source + + assert "# 500μs -> 300μs" in modified_source_1 + assert "# 800μs -> 600μs" in modified_source_2 + + def test_preserved_test_attributes(self, test_config): + """Test that other test attributes are preserved during modification.""" + test_source = """def test_bubble_sort(): + codeflash_output = bubble_sort([3, 1, 2]) + assert codeflash_output == [1, 2, 3] +""" + + original_behavior_source = "behavior test source" + original_perf_source = "perf test source" + original_behavior_path = Path("/project/tests/test_module.py") + original_perf_path = Path("/project/tests/test_module_perf.py") + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source=original_behavior_source, + instrumented_perf_test_source=original_perf_source, + behavior_file_path=original_behavior_path, + 
perf_file_path=original_perf_path + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results + original_test_results = TestResults() + optimized_test_results = TestResults() + + original_test_results.add(self.create_test_invocation("test_bubble_sort", 500_000)) + optimized_test_results.add(self.create_test_invocation("test_bubble_sort", 300_000)) + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that other attributes are preserved + modified_test = result.generated_tests[0] + assert modified_test.instrumented_behavior_test_source == original_behavior_source + assert modified_test.instrumented_perf_test_source == original_perf_source + assert modified_test.behavior_file_path == original_behavior_path + assert modified_test.perf_file_path == original_perf_path + + # Check that only the generated_original_test_source was modified + assert "# 500μs -> 300μs" in modified_test.generated_original_test_source + + def test_multistatement_line_handling(self, test_config): + """Test that runtime comments work correctly with multiple statements on one line.""" + test_source = """def test_mutation_of_input(): + # Test that the input list is mutated in-place and returned + arr = [3, 1, 2] + codeflash_output = sorter(arr); result = codeflash_output + assert result == [1, 2, 3] + assert arr == [1, 2, 3] # Input should be mutated +""" + + generated_test = GeneratedTests( + generated_original_test_source=test_source, + instrumented_behavior_test_source="", + instrumented_perf_test_source="", + behavior_file_path=Path("/project/tests/test_module.py"), + perf_file_path=Path("/project/tests/test_module_perf.py") + ) + + generated_tests = GeneratedTestsList(generated_tests=[generated_test]) + + # Create test results + original_test_results = TestResults() + optimized_test_results = TestResults() + + original_test_results.add(self.create_test_invocation("test_mutation_of_input", 19_000)) # 19μs + optimized_test_results.add(self.create_test_invocation("test_mutation_of_input", 14_000)) # 14μs + + original_runtimes = original_test_results.usable_runtime_data_by_test_case() + optimized_runtimes = optimized_test_results.usable_runtime_data_by_test_case() + + # Test the functionality + result = add_runtime_comments_to_generated_tests(test_config, generated_tests, original_runtimes, optimized_runtimes) + + # Check that comments were added to the correct line + modified_source = result.generated_tests[0].generated_original_test_source + assert "# 19.0μs -> 14.0μs" in modified_source + + # Verify the comment is on the line with codeflash_output assignment + lines = modified_source.split("\n") + codeflash_line = None + for line in lines: + if "codeflash_output = sorter(arr)" in line: + codeflash_line = line + break + + assert codeflash_line is not None, "Could not find codeflash_output assignment line" + assert "# 19.0μs -> 14.0μs" in codeflash_line, f"Comment not found in the correct line: {codeflash_line}" + -# @pytest.fixture -# def sample_invocation_id(): -# """Create a sample InvocationId for testing.""" -# return InvocationId( -# test_module_path="test_module_path", -# test_class_name="test_class_name", -# test_function_name="test_function_name", -# function_getting_tested="function_getting_tested", -# 
iteration_id="0", -# ) -# -# -# @pytest.fixture -# def sample_invocation_id_no_class(): -# """Create a sample InvocationId without class for testing.""" -# return InvocationId( -# test_module_path="test_module_path", -# test_class_name=None, -# test_function_name="test_function_name", -# function_getting_tested="function_getting_tested", -# iteration_id="0", -# ) - - -class TestAddRuntimeCommentsToGeneratedTests: def test_add_runtime_comments_simple_function(self, test_config): """Test adding runtime comments to a simple test function.""" test_source = '''def test_function(): @@ -78,7 +530,7 @@ def test_add_runtime_comments_simple_function(self, test_config): ) expected_source = '''def test_function(): - codeflash_output = some_function() # 1.00s -> 500ms (100.00%) + codeflash_output = some_function() # 1.00s -> 500ms (100%) assert codeflash_output == expected ''' @@ -121,7 +573,7 @@ def test_function(self): expected_source = '''class TestClass: def test_function(self): - codeflash_output = some_function() # 2.00s -> 1.00s (100.00%) + codeflash_output = some_function() # 2.00s -> 1.00s (100%) assert codeflash_output == expected ''' @@ -165,9 +617,9 @@ def test_add_runtime_comments_multiple_assignments(self, test_config): expected_source = '''def test_function(): setup_data = prepare_test() - codeflash_output = some_function() # 1.50s -> 750ms (100.00%) + codeflash_output = some_function() # 1.50s -> 750ms (100%) assert codeflash_output == expected - codeflash_output = another_function() # 1.50s -> 750ms (100.00%) + codeflash_output = another_function() # 1.50s -> 750ms (100%) assert codeflash_output == expected2 ''' @@ -307,12 +759,12 @@ def test_add_runtime_comments_multiple_tests(self, test_config): ) expected_source1 = '''def test_function1(): - codeflash_output = some_function() # 1.00s -> 500ms (100.00%) + codeflash_output = some_function() # 1.00s -> 500ms (100%) assert codeflash_output == expected ''' expected_source2 = '''def test_function2(): - codeflash_output = another_function() # 2.00s -> 800ms (150.00%) + codeflash_output = another_function() # 2.00s -> 800ms (150%) assert codeflash_output == expected ''' @@ -353,7 +805,7 @@ def test_add_runtime_comments_performance_regression(self, test_config): ) expected_source = '''def test_function(): - codeflash_output = some_function() # 1.00s -> 1.50s (-33.33%) + codeflash_output = some_function() # 1.00s -> 1.50s (-33.3%) assert codeflash_output == expected ''' diff --git a/tests/test_existing_tests_source_for.py b/tests/test_existing_tests_source_for.py index 945de6d84..8940b20d2 100644 --- a/tests/test_existing_tests_source_for.py +++ b/tests/test_existing_tests_source_for.py @@ -64,9 +64,9 @@ def test_single_test_with_improvement(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:------------------------------------------|:--------------|:---------------|:--------------| -| `test_module.py::TestClass.test_function` | 1.00ms | 500μs | ✅100.00% | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|:------------------------------------------|:--------------|:---------------|:----------| +| `test_module.py::TestClass.test_function` | 1.00ms | 500μs | ✅100% | """ assert result == expected @@ -91,9 +91,9 @@ def test_single_test_with_regression(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | 
-|:------------------------------------------|:--------------|:---------------|:--------------| -| `test_module.py::TestClass.test_function` | 500μs | 1.00ms | ⚠️-50.00% | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|:------------------------------------------|:--------------|:---------------|:----------| +| `test_module.py::TestClass.test_function` | 500μs | 1.00ms | ⚠️-50.0% | """ assert result == expected @@ -123,9 +123,9 @@ def test_test_without_class_name(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:----------------------------------|:--------------|:---------------|:--------------| -| `test_module.py::test_standalone` | 1.00ms | 800μs | ✅25.00% | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|:----------------------------------|:--------------|:---------------|:----------| +| `test_module.py::test_standalone` | 1.00ms | 800μs | ✅25.0% | """ assert result == expected @@ -148,9 +148,8 @@ def test_missing_original_runtime(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:------------------------------------------|--------------:|:---------------|:--------------| -| `test_module.py::TestClass.test_function` | nan | 500μs | ❌ | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|----------------------------|---------------|----------------|-----------| """ assert result == expected @@ -173,9 +172,8 @@ def test_missing_optimized_runtime(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:------------------------------------------|:--------------|---------------:|:--------------| -| `test_module.py::TestClass.test_function` | 1.00ms | nan | ❌ | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|----------------------------|---------------|----------------|-----------| """ assert result == expected @@ -212,10 +210,10 @@ def test_multiple_tests_sorted_output(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:-----------------------------------------------------|:--------------|:---------------|:--------------| -| `test_another.py::TestAnother.test_another_function` | 2.00ms | 1.50ms | ✅33.33% | -| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | ✅25.00% | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|:-----------------------------------------------------|:--------------|:---------------|:----------| +| `test_another.py::TestAnother.test_another_function` | 2.00ms | 1.50ms | ✅33.3% | +| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | ✅25.0% | """ assert result == expected @@ -240,9 +238,9 @@ def test_multiple_runtimes_uses_minimum(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:------------------------------------------|:--------------|:---------------|:--------------| -| `test_module.py::TestClass.test_function` | 800μs | 500μs | ✅60.00% | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|:------------------------------------------|:--------------|:---------------|:----------| +| `test_module.py::TestClass.test_function` | 800μs | 500μs | ✅60.0% | """ assert result == expected @@ -276,9 +274,9 @@ def 
test_complex_module_path_conversion(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:------------------------------------------------------------------------|:--------------|:---------------|:--------------| -| `integration/test_complex_module.py::TestComplex.test_complex_function` | 1.00ms | 750μs | ✅33.33% | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|:------------------------------------------------------------------------|:--------------|:---------------|:----------| +| `integration/test_complex_module.py::TestComplex.test_complex_function` | 1.00ms | 750μs | ✅33.3% | """ assert result == expected @@ -303,9 +301,8 @@ def test_zero_runtime_values(self): optimized_runtimes ) - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:------------------------------------------|--------------:|---------------:|:--------------| -| `test_module.py::TestClass.test_function` | nan | nan | ❌ | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|----------------------------|---------------|----------------|-----------| """ assert result == expected @@ -343,9 +340,9 @@ def test_filters_out_generated_tests(self): ) # Should only include the non-generated test - expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Improvement | -|:------------------------------------------|:--------------|:---------------|:--------------| -| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | ✅25.00% | + expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup | +|:------------------------------------------|:--------------|:---------------|:----------| +| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | ✅25.0% | """ assert result == expected diff --git a/tests/test_humanize_time.py b/tests/test_humanize_time.py index 4021b077e..ecc5e16d7 100644 --- a/tests/test_humanize_time.py +++ b/tests/test_humanize_time.py @@ -1,4 +1,5 @@ from codeflash.code_utils.time_utils import humanize_runtime, format_time +from codeflash.code_utils.time_utils import format_perf import pytest @@ -172,4 +173,103 @@ def test_negative_values(self): # This test depends on whether your function should handle negative values # You might want to modify based on expected behavior with pytest.raises((ValueError, TypeError)) or pytest.warns(): - format_time(-1000) \ No newline at end of file + format_time(-1000) + + +class TestFormatPerf: + """Test cases for the format_perf function.""" + + def test_format_perf_large_values_above_100(self): + """Test formatting for values above 100 (no decimal places).""" + assert format_perf(150.789) == "151" + assert format_perf(999.999) == "1000" + assert format_perf(100.1) == "100" + assert format_perf(500) == "500" + assert format_perf(1000.5) == "1000" + + def test_format_perf_medium_values_10_to_100(self): + """Test formatting for values between 10 and 100 (1 decimal place).""" + assert format_perf(99.99) == "100.0" + assert format_perf(50.789) == "50.8" + assert format_perf(10.1) == "10.1" + assert format_perf(25.0) == "25.0" + assert format_perf(33.333) == "33.3" + + def test_format_perf_small_values_1_to_10(self): + """Test formatting for values between 1 and 10 (2 decimal places).""" + assert format_perf(9.999) == "10.00" + assert format_perf(5.789) == "5.79" + assert format_perf(1.1) == "1.10" + assert format_perf(2.0) == "2.00" + assert format_perf(7.123) == "7.12" + 
+ def test_format_perf_very_small_values_below_1(self): + """Test formatting for values below 1 (3 decimal places).""" + assert format_perf(0.999) == "0.999" + assert format_perf(0.5) == "0.500" + assert format_perf(0.123) == "0.123" + assert format_perf(0.001) == "0.001" + assert format_perf(0.0) == "0.000" + + def test_format_perf_negative_values(self): + """Test formatting for negative values (uses absolute value for comparison).""" + assert format_perf(-150.789) == "-151" + assert format_perf(-50.789) == "-50.8" + assert format_perf(-5.789) == "-5.79" + assert format_perf(-0.999) == "-0.999" + assert format_perf(-0.0) == "-0.000" + + def test_format_perf_boundary_values(self): + """Test formatting for exact boundary values.""" + assert format_perf(100.0) == "100" + assert format_perf(10.0) == "10.0" + assert format_perf(1.0) == "1.00" + assert format_perf(-100.0) == "-100" + assert format_perf(-10.0) == "-10.0" + assert format_perf(-1.0) == "-1.00" + + def test_format_perf_integer_inputs(self): + """Test formatting with integer inputs.""" + assert format_perf(150) == "150" + assert format_perf(50) == "50.0" + assert format_perf(5) == "5.00" + assert format_perf(0) == "0.000" + assert format_perf(-150) == "-150" + assert format_perf(-50) == "-50.0" + assert format_perf(-5) == "-5.00" + + def test_format_perf_float_inputs(self): + """Test formatting with float inputs.""" + assert format_perf(123.456) == "123" + assert format_perf(12.3456) == "12.3" + assert format_perf(1.23456) == "1.23" + assert format_perf(0.123456) == "0.123" + + def test_format_perf_edge_cases(self): + """Test formatting for edge cases and special values.""" + # Very large numbers + assert format_perf(999999.99) == "1000000" + assert format_perf(1000000) == "1000000" + + # Very small positive numbers + assert format_perf(0.0001) == "0.000" + assert format_perf(0.00001) == "0.000" + + # Numbers very close to boundaries + assert format_perf(99.9999) == "100.0" + assert format_perf(9.9999) == "10.00" + assert format_perf(0.9999) == "1.000" + + def test_format_perf_rounding_behavior(self): + """Test that rounding behavior is consistent.""" + # Test rounding up + assert format_perf(100.5) == "100" + assert format_perf(10.55) == "10.6" + assert format_perf(1.555) == "1.55" + assert format_perf(0.1555) == "0.155" + + # Test rounding down + assert format_perf(100.4) == "100" + assert format_perf(10.54) == "10.5" + assert format_perf(1.554) == "1.55" + assert format_perf(0.1554) == "0.155" \ No newline at end of file From 36b9c753c209d49d759950d924a3649488ce6674 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Wed, 18 Jun 2025 14:48:30 -0700 Subject: [PATCH 15/16] non zero runtimes for comments --- codeflash/code_utils/edit_generated_tests.py | 30 +++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index afb33317c..7996bce3d 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -131,20 +131,22 @@ def leave_SimpleStatementLine( if matching_original_times and matching_optimized_times: original_time = min(matching_original_times) optimized_time = min(matching_optimized_times) - perf_gain = format_perf( - performance_gain(original_runtime_ns=original_time, optimized_runtime_ns=optimized_time) * 100 - ) - # Create the runtime comment - comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)} ({perf_gain}%)" - - # Add comment to 
the trailing whitespace - new_trailing_whitespace = cst.TrailingWhitespace( - whitespace=cst.SimpleWhitespace(" "), - comment=cst.Comment(comment_text), - newline=updated_node.trailing_whitespace.newline, - ) - - return updated_node.with_changes(trailing_whitespace=new_trailing_whitespace) + if original_time != 0 and optimized_time != 0: + perf_gain = format_perf( + performance_gain(original_runtime_ns=original_time, optimized_runtime_ns=optimized_time) + * 100 + ) + # Create the runtime comment + comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)} ({perf_gain}%)" + + # Add comment to the trailing whitespace + new_trailing_whitespace = cst.TrailingWhitespace( + whitespace=cst.SimpleWhitespace(" "), + comment=cst.Comment(comment_text), + newline=updated_node.trailing_whitespace.newline, + ) + + return updated_node.with_changes(trailing_whitespace=new_trailing_whitespace) return updated_node From 70360c6ab537c93b8208a8f5885d03b4be84fa5f Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Wed, 18 Jun 2025 15:31:08 -0700 Subject: [PATCH 16/16] absolute value of percentage --- codeflash/code_utils/edit_generated_tests.py | 11 ++++++++--- tests/test_add_runtime_comments.py | 14 +++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py index 7996bce3d..547dbc92b 100644 --- a/codeflash/code_utils/edit_generated_tests.py +++ b/codeflash/code_utils/edit_generated_tests.py @@ -133,11 +133,16 @@ def leave_SimpleStatementLine( optimized_time = min(matching_optimized_times) if original_time != 0 and optimized_time != 0: perf_gain = format_perf( - performance_gain(original_runtime_ns=original_time, optimized_runtime_ns=optimized_time) - * 100 + abs( + performance_gain(original_runtime_ns=original_time, optimized_runtime_ns=optimized_time) + * 100 + ) ) + status = "slower" if optimized_time > original_time else "faster" # Create the runtime comment - comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)} ({perf_gain}%)" + comment_text = ( + f"# {format_time(original_time)} -> {format_time(optimized_time)} ({perf_gain}% {status})" + ) # Add comment to the trailing whitespace new_trailing_whitespace = cst.TrailingWhitespace( diff --git a/tests/test_add_runtime_comments.py b/tests/test_add_runtime_comments.py index 6a579bb85..66a77b0d0 100644 --- a/tests/test_add_runtime_comments.py +++ b/tests/test_add_runtime_comments.py @@ -530,7 +530,7 @@ def test_add_runtime_comments_simple_function(self, test_config): ) expected_source = '''def test_function(): - codeflash_output = some_function() # 1.00s -> 500ms (100%) + codeflash_output = some_function() # 1.00s -> 500ms (100% faster) assert codeflash_output == expected ''' @@ -573,7 +573,7 @@ def test_function(self): expected_source = '''class TestClass: def test_function(self): - codeflash_output = some_function() # 2.00s -> 1.00s (100%) + codeflash_output = some_function() # 2.00s -> 1.00s (100% faster) assert codeflash_output == expected ''' @@ -617,9 +617,9 @@ def test_add_runtime_comments_multiple_assignments(self, test_config): expected_source = '''def test_function(): setup_data = prepare_test() - codeflash_output = some_function() # 1.50s -> 750ms (100%) + codeflash_output = some_function() # 1.50s -> 750ms (100% faster) assert codeflash_output == expected - codeflash_output = another_function() # 1.50s -> 750ms (100%) + codeflash_output = another_function() # 1.50s -> 750ms (100% faster) assert 
codeflash_output == expected2 ''' @@ -759,12 +759,12 @@ def test_add_runtime_comments_multiple_tests(self, test_config): ) expected_source1 = '''def test_function1(): - codeflash_output = some_function() # 1.00s -> 500ms (100%) + codeflash_output = some_function() # 1.00s -> 500ms (100% faster) assert codeflash_output == expected ''' expected_source2 = '''def test_function2(): - codeflash_output = another_function() # 2.00s -> 800ms (150%) + codeflash_output = another_function() # 2.00s -> 800ms (150% faster) assert codeflash_output == expected ''' @@ -805,7 +805,7 @@ def test_add_runtime_comments_performance_regression(self, test_config): ) expected_source = '''def test_function(): - codeflash_output = some_function() # 1.00s -> 1.50s (-33.3%) + codeflash_output = some_function() # 1.00s -> 1.50s (33.3% slower) assert codeflash_output == expected '''