diff --git a/codeflash/api/cfapi.py b/codeflash/api/cfapi.py
index 7008d13f5..48e7f92cf 100644
--- a/codeflash/api/cfapi.py
+++ b/codeflash/api/cfapi.py
@@ -124,6 +124,8 @@ def suggest_changes(
     generated_tests: str,
     trace_id: str,
     coverage_message: str,
+    replay_tests: str = "",
+    concolic_tests: str = "",
 ) -> Response:
     """Suggest changes to a pull request.

@@ -147,6 +149,8 @@
         "generatedTests": generated_tests,
         "traceId": trace_id,
         "coverage_message": coverage_message,
+        "replayTests": replay_tests,
+        "concolicTests": concolic_tests,
     }
     return make_cfapi_request(endpoint="/suggest-pr-changes", method="POST", payload=payload)

@@ -161,6 +165,8 @@ def create_pr(
     generated_tests: str,
     trace_id: str,
     coverage_message: str,
+    replay_tests: str = "",
+    concolic_tests: str = "",
 ) -> Response:
     """Create a pull request, targeting the specified branch. (usually 'main').

@@ -183,6 +189,8 @@
         "generatedTests": generated_tests,
         "traceId": trace_id,
         "coverage_message": coverage_message,
+        "replayTests": replay_tests,
+        "concolicTests": concolic_tests,
     }
     return make_cfapi_request(endpoint="/create-pr", method="POST", payload=payload)
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index d009c3b9c..d46a3aff9 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -1196,7 +1196,7 @@ def process_review(
         if concolic_test_str:
             generated_tests_str += "\n#------------------------------------------------\n" + concolic_test_str

-        existing_tests = existing_tests_source_for(
+        existing_tests, replay_tests, concolic_tests = existing_tests_source_for(
             self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root),
             function_to_all_tests,
             test_cfg=self.test_cfg,
@@ -1237,6 +1237,8 @@ def process_review(
             if self.experiment_id
             else self.function_trace_id,
             "coverage_message": coverage_message,
+            "replay_tests": replay_tests,
+            "concolic_tests": concolic_tests,
         }

         if not self.args.no_pr and not self.args.staging_review:
diff --git a/codeflash/result/create_pr.py b/codeflash/result/create_pr.py
index 2134cee09..51f6b48af 100644
--- a/codeflash/result/create_pr.py
+++ b/codeflash/result/create_pr.py
@@ -34,12 +34,16 @@ def existing_tests_source_for(
     test_cfg: TestConfig,
     original_runtimes_all: dict[InvocationId, list[int]],
     optimized_runtimes_all: dict[InvocationId, list[int]],
-) -> str:
+) -> tuple[str, str, str]:
     test_files = function_to_tests.get(function_qualified_name_with_modules_from_root)
     if not test_files:
-        return ""
-    output: str = ""
-    rows = []
+        return "", "", ""
+    output_existing: str = ""
+    output_concolic: str = ""
+    output_replay: str = ""
+    rows_existing = []
+    rows_concolic = []
+    rows_replay = []
     headers = ["Test File::Test Function", "Original ⏱️", "Optimized ⏱️", "Speedup"]
     tests_root = test_cfg.tests_root
     original_tests_to_runtimes: dict[Path, dict[str, int]] = {}
@@ -99,28 +103,79 @@
                 * 100
             )
             if greater:
-                rows.append(
+                if "__replay_test_" in str(print_filename):
+                    rows_replay.append(
+                        [
+                            f"`{print_filename}::{qualified_name}`",
+                            f"{print_original_runtime}",
+                            f"{print_optimized_runtime}",
+                            f"{perf_gain}%⚠️",
+                        ]
+                    )
+                elif "codeflash_concolic" in str(print_filename):
+                    rows_concolic.append(
+                        [
+                            f"`{print_filename}::{qualified_name}`",
+                            f"{print_original_runtime}",
+                            f"{print_optimized_runtime}",
+                            f"{perf_gain}%⚠️",
+                        ]
+                    )
+                else:
+                    rows_existing.append(
+                        [
+                            f"`{print_filename}::{qualified_name}`",
+                            f"{print_original_runtime}",
+                            f"{print_optimized_runtime}",
+                            f"{perf_gain}%⚠️",
+                        ]
+                    )
+            elif "__replay_test_" in str(print_filename):
+                rows_replay.append(
                     [
                         f"`{print_filename}::{qualified_name}`",
                         f"{print_original_runtime}",
                         f"{print_optimized_runtime}",
-                        f"⚠️{perf_gain}%",
+                        f"{perf_gain}%✅",
+                    ]
+                )
+            elif "codeflash_concolic" in str(print_filename):
+                rows_concolic.append(
+                    [
+                        f"`{print_filename}::{qualified_name}`",
+                        f"{print_original_runtime}",
+                        f"{print_optimized_runtime}",
+                        f"{perf_gain}%✅",
                     ]
                 )
             else:
-                rows.append(
+                rows_existing.append(
                     [
                         f"`{print_filename}::{qualified_name}`",
                         f"{print_original_runtime}",
                         f"{print_optimized_runtime}",
-                        f"✅{perf_gain}%",
+                        f"{perf_gain}%✅",
                     ]
                 )
-    output += tabulate(  # type: ignore[no-untyped-call]
-        headers=headers, tabular_data=rows, tablefmt="pipe", colglobalalign=None, preserve_whitespace=True
+    output_existing += tabulate(  # type: ignore[no-untyped-call]
+        headers=headers, tabular_data=rows_existing, tablefmt="pipe", colglobalalign=None, preserve_whitespace=True
+    )
+    output_existing += "\n"
+    if len(rows_existing) == 0:
+        output_existing = ""
+    output_concolic += tabulate(  # type: ignore[no-untyped-call]
+        headers=headers, tabular_data=rows_concolic, tablefmt="pipe", colglobalalign=None, preserve_whitespace=True
+    )
+    output_concolic += "\n"
+    if len(rows_concolic) == 0:
+        output_concolic = ""
+    output_replay += tabulate(  # type: ignore[no-untyped-call]
+        headers=headers, tabular_data=rows_replay, tablefmt="pipe", colglobalalign=None, preserve_whitespace=True
     )
-    output += "\n"
-    return output
+    output_replay += "\n"
+    if len(rows_replay) == 0:
+        output_replay = ""
+    return output_existing, output_replay, output_concolic


 def check_create_pr(
@@ -131,6 +186,8 @@
     generated_original_test_source: str,
     function_trace_id: str,
     coverage_message: str,
+    replay_tests: str,
+    concolic_tests: str,
     git_remote: Optional[str] = None,
 ) -> None:
     pr_number: Optional[int] = env_utils.get_pr_number()
@@ -171,6 +228,8 @@
             generated_tests=generated_original_test_source,
             trace_id=function_trace_id,
             coverage_message=coverage_message,
+            replay_tests=replay_tests,
+            concolic_tests=concolic_tests,
         )
         if response.ok:
             logger.info(f"Suggestions were successfully made to PR #{pr_number}")
@@ -218,6 +277,8 @@
             generated_tests=generated_original_test_source,
             trace_id=function_trace_id,
             coverage_message=coverage_message,
+            replay_tests=replay_tests,
+            concolic_tests=concolic_tests,
         )
         if response.ok:
             pr_id = response.text
diff --git a/tests/test_existing_tests_source_for.py b/tests/test_existing_tests_source_for.py
index ccc3f8ca2..1877284d5 100644
--- a/tests/test_existing_tests_source_for.py
+++ b/tests/test_existing_tests_source_for.py
@@ -1,8 +1,11 @@
+from unittest.mock import Mock
+import contextlib
 import os
+import shutil
+import unittest
+from dataclasses import dataclass
 from pathlib import Path
-from unittest.mock import Mock
-
-import pytest
+from typing import Optional

 from codeflash.result.create_pr import existing_tests_source_for

@@ -38,7 +41,7 @@ def test_no_test_files_returns_empty_string(self):
         original_runtimes = {}
         optimized_runtimes = {}

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -61,7 +64,7 @@ def test_single_test_with_improvement(self):
             self.mock_invocation_id: [500000]  # 0.5ms in nanoseconds
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -71,7 +74,7 @@

         expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
 |:------------------------------------------|:--------------|:---------------|:----------|
-| `test_module.py::TestClass.test_function` | 1.00ms | 500μs | ✅100% |
+| `test_module.py::TestClass.test_function` | 1.00ms | 500μs | 100%✅ |
 """

         assert result == expected

@@ -89,7 +92,7 @@ def test_single_test_with_regression(self):
             self.mock_invocation_id: [1000000]  # 1ms in nanoseconds
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -99,7 +102,7 @@

         expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
 |:------------------------------------------|:--------------|:---------------|:----------|
-| `test_module.py::TestClass.test_function` | 500μs | 1.00ms | ⚠️-50.0% |
+| `test_module.py::TestClass.test_function` | 500μs | 1.00ms | -50.0%⚠️ |
 """

         assert result == expected

@@ -122,7 +125,7 @@ def test_test_without_class_name(self):
             mock_invocation_no_class: [800000]
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -132,7 +135,7 @@

         expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
 |:----------------------------------|:--------------|:---------------|:----------|
-| `test_module.py::test_standalone` | 1.00ms | 800μs | ✅25.0% |
+| `test_module.py::test_standalone` | 1.00ms | 800μs | 25.0%✅ |
 """

         assert result == expected

@@ -148,7 +151,7 @@ def test_missing_original_runtime(self):
             self.mock_invocation_id: [500000]
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -156,9 +159,7 @@
             optimized_runtimes
         )

-        expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
-|----------------------------|---------------|----------------|-----------|
-"""
+        expected = ""

         assert result == expected

@@ -173,7 +174,7 @@ def test_missing_optimized_runtime(self):
         }
         optimized_runtimes = {}

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -181,9 +182,7 @@
             optimized_runtimes
         )

-        expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
-|----------------------------|---------------|----------------|-----------|
-"""
+        expected = ""

         assert result == expected

@@ -212,7 +211,7 @@ def test_multiple_tests_sorted_output(self):
             mock_invocation_2: [1500000]
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -222,8 +221,8 @@

         expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
 |:-----------------------------------------------------|:--------------|:---------------|:----------|
-| `test_another.py::TestAnother.test_another_function` | 2.00ms | 1.50ms | ✅33.3% |
-| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | ✅25.0% |
+| `test_another.py::TestAnother.test_another_function` | 2.00ms | 1.50ms | 33.3%✅ |
+| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | 25.0%✅ |
 """

         assert result == expected

@@ -241,7 +240,7 @@ def test_multiple_runtimes_uses_minimum(self):
             self.mock_invocation_id: [600000, 700000, 500000]  # min: 500000
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -251,7 +250,7 @@

         expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
 |:------------------------------------------|:--------------|:---------------|:----------|
-| `test_module.py::TestClass.test_function` | 800μs | 500μs | ✅60.0% |
+| `test_module.py::TestClass.test_function` | 800μs | 500μs | 60.0%✅ |
 """

         assert result == expected

@@ -278,7 +277,7 @@ def test_complex_module_path_conversion(self):
             mock_invocation_complex: [750000]
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -288,7 +287,7 @@

         expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
 |:------------------------------------------------------------------------|:--------------|:---------------|:----------|
-| `integration/test_complex_module.py::TestComplex.test_complex_function` | 1.00ms | 750μs | ✅33.3% |
+| `integration/test_complex_module.py::TestComplex.test_complex_function` | 1.00ms | 750μs | 33.3%✅ |
 """

         assert result == expected

@@ -306,7 +305,7 @@ def test_zero_runtime_values(self):
             self.mock_invocation_id: [0]
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -314,9 +313,7 @@
             optimized_runtimes
         )

-        expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
-|----------------------------|---------------|----------------|-----------|
-"""
+        expected = ""

         assert result == expected

@@ -345,7 +342,7 @@ def test_filters_out_generated_tests(self):
             mock_generated_invocation: [400000]  # This should be filtered out
         }

-        result = existing_tests_source_for(
+        result, _, _ = existing_tests_source_for(
             "module.function",
             function_to_tests,
             self.test_cfg,
@@ -356,9 +353,246 @@

         # Should only include the non-generated test
         expected = """| Test File::Test Function | Original ⏱️ | Optimized ⏱️ | Speedup |
 |:------------------------------------------|:--------------|:---------------|:----------|
-| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | ✅25.0% |
+| `test_module.py::TestClass.test_function` | 1.00ms | 800μs | 25.0%✅ |
 """

         assert result == expected
-
-
+@dataclass(frozen=True)
+class MockInvocationId:
+    """Mocks codeflash.models.models.InvocationId"""
+    test_module_path: str
+    test_function_name: str
+    test_class_name: Optional[str] = None
+
+
+@dataclass(frozen=True)
+class MockTestsInFile:
+    """Mocks a part of codeflash.models.models.FunctionCalledInTest"""
+    test_file: Path
+
+
+@dataclass(frozen=True)
+class MockFunctionCalledInTest:
+    """Mocks codeflash.models.models.FunctionCalledInTest"""
+    tests_in_file: MockTestsInFile
+
+
+@dataclass(frozen=True)
+class MockTestConfig:
+    """Mocks codeflash.verification.verification_utils.TestConfig"""
+    tests_root: Path
+
+
+@contextlib.contextmanager
+def temp_project_dir():
+    """A context manager to create and chdir into a temporary project directory."""
+    original_cwd = os.getcwd()
+    # Use a unique name to avoid conflicts in /tmp
+    project_root = Path(f"/tmp/test_project_{os.getpid()}").resolve()
+    try:
+        project_root.mkdir(exist_ok=True, parents=True)
+        os.chdir(project_root)
+        yield project_root
+    finally:
+        os.chdir(original_cwd)
+        shutil.rmtree(project_root, ignore_errors=True)
+
+
+class ExistingTestsSourceForTests(unittest.TestCase):
+    def setUp(self):
+        self.func_qual_name = "my_module.my_function"
+        # A default test_cfg for tests that don't rely on file system.
+        self.test_cfg = MockTestConfig(tests_root=Path("/tmp/tests"))
+
+    def test_no_tests_for_function(self):
+        """Test case where no tests are found for the given function."""
+        existing, replay, concolic = existing_tests_source_for(
+            function_qualified_name_with_modules_from_root=self.func_qual_name,
+            function_to_tests={},
+            test_cfg=self.test_cfg,
+            original_runtimes_all={},
+            optimized_runtimes_all={},
+        )
+        self.assertEqual(existing, "")
+        self.assertEqual(replay, "")
+        self.assertEqual(concolic, "")
+
+    def test_no_runtime_data(self):
+        """Test case where tests exist but there is no runtime data."""
+        with temp_project_dir() as project_root:
+            tests_dir = project_root / "tests"
+            tests_dir.mkdir(exist_ok=True)
+            test_file_path = (tests_dir / "test_stuff.py").resolve()
+            test_file_path.touch()
+
+            test_cfg = MockTestConfig(tests_root=tests_dir.resolve())
+            function_to_tests = {
+                self.func_qual_name: {
+                    MockFunctionCalledInTest(
+                        tests_in_file=MockTestsInFile(test_file=test_file_path)
+                    )
+                }
+            }
+            existing, replay, concolic = existing_tests_source_for(
+                function_qualified_name_with_modules_from_root=self.func_qual_name,
+                function_to_tests=function_to_tests,
+                test_cfg=test_cfg,
+                original_runtimes_all={},
+                optimized_runtimes_all={},
+            )
+            self.assertEqual(existing, "")
+            self.assertEqual(replay, "")
+            self.assertEqual(concolic, "")
+
+    def test_with_existing_test_speedup(self):
+        """Test with a single existing test that shows a speedup."""
+        with temp_project_dir() as project_root:
+            tests_dir = project_root / "tests"
+            tests_dir.mkdir(exist_ok=True)
+            test_file_path = (tests_dir / "test_existing.py").resolve()
+            test_file_path.touch()
+
+            test_cfg = MockTestConfig(tests_root=tests_dir.resolve())
+            function_to_tests = {
+                self.func_qual_name: {
+                    MockFunctionCalledInTest(
+                        tests_in_file=MockTestsInFile(test_file=test_file_path)
+                    )
+                }
+            }
+
+            invocation_id = MockInvocationId(
+                test_module_path="tests.test_existing",
+                test_class_name="TestMyStuff",
+                test_function_name="test_one",
+            )
+
+            original_runtimes = {invocation_id: [200_000_000]}
+            optimized_runtimes = {invocation_id: [100_000_000]}
+
+            existing, replay, concolic = existing_tests_source_for(
+                function_qualified_name_with_modules_from_root=self.func_qual_name,
+                function_to_tests=function_to_tests,
+                test_cfg=test_cfg,
+                original_runtimes_all=original_runtimes,
+                optimized_runtimes_all=optimized_runtimes,
+            )
+
+            self.assertIn("| Test File::Test Function", existing)
+            self.assertIn("`test_existing.py::TestMyStuff.test_one`", existing)
+            self.assertIn("200ms", existing)
+            self.assertIn("100ms", existing)
+            self.assertIn("100%✅", existing)
+            self.assertEqual(replay, "")
+            self.assertEqual(concolic, "")
+
+    def test_with_replay_and_concolic_tests_slowdown(self):
+        """Test with replay and concolic tests showing a slowdown."""
+        with temp_project_dir() as project_root:
+            tests_dir = project_root / "tests"
+            tests_dir.mkdir(exist_ok=True)
+            replay_test_path = (tests_dir / "__replay_test_abc.py").resolve()
+            replay_test_path.touch()
+            concolic_test_path = (tests_dir / "codeflash_concolic_xyz.py").resolve()
+            concolic_test_path.touch()
+
+            test_cfg = MockTestConfig(tests_root=tests_dir.resolve())
+            function_to_tests = {
+                self.func_qual_name: {
+                    MockFunctionCalledInTest(
+                        tests_in_file=MockTestsInFile(test_file=replay_test_path)
+                    ),
+                    MockFunctionCalledInTest(
+                        tests_in_file=MockTestsInFile(test_file=concolic_test_path)
+                    ),
+                }
+            }
+
+            replay_inv_id = MockInvocationId(
+                test_module_path="tests.__replay_test_abc",
+                test_function_name="test_replay_one",
+            )
+            concolic_inv_id = MockInvocationId(
+                test_module_path="tests.codeflash_concolic_xyz",
+                test_function_name="test_concolic_one",
+            )
+
+            original_runtimes = {
+                replay_inv_id: [100_000_000],
+                concolic_inv_id: [150_000_000],
+            }
+            optimized_runtimes = {
+                replay_inv_id: [200_000_000],
+                concolic_inv_id: [300_000_000],
+            }
+
+            existing, replay, concolic = existing_tests_source_for(
+                function_qualified_name_with_modules_from_root=self.func_qual_name,
+                function_to_tests=function_to_tests,
+                test_cfg=test_cfg,
+                original_runtimes_all=original_runtimes,
+                optimized_runtimes_all=optimized_runtimes,
+            )
+
+            self.assertEqual(existing, "")
+            self.assertIn("`__replay_test_abc.py::test_replay_one`", replay)
+            self.assertIn("-50.0%⚠️", replay)
+            self.assertIn("`codeflash_concolic_xyz.py::test_concolic_one`", concolic)
+            self.assertIn("-50.0%⚠️", concolic)
+
+    def test_mixed_results_and_min_runtime(self):
+        """Test with mixed results and that min() of runtimes is used."""
+        with temp_project_dir() as project_root:
+            tests_dir = project_root / "tests"
+            tests_dir.mkdir(exist_ok=True)
+            existing_test_path = (tests_dir / "test_existing.py").resolve()
+            existing_test_path.touch()
+            replay_test_path = (tests_dir / "__replay_test_mixed.py").resolve()
+            replay_test_path.touch()
+
+            test_cfg = MockTestConfig(tests_root=tests_dir.resolve())
+            function_to_tests = {
+                self.func_qual_name: {
+                    MockFunctionCalledInTest(
+                        tests_in_file=MockTestsInFile(test_file=existing_test_path)
+                    ),
+                    MockFunctionCalledInTest(
+                        tests_in_file=MockTestsInFile(test_file=replay_test_path)
+                    ),
+                }
+            }
+
+            existing_inv_id = MockInvocationId(
+                "tests.test_existing", "test_speedup", "TestExisting"
+            )
+            replay_inv_id = MockInvocationId(
+                "tests.__replay_test_mixed", "test_slowdown"
+            )
+
+            original_runtimes = {
+                existing_inv_id: [400_000_000, 500_000_000],  # min is 400ms
+                replay_inv_id: [100_000_000, 110_000_000],  # min is 100ms
+            }
+            optimized_runtimes = {
+                existing_inv_id: [210_000_000, 200_000_000],  # min is 200ms
+                replay_inv_id: [300_000_000, 290_000_000],  # min is 290ms
+            }
+
+            existing, replay, concolic = existing_tests_source_for(
+                self.func_qual_name,
+                function_to_tests,
+                test_cfg,
+                original_runtimes,
+                optimized_runtimes,
+            )
+
+            self.assertIn("`test_existing.py::TestExisting.test_speedup`", existing)
+            self.assertIn("400ms", existing)
+            self.assertIn("200ms", existing)
+            self.assertIn("100%✅", existing)
+            self.assertIn("`__replay_test_mixed.py::test_slowdown`", replay)
+            self.assertIn("100ms", replay)
+            self.assertIn("290ms", replay)
+            self.assertIn("-65.5%⚠️", replay)
+            self.assertEqual(concolic, "")