From f6fd4503dfb336e0df9fd5d9c139de1d0f60c42f Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Fri, 30 May 2025 20:03:53 -0700 Subject: [PATCH 01/14] first version of stdout capture improvement --- codeflash/verification/parse_test_output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index b9060f4bc..9dcacc1d2 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -37,7 +37,7 @@ def parse_func(file_path: Path) -> XMLParser: matches_re = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!") -cleaner_re = re.compile(r"!######.*?######!|-+\s*Captured\s+(Log|Out)\s*-+\n?") +stdout_re = re.compile(r"!######.*?######!\n(.*)!\$######.*?######\$!", re.DOTALL) def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults: @@ -268,7 +268,7 @@ def parse_test_xml( matches = matches_re.findall(sys_stdout) if sys_stdout: - sys_stdout = cleaner_re.sub("", sys_stdout).strip() + sys_stdout = stdout_re.search(sys_stdout).group(1) if not matches or not len(matches): test_results.add( From 6dd2feef6ae591b8de78c89274f8b2a606c8e1b9 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Mon, 2 Jun 2025 17:59:29 -0700 Subject: [PATCH 02/14] progress --- .../code_utils/instrument_existing_tests.py | 132 +++++++++++++----- codeflash/verification/parse_test_output.py | 6 +- tests/test_instrument_tests.py | 16 ++- 3 files changed, 111 insertions(+), 43 deletions(-) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index e691c2ed2..50c3610bd 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -462,50 +462,96 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun ), *( [ + ast.Assign( + targets=[ + ast.Name(id='test_stdout_tag', ctx=ast.Store())], + value=ast.JoinedStr( + values=[ + ast.FormattedValue( + value=ast.Name(id='test_module_name', ctx=ast.Load()), + conversion=-1), + ast.Constant(value=':'), + ast.FormattedValue( + value=ast.IfExp( + test=ast.Name(id='test_class_name', ctx=ast.Load()), + body=ast.BinOp( + left=ast.Name(id='test_class_name', ctx=ast.Load()), + op=ast.Add(), + right=ast.Constant(value='.')), + orelse=ast.Constant(value='')), + conversion=-1), + ast.FormattedValue( + value=ast.Name(id='test_name', ctx=ast.Load()), + conversion=-1), + ast.Constant(value=':'), + ast.FormattedValue( + value=ast.Name(id='function_name', ctx=ast.Load()), + conversion=-1), + ast.Constant(value=':'), + ast.FormattedValue( + value=ast.Name(id='loop_index', ctx=ast.Load()), + conversion=-1), + ast.Constant(value=':'), + ast.FormattedValue( + value=ast.Name(id='invocation_id', ctx=ast.Load()), + conversion=-1)]), + lineno=lineno + 9,), ast.Expr( value=ast.Call( - func=ast.Name(id="print", ctx=ast.Load()), + func=ast.Name(id='print', ctx=ast.Load()), args=[ ast.JoinedStr( values=[ - ast.Constant(value="!######"), + ast.Constant(value='!$######'), ast.FormattedValue( - value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1 - ), - ast.Constant(value=":"), - ast.FormattedValue( - value=ast.IfExp( - test=ast.Name(id="test_class_name", ctx=ast.Load()), - body=ast.BinOp( - left=ast.Name(id="test_class_name", ctx=ast.Load()), - op=ast.Add(), - right=ast.Constant(value="."), - ), - orelse=ast.Constant(value=""), - ), - conversion=-1, - ), - ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1), - ast.Constant(value=":"), - ast.FormattedValue( - value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1 - ), - ast.Constant(value=":"), - ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1), - ast.Constant(value=":"), - ast.FormattedValue( - value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1 - ), - ast.Constant(value="######!"), - ] - ) - ], - keywords=[], - ) - ) + value=ast.Name(id='test_stdout_tag', ctx=ast.Load()), + conversion=-1), + ast.Constant(value='######$!')])], + keywords=[])), + # ast.Expr( + # value=ast.Call( + # func=ast.Name(id="print", ctx=ast.Load()), + # args=[ + # ast.JoinedStr( + # values=[ + # ast.Constant(value="!######"), + # ast.FormattedValue( + # value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1 + # ), + # ast.Constant(value=":"), + # ast.FormattedValue( + # value=ast.IfExp( + # test=ast.Name(id="test_class_name", ctx=ast.Load()), + # body=ast.BinOp( + # left=ast.Name(id="test_class_name", ctx=ast.Load()), + # op=ast.Add(), + # right=ast.Constant(value="."), + # ), + # orelse=ast.Constant(value=""), + # ), + # conversion=-1, + # ), + # ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1), + # ast.Constant(value=":"), + # ast.FormattedValue( + # value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1 + # ), + # ast.Constant(value=":"), + # ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1), + # ast.Constant(value=":"), + # ast.FormattedValue( + # value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1 + # ), + # ast.Constant(value="######!"), + # ] + # ) + # ], + # keywords=[], + # ) + # ) ] - if mode == TestingMode.BEHAVIOR - else [] + # if mode == TestingMode.BEHAVIOR + # else [] ), ast.Assign( targets=[ast.Name(id="exception", ctx=ast.Store())], value=ast.Constant(value=None), lineno=lineno + 10 @@ -598,6 +644,18 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun keywords=[], ) ), + ast.Expr( + value=ast.Call( + func=ast.Name(id='print', ctx=ast.Load()), + args=[ + ast.JoinedStr( + values=[ + ast.Constant(value='!######'), + ast.FormattedValue( + value=ast.Name(id='test_stdout_tag', ctx=ast.Load()), + conversion=-1), + ast.Constant(value='######!')])], + keywords=[])), *( [ ast.Expr( diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 9dcacc1d2..7e4ae7213 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -37,7 +37,7 @@ def parse_func(file_path: Path) -> XMLParser: matches_re = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!") -stdout_re = re.compile(r"!######.*?######!\n(.*)!\$######.*?######\$!", re.DOTALL) +stdout_re = re.compile(r"!\$######.*?######\$!\n(.*)!######.*?######!", re.DOTALL) def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults: @@ -268,7 +268,9 @@ def parse_test_xml( matches = matches_re.findall(sys_stdout) if sys_stdout: - sys_stdout = stdout_re.search(sys_stdout).group(1) + print("sys_stdout: ", sys_stdout) + stdout_match = stdout_re.search(sys_stdout) + sys_stdout = stdout_match.group(1) if stdout_match else "" if not matches or not len(matches): test_results.add( diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py index 44661912a..c0f96c022 100644 --- a/tests/test_instrument_tests.py +++ b/tests/test_instrument_tests.py @@ -37,7 +37,8 @@ codeflash_wrap.index[test_id] = 0 codeflash_test_index = codeflash_wrap.index[test_id] invocation_id = f'{{line_id}}_{{codeflash_test_index}}' - print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!") + test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" + print(f"!$######{test_stdout_tag}######$!") exception = None gc.disable() try: @@ -48,6 +49,7 @@ codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() + print(f"!######{test_stdout_tag}######!") pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) codeflash_con.commit() @@ -67,6 +69,8 @@ codeflash_test_index = codeflash_wrap.index[test_id] invocation_id = f'{{line_id}}_{{codeflash_test_index}}' exception = None + test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" + print(f"!$######{{test_stdout_tag}}######$!") gc.disable() try: counter = time.perf_counter_ns() @@ -76,7 +80,7 @@ codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() - print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}:{{codeflash_duration}}######!") + print(f"!######{{test_stdout_tag}}:{{codeflash_duration}}######!") if exception: raise exception return return_value @@ -124,11 +128,14 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi codeflash_wrap.index[test_id] = 0 codeflash_test_index = codeflash_wrap.index[test_id] invocation_id = f'{{line_id}}_{{codeflash_test_index}}' + test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" """ if sys.version_info < (3, 12): - expected += """print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")""" + expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" + print(f"!$######{{test_stdout_tag}}######$!")""" else: - expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')""" + expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!') + print(f'!$######{{test_stdout_tag}}######$!')""" expected += """ exception = None gc.disable() @@ -140,6 +147,7 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() + print(f"!######{{test_stdout_tag}}######!") pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) codeflash_con.commit() From 8b2f9486c1263c79b75058f98368881eabdef6dc Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Mon, 2 Jun 2025 22:27:02 -0700 Subject: [PATCH 03/14] seems to be working --- codeflash/verification/parse_test_output.py | 58 +++++++++++++-------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 7e4ae7213..480b40630 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -36,8 +36,8 @@ def parse_func(file_path: Path) -> XMLParser: return parse(file_path, xml_parser) -matches_re = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!") -stdout_re = re.compile(r"!\$######.*?######\$!\n(.*)!######.*?######!", re.DOTALL) +matches_re_start = re.compile(r"!\$######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######\$!\n") +matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!") def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults: @@ -265,14 +265,16 @@ def parse_test_xml( timed_out = True sys_stdout = testcase.system_out or "" - matches = matches_re.findall(sys_stdout) - - if sys_stdout: - print("sys_stdout: ", sys_stdout) - stdout_match = stdout_re.search(sys_stdout) - sys_stdout = stdout_match.group(1) if stdout_match else "" - - if not matches or not len(matches): + begin_matches = [match for match in matches_re_start.finditer(sys_stdout)] + end_matches = {} + for match in matches_re_end.finditer(sys_stdout): + groups = match.groups() + if len(groups[5].split(":")) > 1: + iteration_id = groups[5].split(":")[0] + groups = groups[:5] + (iteration_id,) + end_matches[groups] = match + + if not begin_matches or not begin_matches: test_results.add( FunctionTestInvocation( loop_index=loop_index, @@ -290,26 +292,36 @@ def parse_test_xml( test_type=test_type, return_value=None, timed_out=timed_out, - stdout=sys_stdout, + stdout="", ) ) else: - for match in matches: - split_val = match[5].split(":") - if len(split_val) > 1: - iteration_id = split_val[0] - runtime = int(split_val[1]) + for match_index, match in enumerate(begin_matches): + groups = match.groups() + end_match = end_matches.get(groups) + iteration_id, runtime = groups[5], None + if end_match: + stdout = sys_stdout[match.end() : end_match.start()] + split_val = end_match.groups()[5].split(":") + if len(split_val) > 1: + iteration_id = split_val[0] + runtime = int(split_val[1]) + else: + iteration_id, runtime = split_val[0], None + elif match_index == len(begin_matches) - 1: + stdout = sys_stdout[match.end() :] else: - iteration_id, runtime = split_val[0], None + stdout = sys_stdout[match.end() : begin_matches[match_index + 1].start()] + test_results.add( FunctionTestInvocation( - loop_index=int(match[4]), + loop_index=int(groups[4]), id=InvocationId( - test_module_path=match[0], - test_class_name=None if match[1] == "" else match[1][:-1], - test_function_name=match[2], - function_getting_tested=match[3], + test_module_path=groups[0], + test_class_name=None if groups[1] == "" else groups[1][:-1], + test_function_name=groups[2], + function_getting_tested=groups[3], iteration_id=iteration_id, ), file_name=test_file_path, @@ -319,7 +331,7 @@ def parse_test_xml( test_type=test_type, return_value=None, timed_out=timed_out, - stdout=sys_stdout, + stdout=stdout, ) ) From 22c62cac4006d12baae139757c8fa4694320e435 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 01:13:38 -0700 Subject: [PATCH 04/14] Fix some bugs and tests --- .../code_utils/instrument_existing_tests.py | 130 ++++------ tests/test_instrument_tests.py | 223 ++++++++++++------ ...t_instrumentation_run_results_aiservice.py | 59 +++-- tests/test_test_runner.py | 4 +- 4 files changed, 230 insertions(+), 186 deletions(-) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index 50c3610bd..9c071996a 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -463,51 +463,51 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun *( [ ast.Assign( - targets=[ - ast.Name(id='test_stdout_tag', ctx=ast.Store())], + targets=[ast.Name(id="test_stdout_tag", ctx=ast.Store())], value=ast.JoinedStr( values=[ - ast.FormattedValue( - value=ast.Name(id='test_module_name', ctx=ast.Load()), - conversion=-1), - ast.Constant(value=':'), + ast.FormattedValue(value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1), + ast.Constant(value=":"), ast.FormattedValue( value=ast.IfExp( - test=ast.Name(id='test_class_name', ctx=ast.Load()), + test=ast.Name(id="test_class_name", ctx=ast.Load()), body=ast.BinOp( - left=ast.Name(id='test_class_name', ctx=ast.Load()), + left=ast.Name(id="test_class_name", ctx=ast.Load()), op=ast.Add(), - right=ast.Constant(value='.')), - orelse=ast.Constant(value='')), - conversion=-1), - ast.FormattedValue( - value=ast.Name(id='test_name', ctx=ast.Load()), - conversion=-1), - ast.Constant(value=':'), - ast.FormattedValue( - value=ast.Name(id='function_name', ctx=ast.Load()), - conversion=-1), - ast.Constant(value=':'), - ast.FormattedValue( - value=ast.Name(id='loop_index', ctx=ast.Load()), - conversion=-1), - ast.Constant(value=':'), - ast.FormattedValue( - value=ast.Name(id='invocation_id', ctx=ast.Load()), - conversion=-1)]), - lineno=lineno + 9,), + right=ast.Constant(value="."), + ), + orelse=ast.Constant(value=""), + ), + conversion=-1, + ), + ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1), + ast.Constant(value=":"), + ast.FormattedValue(value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1), + ast.Constant(value=":"), + ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1), + ast.Constant(value=":"), + ast.FormattedValue(value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1), + ] + ), + lineno=lineno + 9, + ), ast.Expr( value=ast.Call( - func=ast.Name(id='print', ctx=ast.Load()), + func=ast.Name(id="print", ctx=ast.Load()), args=[ ast.JoinedStr( values=[ - ast.Constant(value='!$######'), + ast.Constant(value="!$######"), ast.FormattedValue( - value=ast.Name(id='test_stdout_tag', ctx=ast.Load()), - conversion=-1), - ast.Constant(value='######$!')])], - keywords=[])), + value=ast.Name(id="test_stdout_tag", ctx=ast.Load()), conversion=-1 + ), + ast.Constant(value="######$!"), + ] + ) + ], + keywords=[], + ) + ), # ast.Expr( # value=ast.Call( # func=ast.Name(id="print", ctx=ast.Load()), @@ -646,66 +646,28 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun ), ast.Expr( value=ast.Call( - func=ast.Name(id='print', ctx=ast.Load()), + func=ast.Name(id="print", ctx=ast.Load()), args=[ ast.JoinedStr( values=[ - ast.Constant(value='!######'), - ast.FormattedValue( - value=ast.Name(id='test_stdout_tag', ctx=ast.Load()), - conversion=-1), - ast.Constant(value='######!')])], - keywords=[])), - *( - [ - ast.Expr( - value=ast.Call( - func=ast.Name(id="print", ctx=ast.Load()), - args=[ - ast.JoinedStr( - values=[ - ast.Constant(value="!######"), - ast.FormattedValue( - value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1 - ), - ast.Constant(value=":"), - ast.FormattedValue( - value=ast.IfExp( - test=ast.Name(id="test_class_name", ctx=ast.Load()), - body=ast.BinOp( - left=ast.Name(id="test_class_name", ctx=ast.Load()), - op=ast.Add(), - right=ast.Constant(value="."), - ), - orelse=ast.Constant(value=""), - ), - conversion=-1, - ), - ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1), - ast.Constant(value=":"), - ast.FormattedValue( - value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1 - ), - ast.Constant(value=":"), - ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1), - ast.Constant(value=":"), - ast.FormattedValue( - value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1 - ), + ast.Constant(value="!######"), + ast.FormattedValue(value=ast.Name(id="test_stdout_tag", ctx=ast.Load()), conversion=-1), + *( + [ ast.Constant(value=":"), ast.FormattedValue( value=ast.Name(id="codeflash_duration", ctx=ast.Load()), conversion=-1 ), - ast.Constant(value="######!"), ] - ) - ], - keywords=[], + if mode == TestingMode.PERFORMANCE + else [] + ), + ast.Constant(value="######!"), + ] ) - ) - ] - if mode == TestingMode.PERFORMANCE - else [] + ], + keywords=[], + ) ), *( [ diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py index c0f96c022..80c69bec9 100644 --- a/tests/test_instrument_tests.py +++ b/tests/test_instrument_tests.py @@ -38,7 +38,7 @@ codeflash_test_index = codeflash_wrap.index[test_id] invocation_id = f'{{line_id}}_{{codeflash_test_index}}' test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" - print(f"!$######{test_stdout_tag}######$!") + print(f"!$######{{test_stdout_tag}}######$!") exception = None gc.disable() try: @@ -49,7 +49,7 @@ codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() - print(f"!######{test_stdout_tag}######!") + print(f"!######{{test_stdout_tag}}######!") pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) codeflash_con.commit() @@ -68,9 +68,9 @@ codeflash_wrap.index[test_id] = 0 codeflash_test_index = codeflash_wrap.index[test_id] invocation_id = f'{{line_id}}_{{codeflash_test_index}}' - exception = None test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" print(f"!$######{{test_stdout_tag}}######$!") + exception = None gc.disable() try: counter = time.perf_counter_ns() @@ -128,13 +128,12 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi codeflash_wrap.index[test_id] = 0 codeflash_test_index = codeflash_wrap.index[test_id] invocation_id = f'{{line_id}}_{{codeflash_test_index}}' - test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" """ if sys.version_info < (3, 12): expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" print(f"!$######{{test_stdout_tag}}######$!")""" else: - expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!') + expected += """test_stdout_tag = f'{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}' print(f'!$######{{test_stdout_tag}}######$!')""" expected += """ exception = None @@ -147,7 +146,12 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() - print(f"!######{{test_stdout_tag}}######!") + """ + if sys.version_info < (3, 12): + expected += """print(f"!######{{test_stdout_tag}}######!")""" + else: + expected += """print(f'!######{{test_stdout_tag}}######!')""" + expected += """ pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) codeflash_con.commit() @@ -235,9 +239,11 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi invocation_id = f'{{line_id}}_{{codeflash_test_index}}' """ if sys.version_info < (3, 12): - expected += """print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")""" + expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" + print(f"!$######{{test_stdout_tag}}######$!")""" else: - expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')""" + expected += """test_stdout_tag = f'{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}' + print(f'!$######{{test_stdout_tag}}######$!')""" expected += """ exception = None gc.disable() @@ -249,6 +255,12 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() + """ + if sys.version_info < (3, 12): + expected += """print(f"!######{{test_stdout_tag}}######!")""" + else: + expected += """print(f'!######{{test_stdout_tag}}######!')""" + expected += """ pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) codeflash_con.commit() @@ -489,6 +501,12 @@ def test_sort(): assert test_results_perf[0].runtime > 0 assert test_results_perf[0].did_pass assert test_results_perf[0].return_value is None + assert ( + test_results_perf[0].stdout + == """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5] +""" + ) assert test_results_perf[1].id.function_getting_tested == "sorter" assert test_results_perf[1].id.iteration_id == "4_0" @@ -500,12 +518,11 @@ def test_sort(): ) assert test_results_perf[1].runtime > 0 assert test_results_perf[1].did_pass - out_str = """codeflash stdout: Sorting list -result: [0, 1, 2, 3, 4, 5] -codeflash stdout: Sorting list -result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]""" - assert out_str == test_results_perf[1].stdout + out_str = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert test_results_perf[1].stdout == out_str ctx_result = func_optimizer.get_code_optimization_context() code_context: CodeOptimizationContext = ctx_result.unwrap() original_helper_code: dict[Path, str] = {} @@ -515,8 +532,7 @@ def test_sort(): helper_code = f.read() original_helper_code[helper_function_path] = helper_code computed_fn_opt = True - line_profiler_output_file = add_decorator_imports( - func_optimizer.function_to_optimize, code_context) + line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) line_profile_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, @@ -525,14 +541,16 @@ def test_sort(): pytest_min_loops=1, pytest_max_loops=1, testing_time=0.1, - line_profiler_output_file = line_profiler_output_file + line_profiler_output_file=line_profiler_output_file, ) tmp_lpr = list(line_profile_results["timings"].keys()) - assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1]==2 + assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1] == 2 finally: if computed_fn_opt: func_optimizer.write_code_and_helpers( - func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path + func_optimizer.function_to_optimize_source_code, + original_helper_code, + func_optimizer.function_to_optimize.file_path, ) test_path.unlink(missing_ok=True) test_path_perf.unlink(missing_ok=True) @@ -690,6 +708,12 @@ def test_sort_parametrized(input, expected_output): ) assert test_results[0].runtime > 0 assert test_results[0].did_pass + assert ( + test_results[0].stdout + == """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5] +""" + ) assert test_results[1].id.function_getting_tested == "sorter" assert test_results[1].id.iteration_id == "0_1" @@ -701,6 +725,12 @@ def test_sort_parametrized(input, expected_output): ) assert test_results[1].runtime > 0 assert test_results[1].did_pass + assert ( + test_results[1].stdout + == """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + ) assert test_results[2].id.function_getting_tested == "sorter" assert test_results[2].id.iteration_id == "0_2" @@ -746,7 +776,8 @@ def test_sort_parametrized(input, expected_output): assert test_results_perf[1].did_pass out_str = """codeflash stdout: Sorting list -result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]""" +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" assert out_str == test_results_perf[1].stdout assert test_results_perf[2].id.function_getting_tested == "sorter" @@ -768,8 +799,7 @@ def test_sort_parametrized(input, expected_output): helper_code = f.read() original_helper_code[helper_function_path] = helper_code computed_fn_opt = True - line_profiler_output_file = add_decorator_imports( - func_optimizer.function_to_optimize, code_context) + line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) line_profile_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, @@ -778,14 +808,16 @@ def test_sort_parametrized(input, expected_output): pytest_min_loops=1, pytest_max_loops=1, testing_time=0.1, - line_profiler_output_file = line_profiler_output_file + line_profiler_output_file=line_profiler_output_file, ) tmp_lpr = list(line_profile_results["timings"].keys()) - assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1]==3 + assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1] == 3 finally: if computed_fn_opt: func_optimizer.write_code_and_helpers( - func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path + func_optimizer.function_to_optimize_source_code, + original_helper_code, + func_optimizer.function_to_optimize.file_path, ) test_path.unlink(missing_ok=True) test_path_perf.unlink(missing_ok=True) @@ -962,7 +994,10 @@ def test_sort_parametrized_loop(input, expected_output): assert test_results[0].runtime > 0 assert test_results[0].did_pass assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],) - + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5] +""" + assert test_results[0].stdout == out_str assert test_results[1].id.function_getting_tested == "sorter" assert test_results[1].id.iteration_id == "0_0_1" assert test_results[1].id.test_class_name is None @@ -973,6 +1008,7 @@ def test_sort_parametrized_loop(input, expected_output): ) assert test_results[1].runtime > 0 assert test_results[1].did_pass + assert test_results[1].stdout == out_str assert test_results[2].id.function_getting_tested == "sorter" assert test_results[2].id.iteration_id == "0_0_2" @@ -984,6 +1020,10 @@ def test_sort_parametrized_loop(input, expected_output): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass + out_str = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert test_results[2].stdout == out_str assert test_results[3].id.function_getting_tested == "sorter" assert test_results[3].id.iteration_id == "0_0_3" @@ -996,6 +1036,8 @@ def test_sort_parametrized_loop(input, expected_output): assert test_results[3].runtime > 0 assert test_results[3].did_pass + assert test_results[3].stdout == out_str + assert test_results[4].id.function_getting_tested == "sorter" assert test_results[4].id.iteration_id == "0_0_4" assert test_results[4].id.test_class_name is None @@ -1006,6 +1048,10 @@ def test_sort_parametrized_loop(input, expected_output): ) assert test_results[4].runtime > 0 assert test_results[4].did_pass + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +""" + assert test_results[4].stdout == out_str assert test_results[5].id.function_getting_tested == "sorter" assert test_results[5].id.iteration_id == "0_0_5" @@ -1017,6 +1063,7 @@ def test_sort_parametrized_loop(input, expected_output): ) assert test_results[5].runtime > 0 assert test_results[5].did_pass + assert test_results[5].stdout == out_str test_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, @@ -1107,8 +1154,7 @@ def test_sort_parametrized_loop(input, expected_output): helper_code = f.read() original_helper_code[helper_function_path] = helper_code computed_fn_opt = True - line_profiler_output_file = add_decorator_imports( - func_optimizer.function_to_optimize, code_context) + line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) line_profile_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, @@ -1117,14 +1163,16 @@ def test_sort_parametrized_loop(input, expected_output): pytest_min_loops=1, pytest_max_loops=1, testing_time=0.1, - line_profiler_output_file = line_profiler_output_file + line_profiler_output_file=line_profiler_output_file, ) tmp_lpr = list(line_profile_results["timings"].keys()) - assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1]==6 + assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1] == 6 finally: if computed_fn_opt: func_optimizer.write_code_and_helpers( - func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path + func_optimizer.function_to_optimize_source_code, + original_helper_code, + func_optimizer.function_to_optimize.file_path, ) test_path.unlink(missing_ok=True) test_path_behavior.unlink(missing_ok=True) @@ -1343,13 +1391,9 @@ def test_sort(): assert test_results[0].return_value is None out_str = """codeflash stdout: Sorting list result: [0, 1, 2, 3, 4, 5] +""" -codeflash stdout: Sorting list -result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] - -codeflash stdout: Sorting list -result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]""" - assert test_results[1].stdout == out_str + assert test_results[0].stdout == out_str assert test_results[1].id.function_getting_tested == "sorter" assert test_results[1].id.iteration_id == "2_2_1" assert test_results[1].id.test_class_name is None @@ -1360,6 +1404,10 @@ def test_sort(): ) assert test_results[1].runtime > 0 assert test_results[1].did_pass + out_str2 = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert test_results[1].stdout == out_str2 assert test_results[2].id.function_getting_tested == "sorter" assert test_results[2].id.iteration_id == "2_2_2" @@ -1371,6 +1419,10 @@ def test_sort(): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass + out_str3 = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +""" + assert test_results[2].stdout == out_str3 ctx_result = func_optimizer.get_code_optimization_context() code_context: CodeOptimizationContext = ctx_result.unwrap() original_helper_code: dict[Path, str] = {} @@ -1380,8 +1432,7 @@ def test_sort(): helper_code = f.read() original_helper_code[helper_function_path] = helper_code computed_fn_opt = True - line_profiler_output_file = add_decorator_imports( - func_optimizer.function_to_optimize, code_context) + line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) line_profile_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, @@ -1390,14 +1441,16 @@ def test_sort(): pytest_min_loops=1, pytest_max_loops=1, testing_time=0.1, - line_profiler_output_file = line_profiler_output_file + line_profiler_output_file=line_profiler_output_file, ) tmp_lpr = list(line_profile_results["timings"].keys()) - assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1]==3 + assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1] == 3 finally: if computed_fn_opt is True: func_optimizer.write_code_and_helpers( - func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path + func_optimizer.function_to_optimize_source_code, + original_helper_code, + func_optimizer.function_to_optimize.file_path, ) test_path.unlink(missing_ok=True) test_path_perf.unlink(missing_ok=True) @@ -1602,6 +1655,10 @@ def test_sort(self): assert test_results[0].runtime > 0 assert test_results[0].did_pass assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],) + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5] +""" + assert test_results[0].stdout == out_str assert test_results[1].id.function_getting_tested == "sorter" assert test_results[1].id.iteration_id == "4_0" @@ -1613,6 +1670,10 @@ def test_sort(self): ) assert test_results[1].runtime > 0 assert test_results[1].did_pass + out_str = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert test_results[1].stdout == out_str assert test_results[2].id.function_getting_tested == "sorter" assert test_results[2].id.iteration_id == "7_0" @@ -1665,6 +1726,10 @@ def test_sort(self): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +""" + assert test_results[2].stdout == out_str finally: test_path.unlink(missing_ok=True) test_path_behavior.unlink(missing_ok=True) @@ -1849,6 +1914,10 @@ def test_sort(self, input, expected_output): assert test_results[0].runtime > 0 assert test_results[0].did_pass assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],) + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5] +""" + assert test_results[0].stdout == out_str assert test_results[1].id.function_getting_tested == "sorter" assert test_results[1].id.iteration_id == "0_1" @@ -1860,6 +1929,10 @@ def test_sort(self, input, expected_output): ) assert test_results[1].runtime > 0 assert test_results[1].did_pass + out_str = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert test_results[1].stdout == out_str assert test_results[2].id.function_getting_tested == "sorter" assert test_results[2].id.iteration_id == "0_2" @@ -1871,6 +1944,10 @@ def test_sort(self, input, expected_output): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +""" + assert test_results[2].stdout == out_str test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, @@ -2100,6 +2177,10 @@ def test_sort(self): assert test_results[0].runtime > 0 assert test_results[0].did_pass assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],) + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5] +""" + assert test_results[0].stdout == out_str assert test_results[1].id.function_getting_tested == "sorter" assert test_results[1].id.iteration_id == "2_2_1" @@ -2111,6 +2192,10 @@ def test_sort(self): ) assert test_results[1].runtime > 0 assert test_results[1].did_pass + out_str = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert test_results[1].stdout == out_str assert test_results[2].id.function_getting_tested == "sorter" assert test_results[2].id.iteration_id == "2_2_2" @@ -2122,6 +2207,10 @@ def test_sort(self): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +""" + assert test_results[2].stdout == out_str test_results, coverage_data = func_optimizer.run_and_parse_tests( test_env=test_env, @@ -2349,6 +2438,10 @@ def test_sort(self, input, expected_output): assert test_results[0].runtime > 0 assert test_results[0].did_pass assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],) + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5] +""" + assert test_results[0].stdout == out_str assert test_results[1].id.function_getting_tested == "sorter" assert test_results[1].id.iteration_id == "0_0_1" @@ -2360,6 +2453,10 @@ def test_sort(self, input, expected_output): ) assert test_results[1].runtime > 0 assert test_results[1].did_pass + out_str = """codeflash stdout: Sorting list +result: [0, 1, 2, 3, 4, 5] +""" + assert test_results[1].stdout == out_str assert test_results[2].id.function_getting_tested == "sorter" assert test_results[2].id.iteration_id == "0_0_2" @@ -2371,6 +2468,10 @@ def test_sort(self, input, expected_output): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass + out_str = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert test_results[2].stdout == out_str assert test_results[3].id.function_getting_tested == "sorter" assert test_results[3].id.iteration_id == "0_0_3" @@ -2829,7 +2930,8 @@ def test_code_replacement10() -> None: assert code_context.testgen_context_code == get_code_output """ - expected = """import gc + expected = ( + """import gc import os import sqlite3 import time @@ -2839,39 +2941,9 @@ def test_code_replacement10() -> None: from codeflash.optimization.optimizer import Optimizer -def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, function_name, line_id, loop_index, codeflash_cur, codeflash_con, *args, **kwargs): - test_id = f'{{test_module_name}}:{{test_class_name}}:{{test_name}}:{{line_id}}:{{loop_index}}' - if not hasattr(codeflash_wrap, 'index'): - codeflash_wrap.index = {{}} - if test_id in codeflash_wrap.index: - codeflash_wrap.index[test_id] += 1 - else: - codeflash_wrap.index[test_id] = 0 - codeflash_test_index = codeflash_wrap.index[test_id] - invocation_id = f'{{line_id}}_{{codeflash_test_index}}' """ - if sys.version_info < (3, 12): - expected += """ print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")""" - else: - expected += """ print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')""" - expected += """ - exception = None - gc.disable() - try: - counter = time.perf_counter_ns() - return_value = wrapped(*args, **kwargs) - codeflash_duration = time.perf_counter_ns() - counter - except Exception as e: - codeflash_duration = time.perf_counter_ns() - counter - exception = e - gc.enable() - pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) - codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) - codeflash_con.commit() - if exception: - raise exception - return return_value - + + codeflash_wrap_string + + """ def test_code_replacement10() -> None: codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX']) codeflash_iteration = os.environ['CODEFLASH_TEST_ITERATION'] @@ -2890,6 +2962,7 @@ def test_code_replacement10() -> None: assert code_context.testgen_context_code == get_code_output codeflash_con.close() """ + ) with tempfile.NamedTemporaryFile(mode="w") as f: f.write(code) @@ -2907,7 +2980,7 @@ def test_code_replacement10() -> None: ) os.chdir(original_cwd) assert success - assert new_test == expected.format( + assert new_test.replace('"', "'") == expected.replace('"', "'").format( module_path=Path(f.name).name, tmp_dir_path=get_run_tmp_file(Path("test_return_values")) ) diff --git a/tests/test_instrumentation_run_results_aiservice.py b/tests/test_instrumentation_run_results_aiservice.py index c1a759681..ddfd8de8c 100644 --- a/tests/test_instrumentation_run_results_aiservice.py +++ b/tests/test_instrumentation_run_results_aiservice.py @@ -6,7 +6,6 @@ from pathlib import Path import isort - from code_to_optimize.bubble_sort_method import BubbleSorter from codeflash.code_utils.code_utils import get_run_tmp_file from codeflash.discovery.functions_to_optimize import FunctionToOptimize @@ -16,17 +15,18 @@ from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture # Used by aiservice instrumentation -behavior_logging_code = """from __future__ import annotations +behavior_logging_code = """ +from __future__ import annotations import gc import inspect import os import time +import dill as pickle from pathlib import Path from typing import Any, Callable, Optional -import dill as pickle def codeflash_wrap( wrapped: Callable[..., Any], @@ -48,8 +48,9 @@ def codeflash_wrap( codeflash_wrap.index[test_id] = 0 codeflash_test_index = codeflash_wrap.index[test_id] invocation_id = f"{line_id}_{codeflash_test_index}" + test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" print( - f"!######{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}######!" + f"!$######{test_stdout_tag}######$!" ) exception = None gc.disable() @@ -61,6 +62,7 @@ def codeflash_wrap( codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() + print(f"!######{test_stdout_tag}######!") iteration = os.environ["CODEFLASH_TEST_ITERATION"] with Path( "{codeflash_run_tmp_dir_client_side}", f"test_return_values_{iteration}.bin" @@ -178,7 +180,7 @@ def test_single_element_list(): testing_time=0.1, ) assert test_results[0].id.function_getting_tested == "sorter" - assert test_results[0].stdout == "codeflash stdout : BubbleSorter.sorter() called" + assert test_results[0].stdout == "codeflash stdout : BubbleSorter.sorter() called\n" assert test_results[0].id.test_function_name == "test_single_element_list" assert test_results[0].did_pass assert test_results[0].return_value[1]["arr"] == [42] @@ -222,7 +224,7 @@ def sorter(self, arr): test_results, test_results_mutated_attr ) # Without codeflash capture, the init state was not verified, and the results are verified as correct even with the attribute mutated - assert test_results_mutated_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called" + assert test_results_mutated_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called\n" finally: fto_path.write_text(original_code, "utf-8") test_path.unlink(missing_ok=True) @@ -319,23 +321,32 @@ def test_single_element_list(): testing_time=0.1, ) # Verify instance_state result, which checks instance state right after __init__, using codeflash_capture - assert test_results[0].id.function_getting_tested == "BubbleSorter.__init__" - assert test_results[0].id.test_function_name == "test_single_element_list" - assert test_results[0].did_pass - assert test_results[0].return_value[0] == {"x": 0} - assert test_results[0].stdout == "codeflash stdout : BubbleSorter.sorter() called" # Verify function_to_optimize result - assert test_results[1].id.function_getting_tested == "sorter" - assert test_results[1].id.test_function_name == "test_single_element_list" - assert test_results[1].did_pass + assert test_results[0].id.function_getting_tested == "sorter" + assert test_results[0].id.test_function_name == "test_single_element_list" + assert test_results[0].did_pass # Checks input values to the function to see if they have mutated # assert comparator(test_results[1].return_value[1]["self"], BubbleSorter()) TODO: add self as input - assert test_results[1].return_value[1]["arr"] == [1, 2, 3] + assert test_results[0].return_value[1]["arr"] == [1, 2, 3] # Check function return value - assert test_results[1].return_value[2] == [1, 2, 3] + assert test_results[0].return_value[2] == [1, 2, 3] + assert ( + test_results[0].stdout + == """codeflash stdout : BubbleSorter.sorter() called +""" + ) + assert test_results[1].id.function_getting_tested == "BubbleSorter.__init__" + assert test_results[1].id.test_function_name == "test_single_element_list" + assert test_results[1].did_pass + assert test_results[1].return_value[0] == {"x": 0} + assert ( + test_results[1].stdout + == """codeflash stdout : BubbleSorter.sorter() called +""" + ) # Replace with optimized code that mutated instance attribute optimized_code_mutated_attr = """ import sys @@ -390,10 +401,10 @@ def sorter(self, arr): testing_time=0.1, ) # assert test_results_mutated_attr[0].return_value[0]["self"].x == 1 TODO: add self as input - assert test_results_mutated_attr[0].id.function_getting_tested == "BubbleSorter.__init__" - assert test_results_mutated_attr[0].return_value[0] == {"x": 1} - assert test_results_mutated_attr[0].verification_type == VerificationType.INIT_STATE_FTO - assert test_results_mutated_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called" + assert test_results_mutated_attr[1].id.function_getting_tested == "BubbleSorter.__init__" + assert test_results_mutated_attr[1].return_value[0] == {"x": 1} + assert test_results_mutated_attr[1].verification_type == VerificationType.INIT_STATE_FTO + assert test_results_mutated_attr[1].stdout == "codeflash stdout : BubbleSorter.sorter() called\n" assert not compare_test_results( test_results, test_results_mutated_attr ) # The test should fail because the instance attribute was mutated @@ -442,10 +453,10 @@ def sorter(self, arr): pytest_max_loops=1, testing_time=0.1, ) - assert test_results_new_attr[0].id.function_getting_tested == "BubbleSorter.__init__" - assert test_results_new_attr[0].return_value[0] == {"x": 0, "y": 2} - assert test_results_new_attr[0].verification_type == VerificationType.INIT_STATE_FTO - assert test_results_new_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called" + assert test_results_new_attr[1].id.function_getting_tested == "BubbleSorter.__init__" + assert test_results_new_attr[1].return_value[0] == {"x": 0, "y": 2} + assert test_results_new_attr[1].verification_type == VerificationType.INIT_STATE_FTO + assert test_results_new_attr[1].stdout == "codeflash stdout : BubbleSorter.sorter() called\n" # assert test_results_new_attr[1].return_value[1]["self"].x == 0 TODO: add self as input # assert test_results_new_attr[1].return_value[1]["self"].y == 2 TODO: add self as input assert compare_test_results( diff --git a/tests/test_test_runner.py b/tests/test_test_runner.py index 0e80b76e0..5dc6df678 100644 --- a/tests/test_test_runner.py +++ b/tests/test_test_runner.py @@ -1,5 +1,3 @@ -import re - import os import tempfile from pathlib import Path @@ -145,5 +143,5 @@ def test_sort(): test_xml_file_path=result_file, test_files=test_files, test_config=config, run_result=process ) match = ImportErrorPattern.search(process.stdout).group() - assert match=="ModuleNotFoundError: No module named 'torch'" + assert match == "ModuleNotFoundError: No module named 'torch'" result_file.unlink(missing_ok=True) From bc47662b9bf493dcfebdd6d793765680468886c3 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 01:16:26 -0700 Subject: [PATCH 05/14] remove spare comments --- .../code_utils/instrument_existing_tests.py | 43 ------------------- 1 file changed, 43 deletions(-) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index 9c071996a..6eac52809 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -508,50 +508,7 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun keywords=[], ) ), - # ast.Expr( - # value=ast.Call( - # func=ast.Name(id="print", ctx=ast.Load()), - # args=[ - # ast.JoinedStr( - # values=[ - # ast.Constant(value="!######"), - # ast.FormattedValue( - # value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1 - # ), - # ast.Constant(value=":"), - # ast.FormattedValue( - # value=ast.IfExp( - # test=ast.Name(id="test_class_name", ctx=ast.Load()), - # body=ast.BinOp( - # left=ast.Name(id="test_class_name", ctx=ast.Load()), - # op=ast.Add(), - # right=ast.Constant(value="."), - # ), - # orelse=ast.Constant(value=""), - # ), - # conversion=-1, - # ), - # ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1), - # ast.Constant(value=":"), - # ast.FormattedValue( - # value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1 - # ), - # ast.Constant(value=":"), - # ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1), - # ast.Constant(value=":"), - # ast.FormattedValue( - # value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1 - # ), - # ast.Constant(value="######!"), - # ] - # ) - # ], - # keywords=[], - # ) - # ) ] - # if mode == TestingMode.BEHAVIOR - # else [] ), ast.Assign( targets=[ast.Name(id="exception", ctx=ast.Store())], value=ast.Constant(value=None), lineno=lineno + 10 From 80baa7857db0c5c17e2fee37f3ec672b97ad00f7 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 01:27:33 -0700 Subject: [PATCH 06/14] fix more tests --- tests/test_instrument_all_and_run.py | 69 ++++++++++------------------ 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py index 5bc942fdd..15676aa7a 100644 --- a/tests/test_instrument_all_and_run.py +++ b/tests/test_instrument_all_and_run.py @@ -25,7 +25,8 @@ codeflash_wrap.index[test_id] = 0 codeflash_test_index = codeflash_wrap.index[test_id] invocation_id = f'{{line_id}}_{{codeflash_test_index}}' - print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!") + test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" + print(f"!$######{{test_stdout_tag}}######$!") exception = None gc.disable() try: @@ -36,6 +37,7 @@ codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() + print(f"!######{{test_stdout_tag}}######!") pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) codeflash_con.commit() @@ -170,9 +172,8 @@ def test_sort(): out_str = """codeflash stdout: Sorting list result: [0, 1, 2, 3, 4, 5] - -codeflash stdout: Sorting list -result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]""" +""" + assert test_results[0].stdout == out_str assert out_str == test_results[0].stdout assert test_results[0].id.function_getting_tested == "sorter" assert test_results[0].id.iteration_id == "1_0" @@ -185,7 +186,10 @@ def test_sort(): assert test_results[0].runtime > 0 assert test_results[0].did_pass assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],) - assert out_str == test_results[1].stdout.strip() + out_str = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert out_str == test_results[1].stdout assert test_results[1].id.function_getting_tested == "sorter" assert test_results[1].id.iteration_id == "4_0" @@ -197,6 +201,10 @@ def test_sort(): ) assert test_results[1].runtime > 0 assert test_results[1].did_pass + out_str = """codeflash stdout: Sorting list +result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] +""" + assert test_results[1].stdout == out_str results2, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, @@ -208,10 +216,8 @@ def test_sort(): ) out_str = """codeflash stdout: Sorting list result: [0, 1, 2, 3, 4, 5] - -codeflash stdout: Sorting list -result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]""" - assert out_str == results2[0].stdout.strip() +""" + assert out_str == results2[0].stdout assert compare_test_results(test_results, results2) finally: fto_path.write_text(original_code, "utf-8") @@ -234,7 +240,8 @@ def test_sort(): output = sort_class.sorter(input) assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]""" - expected = """import gc + expected = ( + """import gc import os import sqlite3 import time @@ -242,42 +249,9 @@ def test_sort(): import dill as pickle from code_to_optimize.bubble_sort_method import BubbleSorter - - -def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, function_name, line_id, loop_index, codeflash_cur, codeflash_con, *args, **kwargs): - test_id = f'{{test_module_name}}:{{test_class_name}}:{{test_name}}:{{line_id}}:{{loop_index}}' - if not hasattr(codeflash_wrap, 'index'): - codeflash_wrap.index = {{}} - if test_id in codeflash_wrap.index: - codeflash_wrap.index[test_id] += 1 - else: - codeflash_wrap.index[test_id] = 0 - codeflash_test_index = codeflash_wrap.index[test_id] - invocation_id = f'{{line_id}}_{{codeflash_test_index}}' - """ - if sys.version_info < (3, 12): - expected += """print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")""" - else: - expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')""" - expected += """ - exception = None - gc.disable() - try: - counter = time.perf_counter_ns() - return_value = wrapped(*args, **kwargs) - codeflash_duration = time.perf_counter_ns() - counter - except Exception as e: - codeflash_duration = time.perf_counter_ns() - counter - exception = e - gc.enable() - pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) - codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) - codeflash_con.commit() - if exception: - raise exception - return return_value """ - expected += """ + + codeflash_wrap_string + + """ def test_sort(): codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX']) codeflash_iteration = os.environ['CODEFLASH_TEST_ITERATION'] @@ -294,6 +268,7 @@ def test_sort(): assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] codeflash_con.close() """ + ) fto_path = (Path(__file__).parent.resolve() / "../code_to_optimize/bubble_sort_method.py").resolve() original_code = fto_path.read_text("utf-8") fto = FunctionToOptimize( @@ -379,7 +354,9 @@ def test_sort(): assert test_results[1].runtime > 0 assert test_results[1].did_pass assert test_results[1].return_value == ([0, 1, 2, 3, 4, 5],) - out_str = """codeflash stdout : BubbleSorter.sorter() called\n\n\ncodeflash stdout : BubbleSorter.sorter() called""" + out_str = ( + """codeflash stdout : BubbleSorter.sorter() called\n\n\ncodeflash stdout : BubbleSorter.sorter() called""" + ) assert test_results[1].stdout == out_str assert compare_test_results(test_results, test_results) assert test_results[2].id.function_getting_tested == "BubbleSorter.__init__" From 29b2f2172e174914a904af1d2f8a8a8f425bd721 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 02:04:30 -0700 Subject: [PATCH 07/14] fix more tests --- tests/test_instrument_all_and_run.py | 3 ++- tests/test_instrument_tests.py | 27 ++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py index 15676aa7a..812533521 100644 --- a/tests/test_instrument_all_and_run.py +++ b/tests/test_instrument_all_and_run.py @@ -247,8 +247,9 @@ def test_sort(): import time import dill as pickle - from code_to_optimize.bubble_sort_method import BubbleSorter + + """ + codeflash_wrap_string + """ diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py index 80c69bec9..c5fd19383 100644 --- a/tests/test_instrument_tests.py +++ b/tests/test_instrument_tests.py @@ -307,10 +307,12 @@ def test_prepare_image_for_yolo(): def test_perfinjector_bubble_sort_results() -> None: computed_fn_opt = False code = """from code_to_optimize.bubble_sort import sorter +import datetime def test_sort(): input = [5, 4, 3, 2, 1, 0] + print(datetime.datetime.now().isoformat()) output = sorter(input) assert output == [0, 1, 2, 3, 4, 5] @@ -319,7 +321,8 @@ def test_sort(): assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]""" expected = ( - """import gc + """import datetime +import gc import os import sqlite3 import time @@ -339,17 +342,19 @@ def test_sort(): codeflash_cur = codeflash_con.cursor() codeflash_cur.execute('CREATE TABLE IF NOT EXISTS test_results (test_module_path TEXT, test_class_name TEXT, test_function_name TEXT, function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, runtime INTEGER, return_value BLOB, verification_type TEXT)') input = [5, 4, 3, 2, 1, 0] - output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '1', codeflash_loop_index, codeflash_cur, codeflash_con, input) + print(datetime.datetime.now().isoformat()) + output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '2', codeflash_loop_index, codeflash_cur, codeflash_con, input) assert output == [0, 1, 2, 3, 4, 5] input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0] - output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '4', codeflash_loop_index, codeflash_cur, codeflash_con, input) + output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '5', codeflash_loop_index, codeflash_cur, codeflash_con, input) assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] codeflash_con.close() """ ) expected_perfonly = ( - """import gc + """import datetime +import gc import os import time @@ -362,10 +367,10 @@ def test_sort(): def test_sort(): codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX']) input = [5, 4, 3, 2, 1, 0] - output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '1', codeflash_loop_index, input) + output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '2', codeflash_loop_index, input) assert output == [0, 1, 2, 3, 4, 5] input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0] - output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '4', codeflash_loop_index, input) + output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '5', codeflash_loop_index, input) assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] """ ) @@ -390,7 +395,7 @@ def test_sort(): os.chdir(run_cwd) success, new_test = inject_profiling_into_existing_test( test_path, - [CodePosition(6, 13), CodePosition(10, 13)], + [CodePosition(8, 13), CodePosition(12, 13)], func, project_root_path, "pytest", @@ -560,6 +565,7 @@ def test_perfinjector_bubble_sort_parametrized_results() -> None: computed_fn_opt = False code = """from code_to_optimize.bubble_sort import sorter import pytest +import datetime @pytest.mark.parametrize( @@ -571,6 +577,7 @@ def test_perfinjector_bubble_sort_parametrized_results() -> None: ], ) def test_sort_parametrized(input, expected_output): + print(datetime.datetime.now().isoformat()) output = sorter(input) assert output == expected_output """ @@ -606,6 +613,7 @@ def test_sort_parametrized(input, expected_output): """import gc import os import time +import datetime import pytest @@ -618,6 +626,7 @@ def test_sort_parametrized(input, expected_output): @pytest.mark.parametrize('input, expected_output', [([5, 4, 3, 2, 1, 0], [0, 1, 2, 3, 4, 5]), ([5.0, 4.0, 3.0, 2.0, 1.0, 0.0], [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]), (list(reversed(range(50))), list(range(50)))]) def test_sort_parametrized(input, expected_output): codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX']) + print(datetime.datetime.now().isoformat()) output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort_parametrized', 'sorter', '0', codeflash_loop_index, input) assert output == expected_output """ @@ -643,11 +652,11 @@ def test_sort_parametrized(input, expected_output): func = FunctionToOptimize(function_name="sorter", parents=[], file_path=code_path) os.chdir(run_cwd) success, new_test = inject_profiling_into_existing_test( - test_path, [CodePosition(14, 13)], func, project_root_path, "pytest", mode=TestingMode.BEHAVIOR + test_path, [CodePosition(16, 13)], func, project_root_path, "pytest", mode=TestingMode.BEHAVIOR ) assert success success, new_test_perf = inject_profiling_into_existing_test( - test_path, [CodePosition(14, 13)], func, project_root_path, "pytest", mode=TestingMode.PERFORMANCE + test_path, [CodePosition(16, 13)], func, project_root_path, "pytest", mode=TestingMode.PERFORMANCE ) os.chdir(original_cwd) From 1070815f972aa07b708178f1ee1d33d0afa1db43 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 02:09:08 -0700 Subject: [PATCH 08/14] make ruff happy --- codeflash/verification/parse_test_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 480b40630..ed9ebf1c8 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -265,7 +265,7 @@ def parse_test_xml( timed_out = True sys_stdout = testcase.system_out or "" - begin_matches = [match for match in matches_re_start.finditer(sys_stdout)] + begin_matches = list(matches_re_start.finditer(sys_stdout)) end_matches = {} for match in matches_re_end.finditer(sys_stdout): groups = match.groups() From 38f1ab55cc72a06e71cf189dd0e3428589c93ab7 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 13:42:44 -0700 Subject: [PATCH 09/14] try one fix attempt --- tests/test_instrument_all_and_run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py index 812533521..75b1de3a3 100644 --- a/tests/test_instrument_all_and_run.py +++ b/tests/test_instrument_all_and_run.py @@ -247,6 +247,7 @@ def test_sort(): import time import dill as pickle + from code_to_optimize.bubble_sort_method import BubbleSorter From 3a6e4816788eb6f4f892140cb016d3d278e73ec9 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 15:46:58 -0700 Subject: [PATCH 10/14] fix one test --- codeflash/verification/codeflash_capture.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codeflash/verification/codeflash_capture.py b/codeflash/verification/codeflash_capture.py index 3d28a027e..45d046cf6 100644 --- a/codeflash/verification/codeflash_capture.py +++ b/codeflash/verification/codeflash_capture.py @@ -112,9 +112,8 @@ def wrapper(*args, **kwargs) -> None: # noqa: ANN002, ANN003 # Generate invocation id invocation_id = f"{line_id}_{codeflash_test_index}" - print( - f"!######{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}######!" - ) + test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" + print(f"!$######{test_stdout_tag}######$!") # Connect to sqlite codeflash_con = sqlite3.connect(f"{tmp_dir_path}_{codeflash_iteration}.sqlite") codeflash_cur = codeflash_con.cursor() @@ -131,6 +130,7 @@ def wrapper(*args, **kwargs) -> None: # noqa: ANN002, ANN003 exception = e finally: gc.enable() + print(f"!######{test_stdout_tag}######!") # Capture instance state after initialization if hasattr(args[0], "__dict__"): From 537ca0139ca6d24e04dd8490eec9c5ed155c20b3 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 15:55:48 -0700 Subject: [PATCH 11/14] fix one test --- tests/test_instrument_all_and_run.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py index 75b1de3a3..7e1a20f49 100644 --- a/tests/test_instrument_all_and_run.py +++ b/tests/test_instrument_all_and_run.py @@ -356,9 +356,7 @@ def test_sort(): assert test_results[1].runtime > 0 assert test_results[1].did_pass assert test_results[1].return_value == ([0, 1, 2, 3, 4, 5],) - out_str = ( - """codeflash stdout : BubbleSorter.sorter() called\n\n\ncodeflash stdout : BubbleSorter.sorter() called""" - ) + out_str = """codeflash stdout : BubbleSorter.sorter() called\n""" assert test_results[1].stdout == out_str assert compare_test_results(test_results, test_results) assert test_results[2].id.function_getting_tested == "BubbleSorter.__init__" @@ -376,6 +374,7 @@ def test_sort(): ) assert test_results[3].runtime > 0 assert test_results[3].did_pass + assert test_results[3].stdout == """codeflash stdout : BubbleSorter.sorter() called\n""" results2, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, From 3cd3f0a111fd6975a3fb13e7ff0127d42bbff3ac Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 16:09:35 -0700 Subject: [PATCH 12/14] fix one more test --- tests/test_instrument_tests.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py index c5fd19383..254352073 100644 --- a/tests/test_instrument_tests.py +++ b/tests/test_instrument_tests.py @@ -367,6 +367,7 @@ def test_sort(): def test_sort(): codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX']) input = [5, 4, 3, 2, 1, 0] + print(datetime.datetime.now().isoformat()) output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '2', codeflash_loop_index, input) assert output == [0, 1, 2, 3, 4, 5] input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0] @@ -395,7 +396,7 @@ def test_sort(): os.chdir(run_cwd) success, new_test = inject_profiling_into_existing_test( test_path, - [CodePosition(8, 13), CodePosition(12, 13)], + [CodePosition(8, 14), CodePosition(12, 14)], func, project_root_path, "pytest", @@ -411,7 +412,7 @@ def test_sort(): success, new_perf_test = inject_profiling_into_existing_test( test_path, - [CodePosition(6, 13), CodePosition(10, 13)], + [CodePosition(8, 14), CodePosition(12, 14)], func, project_root_path, "pytest", @@ -461,7 +462,7 @@ def test_sort(): testing_time=0.1, ) assert test_results[0].id.function_getting_tested == "sorter" - assert test_results[0].id.iteration_id == "1_0" + assert test_results[0].id.iteration_id == "2_0" assert test_results[0].id.test_class_name is None assert test_results[0].id.test_function_name == "test_sort" assert ( @@ -473,7 +474,7 @@ def test_sort(): assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],) assert test_results[1].id.function_getting_tested == "sorter" - assert test_results[1].id.iteration_id == "4_0" + assert test_results[1].id.iteration_id == "5_0" assert test_results[1].id.test_class_name is None assert test_results[1].id.test_function_name == "test_sort" assert ( @@ -496,7 +497,7 @@ def test_sort(): testing_time=0.1, ) assert test_results_perf[0].id.function_getting_tested == "sorter" - assert test_results_perf[0].id.iteration_id == "1_0" + assert test_results_perf[0].id.iteration_id == "2_0" assert test_results_perf[0].id.test_class_name is None assert test_results_perf[0].id.test_function_name == "test_sort" assert ( @@ -514,7 +515,7 @@ def test_sort(): ) assert test_results_perf[1].id.function_getting_tested == "sorter" - assert test_results_perf[1].id.iteration_id == "4_0" + assert test_results_perf[1].id.iteration_id == "5_0" assert test_results_perf[1].id.test_class_name is None assert test_results_perf[1].id.test_function_name == "test_sort" assert ( @@ -565,7 +566,6 @@ def test_perfinjector_bubble_sort_parametrized_results() -> None: computed_fn_opt = False code = """from code_to_optimize.bubble_sort import sorter import pytest -import datetime @pytest.mark.parametrize( @@ -577,7 +577,6 @@ def test_perfinjector_bubble_sort_parametrized_results() -> None: ], ) def test_sort_parametrized(input, expected_output): - print(datetime.datetime.now().isoformat()) output = sorter(input) assert output == expected_output """ @@ -613,7 +612,6 @@ def test_sort_parametrized(input, expected_output): """import gc import os import time -import datetime import pytest @@ -626,7 +624,6 @@ def test_sort_parametrized(input, expected_output): @pytest.mark.parametrize('input, expected_output', [([5, 4, 3, 2, 1, 0], [0, 1, 2, 3, 4, 5]), ([5.0, 4.0, 3.0, 2.0, 1.0, 0.0], [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]), (list(reversed(range(50))), list(range(50)))]) def test_sort_parametrized(input, expected_output): codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX']) - print(datetime.datetime.now().isoformat()) output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort_parametrized', 'sorter', '0', codeflash_loop_index, input) assert output == expected_output """ @@ -652,11 +649,11 @@ def test_sort_parametrized(input, expected_output): func = FunctionToOptimize(function_name="sorter", parents=[], file_path=code_path) os.chdir(run_cwd) success, new_test = inject_profiling_into_existing_test( - test_path, [CodePosition(16, 13)], func, project_root_path, "pytest", mode=TestingMode.BEHAVIOR + test_path, [CodePosition(14, 13)], func, project_root_path, "pytest", mode=TestingMode.BEHAVIOR ) assert success success, new_test_perf = inject_profiling_into_existing_test( - test_path, [CodePosition(16, 13)], func, project_root_path, "pytest", mode=TestingMode.PERFORMANCE + test_path, [CodePosition(14, 13)], func, project_root_path, "pytest", mode=TestingMode.PERFORMANCE ) os.chdir(original_cwd) From 6c00fb3b5c98c48c59902feb9175707df2bf1b88 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 16:18:02 -0700 Subject: [PATCH 13/14] fix one more test --- ...t_instrumentation_run_results_aiservice.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/tests/test_instrumentation_run_results_aiservice.py b/tests/test_instrumentation_run_results_aiservice.py index ddfd8de8c..78d9973f1 100644 --- a/tests/test_instrumentation_run_results_aiservice.py +++ b/tests/test_instrumentation_run_results_aiservice.py @@ -323,30 +323,27 @@ def test_single_element_list(): # Verify instance_state result, which checks instance state right after __init__, using codeflash_capture # Verify function_to_optimize result - assert test_results[0].id.function_getting_tested == "sorter" + assert test_results[0].id.function_getting_tested == "BubbleSorter.__init__" assert test_results[0].id.test_function_name == "test_single_element_list" assert test_results[0].did_pass + assert test_results[0].return_value[0] == {"x": 0} + assert test_results[0].stdout == "" + assert test_results[1].id.function_getting_tested == "sorter" + assert test_results[1].id.test_function_name == "test_single_element_list" + assert test_results[1].did_pass # Checks input values to the function to see if they have mutated # assert comparator(test_results[1].return_value[1]["self"], BubbleSorter()) TODO: add self as input - assert test_results[0].return_value[1]["arr"] == [1, 2, 3] + assert test_results[1].return_value[1]["arr"] == [1, 2, 3] # Check function return value - assert test_results[0].return_value[2] == [1, 2, 3] - assert ( - test_results[0].stdout - == """codeflash stdout : BubbleSorter.sorter() called -""" - ) - assert test_results[1].id.function_getting_tested == "BubbleSorter.__init__" - assert test_results[1].id.test_function_name == "test_single_element_list" - assert test_results[1].did_pass - assert test_results[1].return_value[0] == {"x": 0} + assert test_results[1].return_value[2] == [1, 2, 3] assert ( test_results[1].stdout == """codeflash stdout : BubbleSorter.sorter() called """ ) + # Replace with optimized code that mutated instance attribute optimized_code_mutated_attr = """ import sys @@ -401,10 +398,10 @@ def sorter(self, arr): testing_time=0.1, ) # assert test_results_mutated_attr[0].return_value[0]["self"].x == 1 TODO: add self as input - assert test_results_mutated_attr[1].id.function_getting_tested == "BubbleSorter.__init__" - assert test_results_mutated_attr[1].return_value[0] == {"x": 1} - assert test_results_mutated_attr[1].verification_type == VerificationType.INIT_STATE_FTO - assert test_results_mutated_attr[1].stdout == "codeflash stdout : BubbleSorter.sorter() called\n" + assert test_results_mutated_attr[0].id.function_getting_tested == "BubbleSorter.__init__" + assert test_results_mutated_attr[0].return_value[0] == {"x": 1} + assert test_results_mutated_attr[0].verification_type == VerificationType.INIT_STATE_FTO + assert test_results_mutated_attr[0].stdout == "" assert not compare_test_results( test_results, test_results_mutated_attr ) # The test should fail because the instance attribute was mutated @@ -453,10 +450,10 @@ def sorter(self, arr): pytest_max_loops=1, testing_time=0.1, ) - assert test_results_new_attr[1].id.function_getting_tested == "BubbleSorter.__init__" - assert test_results_new_attr[1].return_value[0] == {"x": 0, "y": 2} - assert test_results_new_attr[1].verification_type == VerificationType.INIT_STATE_FTO - assert test_results_new_attr[1].stdout == "codeflash stdout : BubbleSorter.sorter() called\n" + assert test_results_new_attr[0].id.function_getting_tested == "BubbleSorter.__init__" + assert test_results_new_attr[0].return_value[0] == {"x": 0, "y": 2} + assert test_results_new_attr[0].verification_type == VerificationType.INIT_STATE_FTO + assert test_results_new_attr[0].stdout == "" # assert test_results_new_attr[1].return_value[1]["self"].x == 0 TODO: add self as input # assert test_results_new_attr[1].return_value[1]["self"].y == 2 TODO: add self as input assert compare_test_results( From 0e5f79fab3add6c92c2b488df9502991b318a144 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 3 Jun 2025 16:29:40 -0700 Subject: [PATCH 14/14] fix one more test --- tests/test_instrument_tests.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py index 254352073..a117c2205 100644 --- a/tests/test_instrument_tests.py +++ b/tests/test_instrument_tests.py @@ -131,11 +131,11 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi """ if sys.version_info < (3, 12): expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" - print(f"!$######{{test_stdout_tag}}######$!")""" + """ else: expected += """test_stdout_tag = f'{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}' - print(f'!$######{{test_stdout_tag}}######$!')""" - expected += """ + """ + expected += """print(f'!$######{{test_stdout_tag}}######$!') exception = None gc.disable() try: @@ -146,12 +146,7 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() - """ - if sys.version_info < (3, 12): - expected += """print(f"!######{{test_stdout_tag}}######!")""" - else: - expected += """print(f'!######{{test_stdout_tag}}######!')""" - expected += """ + print(f'!######{{test_stdout_tag}}######!') pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) codeflash_con.commit() @@ -240,11 +235,11 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi """ if sys.version_info < (3, 12): expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}" - print(f"!$######{{test_stdout_tag}}######$!")""" + """ else: expected += """test_stdout_tag = f'{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}' - print(f'!$######{{test_stdout_tag}}######$!')""" - expected += """ + """ + expected += """print(f'!$######{{test_stdout_tag}}######$!') exception = None gc.disable() try: @@ -255,12 +250,7 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi codeflash_duration = time.perf_counter_ns() - counter exception = e gc.enable() - """ - if sys.version_info < (3, 12): - expected += """print(f"!######{{test_stdout_tag}}######!")""" - else: - expected += """print(f'!######{{test_stdout_tag}}######!')""" - expected += """ + print(f'!######{{test_stdout_tag}}######!') pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value) codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call')) codeflash_con.commit()