From f6fd4503dfb336e0df9fd5d9c139de1d0f60c42f Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Fri, 30 May 2025 20:03:53 -0700
Subject: [PATCH 01/14] first version of stdout capture improvement

---
 codeflash/verification/parse_test_output.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index b9060f4bc..9dcacc1d2 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -37,7 +37,7 @@ def parse_func(file_path: Path) -> XMLParser:
 
 
 matches_re = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")
-cleaner_re = re.compile(r"!######.*?######!|-+\s*Captured\s+(Log|Out)\s*-+\n?")
+stdout_re = re.compile(r"!######.*?######!\n(.*)!\$######.*?######\$!", re.DOTALL)
 
 
 def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
@@ -268,7 +268,7 @@ def parse_test_xml(
             matches = matches_re.findall(sys_stdout)
 
             if sys_stdout:
-                sys_stdout = cleaner_re.sub("", sys_stdout).strip()
+                sys_stdout = stdout_re.search(sys_stdout).group(1)
 
             if not matches or not len(matches):
                 test_results.add(

From 6dd2feef6ae591b8de78c89274f8b2a606c8e1b9 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Mon, 2 Jun 2025 17:59:29 -0700
Subject: [PATCH 02/14] Print start/end stdout tags around wrapped call

---
 .../code_utils/instrument_existing_tests.py   | 132 +++++++++++++-----
 codeflash/verification/parse_test_output.py   |   6 +-
 tests/test_instrument_tests.py                |  16 ++-
 3 files changed, 111 insertions(+), 43 deletions(-)

diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py
index e691c2ed2..50c3610bd 100644
--- a/codeflash/code_utils/instrument_existing_tests.py
+++ b/codeflash/code_utils/instrument_existing_tests.py
@@ -462,50 +462,96 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun
         ),
         *(
             [
+                ast.Assign(
+                    targets=[
+                        ast.Name(id='test_stdout_tag', ctx=ast.Store())],
+                    value=ast.JoinedStr(
+                        values=[
+                            ast.FormattedValue(
+                                value=ast.Name(id='test_module_name', ctx=ast.Load()),
+                                conversion=-1),
+                            ast.Constant(value=':'),
+                            ast.FormattedValue(
+                                value=ast.IfExp(
+                                    test=ast.Name(id='test_class_name', ctx=ast.Load()),
+                                    body=ast.BinOp(
+                                        left=ast.Name(id='test_class_name', ctx=ast.Load()),
+                                        op=ast.Add(),
+                                        right=ast.Constant(value='.')),
+                                    orelse=ast.Constant(value='')),
+                                conversion=-1),
+                            ast.FormattedValue(
+                                value=ast.Name(id='test_name', ctx=ast.Load()),
+                                conversion=-1),
+                            ast.Constant(value=':'),
+                            ast.FormattedValue(
+                                value=ast.Name(id='function_name', ctx=ast.Load()),
+                                conversion=-1),
+                            ast.Constant(value=':'),
+                            ast.FormattedValue(
+                                value=ast.Name(id='loop_index', ctx=ast.Load()),
+                                conversion=-1),
+                            ast.Constant(value=':'),
+                            ast.FormattedValue(
+                                value=ast.Name(id='invocation_id', ctx=ast.Load()),
+                                conversion=-1)]),
+                lineno=lineno + 9,),
                 ast.Expr(
                     value=ast.Call(
-                        func=ast.Name(id="print", ctx=ast.Load()),
+                        func=ast.Name(id='print', ctx=ast.Load()),
                         args=[
                             ast.JoinedStr(
                                 values=[
-                                    ast.Constant(value="!######"),
+                                    ast.Constant(value='!$######'),
                                     ast.FormattedValue(
-                                        value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1
-                                    ),
-                                    ast.Constant(value=":"),
-                                    ast.FormattedValue(
-                                        value=ast.IfExp(
-                                            test=ast.Name(id="test_class_name", ctx=ast.Load()),
-                                            body=ast.BinOp(
-                                                left=ast.Name(id="test_class_name", ctx=ast.Load()),
-                                                op=ast.Add(),
-                                                right=ast.Constant(value="."),
-                                            ),
-                                            orelse=ast.Constant(value=""),
-                                        ),
-                                        conversion=-1,
-                                    ),
-                                    ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1),
-                                    ast.Constant(value=":"),
-                                    ast.FormattedValue(
-                                        value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1
-                                    ),
-                                    ast.Constant(value=":"),
-                                    ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1),
-                                    ast.Constant(value=":"),
-                                    ast.FormattedValue(
-                                        value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1
-                                    ),
-                                    ast.Constant(value="######!"),
-                                ]
-                            )
-                        ],
-                        keywords=[],
-                    )
-                )
+                                        value=ast.Name(id='test_stdout_tag', ctx=ast.Load()),
+                                        conversion=-1),
+                                    ast.Constant(value='######$!')])],
+                        keywords=[])),
+                # ast.Expr(
+                #     value=ast.Call(
+                #         func=ast.Name(id="print", ctx=ast.Load()),
+                #         args=[
+                #             ast.JoinedStr(
+                #                 values=[
+                #                     ast.Constant(value="!######"),
+                #                     ast.FormattedValue(
+                #                         value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1
+                #                     ),
+                #                     ast.Constant(value=":"),
+                #                     ast.FormattedValue(
+                #                         value=ast.IfExp(
+                #                             test=ast.Name(id="test_class_name", ctx=ast.Load()),
+                #                             body=ast.BinOp(
+                #                                 left=ast.Name(id="test_class_name", ctx=ast.Load()),
+                #                                 op=ast.Add(),
+                #                                 right=ast.Constant(value="."),
+                #                             ),
+                #                             orelse=ast.Constant(value=""),
+                #                         ),
+                #                         conversion=-1,
+                #                     ),
+                #                     ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1),
+                #                     ast.Constant(value=":"),
+                #                     ast.FormattedValue(
+                #                         value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1
+                #                     ),
+                #                     ast.Constant(value=":"),
+                #                     ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1),
+                #                     ast.Constant(value=":"),
+                #                     ast.FormattedValue(
+                #                         value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1
+                #                     ),
+                #                     ast.Constant(value="######!"),
+                #                 ]
+                #             )
+                #         ],
+                #         keywords=[],
+                #     )
+                # )
             ]
-            if mode == TestingMode.BEHAVIOR
-            else []
+            # if mode == TestingMode.BEHAVIOR
+            # else []
         ),
         ast.Assign(
             targets=[ast.Name(id="exception", ctx=ast.Store())], value=ast.Constant(value=None), lineno=lineno + 10
@@ -598,6 +644,18 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun
                 keywords=[],
             )
         ),
+        ast.Expr(
+            value=ast.Call(
+                func=ast.Name(id='print', ctx=ast.Load()),
+                args=[
+                    ast.JoinedStr(
+                        values=[
+                            ast.Constant(value='!######'),
+                            ast.FormattedValue(
+                                value=ast.Name(id='test_stdout_tag', ctx=ast.Load()),
+                                conversion=-1),
+                            ast.Constant(value='######!')])],
+                keywords=[])),
         *(
             [
                 ast.Expr(
diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index 9dcacc1d2..7e4ae7213 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -37,7 +37,7 @@ def parse_func(file_path: Path) -> XMLParser:
 
 
 matches_re = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")
-stdout_re = re.compile(r"!######.*?######!\n(.*)!\$######.*?######\$!", re.DOTALL)
+stdout_re = re.compile(r"!\$######.*?######\$!\n(.*)!######.*?######!", re.DOTALL)
 
 
 def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
@@ -268,7 +268,9 @@ def parse_test_xml(
             matches = matches_re.findall(sys_stdout)
 
             if sys_stdout:
-                sys_stdout = stdout_re.search(sys_stdout).group(1)
+                print("sys_stdout: ", sys_stdout)
+                stdout_match = stdout_re.search(sys_stdout)
+                sys_stdout = stdout_match.group(1) if stdout_match else ""
 
             if not matches or not len(matches):
                 test_results.add(
diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py
index 44661912a..c0f96c022 100644
--- a/tests/test_instrument_tests.py
+++ b/tests/test_instrument_tests.py
@@ -37,7 +37,8 @@
         codeflash_wrap.index[test_id] = 0
     codeflash_test_index = codeflash_wrap.index[test_id]
     invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
-    print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")
+    test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
+    print(f"!$######{test_stdout_tag}######$!")
     exception = None
     gc.disable()
     try:
@@ -48,6 +49,7 @@
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
+    print(f"!######{test_stdout_tag}######!")
     pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
     codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
     codeflash_con.commit()
@@ -67,6 +69,8 @@
     codeflash_test_index = codeflash_wrap.index[test_id]
     invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
     exception = None
+    test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
+    print(f"!$######{{test_stdout_tag}}######$!")
     gc.disable()
     try:
         counter = time.perf_counter_ns()
@@ -76,7 +80,7 @@
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
-    print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}:{{codeflash_duration}}######!")
+    print(f"!######{{test_stdout_tag}}:{{codeflash_duration}}######!")
     if exception:
         raise exception
     return return_value
@@ -124,11 +128,14 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
         codeflash_wrap.index[test_id] = 0
     codeflash_test_index = codeflash_wrap.index[test_id]
     invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
+    test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
     """
     if sys.version_info < (3, 12):
-        expected += """print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")"""
+        expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
+    print(f"!$######{{test_stdout_tag}}######$!")"""
     else:
-        expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')"""
+        expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')
+    print(f'!$######{{test_stdout_tag}}######$!')"""
     expected += """
     exception = None
     gc.disable()
@@ -140,6 +147,7 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
+    print(f"!######{{test_stdout_tag}}######!")
     pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
     codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
     codeflash_con.commit()

From 8b2f9486c1263c79b75058f98368881eabdef6dc Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Mon, 2 Jun 2025 22:27:02 -0700
Subject: [PATCH 03/14] Pair start/end tags to slice per-call stdout

---
 codeflash/verification/parse_test_output.py | 58 +++++++++++++--------
 1 file changed, 35 insertions(+), 23 deletions(-)

diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index 7e4ae7213..480b40630 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -36,8 +36,8 @@ def parse_func(file_path: Path) -> XMLParser:
     return parse(file_path, xml_parser)
 
 
-matches_re = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")
-stdout_re = re.compile(r"!\$######.*?######\$!\n(.*)!######.*?######!", re.DOTALL)
+matches_re_start = re.compile(r"!\$######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######\$!\n")
+matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")
 
 
 def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
@@ -265,14 +265,16 @@ def parse_test_xml(
                         timed_out = True
 
             sys_stdout = testcase.system_out or ""
-            matches = matches_re.findall(sys_stdout)
-
-            if sys_stdout:
-                print("sys_stdout: ", sys_stdout)
-                stdout_match = stdout_re.search(sys_stdout)
-                sys_stdout = stdout_match.group(1) if stdout_match else ""
-
-            if not matches or not len(matches):
+            begin_matches = [match for match in matches_re_start.finditer(sys_stdout)]
+            end_matches = {}
+            for match in matches_re_end.finditer(sys_stdout):
+                groups = match.groups()
+                if len(groups[5].split(":")) > 1:
+                    iteration_id = groups[5].split(":")[0]
+                    groups = groups[:5] + (iteration_id,)
+                end_matches[groups] = match
+
+            if not begin_matches or not begin_matches:
                 test_results.add(
                     FunctionTestInvocation(
                         loop_index=loop_index,
@@ -290,26 +292,36 @@ def parse_test_xml(
                         test_type=test_type,
                         return_value=None,
                         timed_out=timed_out,
-                        stdout=sys_stdout,
+                        stdout="",
                     )
                 )
 
             else:
-                for match in matches:
-                    split_val = match[5].split(":")
-                    if len(split_val) > 1:
-                        iteration_id = split_val[0]
-                        runtime = int(split_val[1])
+                for match_index, match in enumerate(begin_matches):
+                    groups = match.groups()
+                    end_match = end_matches.get(groups)
+                    iteration_id, runtime = groups[5], None
+                    if end_match:
+                        stdout = sys_stdout[match.end() : end_match.start()]
+                        split_val = end_match.groups()[5].split(":")
+                        if len(split_val) > 1:
+                            iteration_id = split_val[0]
+                            runtime = int(split_val[1])
+                        else:
+                            iteration_id, runtime = split_val[0], None
+                    elif match_index == len(begin_matches) - 1:
+                        stdout = sys_stdout[match.end() :]
                     else:
-                        iteration_id, runtime = split_val[0], None
+                        stdout = sys_stdout[match.end() : begin_matches[match_index + 1].start()]
+
                     test_results.add(
                         FunctionTestInvocation(
-                            loop_index=int(match[4]),
+                            loop_index=int(groups[4]),
                             id=InvocationId(
-                                test_module_path=match[0],
-                                test_class_name=None if match[1] == "" else match[1][:-1],
-                                test_function_name=match[2],
-                                function_getting_tested=match[3],
+                                test_module_path=groups[0],
+                                test_class_name=None if groups[1] == "" else groups[1][:-1],
+                                test_function_name=groups[2],
+                                function_getting_tested=groups[3],
                                 iteration_id=iteration_id,
                             ),
                             file_name=test_file_path,
@@ -319,7 +331,7 @@ def parse_test_xml(
                             test_type=test_type,
                             return_value=None,
                             timed_out=timed_out,
-                            stdout=sys_stdout,
+                            stdout=stdout,
                         )
                     )
 

From 22c62cac4006d12baae139757c8fa4694320e435 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 01:13:38 -0700
Subject: [PATCH 04/14] Fix some bugs and tests

---
 .../code_utils/instrument_existing_tests.py   | 130 ++++------
 tests/test_instrument_tests.py                | 223 ++++++++++++------
 ...t_instrumentation_run_results_aiservice.py |  59 +++--
 tests/test_test_runner.py                     |   4 +-
 4 files changed, 230 insertions(+), 186 deletions(-)

diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py
index 50c3610bd..9c071996a 100644
--- a/codeflash/code_utils/instrument_existing_tests.py
+++ b/codeflash/code_utils/instrument_existing_tests.py
@@ -463,51 +463,51 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun
         *(
             [
                 ast.Assign(
-                    targets=[
-                        ast.Name(id='test_stdout_tag', ctx=ast.Store())],
+                    targets=[ast.Name(id="test_stdout_tag", ctx=ast.Store())],
                     value=ast.JoinedStr(
                         values=[
-                            ast.FormattedValue(
-                                value=ast.Name(id='test_module_name', ctx=ast.Load()),
-                                conversion=-1),
-                            ast.Constant(value=':'),
+                            ast.FormattedValue(value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1),
+                            ast.Constant(value=":"),
                             ast.FormattedValue(
                                 value=ast.IfExp(
-                                    test=ast.Name(id='test_class_name', ctx=ast.Load()),
+                                    test=ast.Name(id="test_class_name", ctx=ast.Load()),
                                     body=ast.BinOp(
-                                        left=ast.Name(id='test_class_name', ctx=ast.Load()),
+                                        left=ast.Name(id="test_class_name", ctx=ast.Load()),
                                         op=ast.Add(),
-                                        right=ast.Constant(value='.')),
-                                    orelse=ast.Constant(value='')),
-                                conversion=-1),
-                            ast.FormattedValue(
-                                value=ast.Name(id='test_name', ctx=ast.Load()),
-                                conversion=-1),
-                            ast.Constant(value=':'),
-                            ast.FormattedValue(
-                                value=ast.Name(id='function_name', ctx=ast.Load()),
-                                conversion=-1),
-                            ast.Constant(value=':'),
-                            ast.FormattedValue(
-                                value=ast.Name(id='loop_index', ctx=ast.Load()),
-                                conversion=-1),
-                            ast.Constant(value=':'),
-                            ast.FormattedValue(
-                                value=ast.Name(id='invocation_id', ctx=ast.Load()),
-                                conversion=-1)]),
-                lineno=lineno + 9,),
+                                        right=ast.Constant(value="."),
+                                    ),
+                                    orelse=ast.Constant(value=""),
+                                ),
+                                conversion=-1,
+                            ),
+                            ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1),
+                            ast.Constant(value=":"),
+                            ast.FormattedValue(value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1),
+                            ast.Constant(value=":"),
+                            ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1),
+                            ast.Constant(value=":"),
+                            ast.FormattedValue(value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1),
+                        ]
+                    ),
+                    lineno=lineno + 9,
+                ),
                 ast.Expr(
                     value=ast.Call(
-                        func=ast.Name(id='print', ctx=ast.Load()),
+                        func=ast.Name(id="print", ctx=ast.Load()),
                         args=[
                             ast.JoinedStr(
                                 values=[
-                                    ast.Constant(value='!$######'),
+                                    ast.Constant(value="!$######"),
                                     ast.FormattedValue(
-                                        value=ast.Name(id='test_stdout_tag', ctx=ast.Load()),
-                                        conversion=-1),
-                                    ast.Constant(value='######$!')])],
-                        keywords=[])),
+                                        value=ast.Name(id="test_stdout_tag", ctx=ast.Load()), conversion=-1
+                                    ),
+                                    ast.Constant(value="######$!"),
+                                ]
+                            )
+                        ],
+                        keywords=[],
+                    )
+                ),
                 # ast.Expr(
                 #     value=ast.Call(
                 #         func=ast.Name(id="print", ctx=ast.Load()),
@@ -646,66 +646,28 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun
         ),
         ast.Expr(
             value=ast.Call(
-                func=ast.Name(id='print', ctx=ast.Load()),
+                func=ast.Name(id="print", ctx=ast.Load()),
                 args=[
                     ast.JoinedStr(
                         values=[
-                            ast.Constant(value='!######'),
-                            ast.FormattedValue(
-                                value=ast.Name(id='test_stdout_tag', ctx=ast.Load()),
-                                conversion=-1),
-                            ast.Constant(value='######!')])],
-                keywords=[])),
-        *(
-            [
-                ast.Expr(
-                    value=ast.Call(
-                        func=ast.Name(id="print", ctx=ast.Load()),
-                        args=[
-                            ast.JoinedStr(
-                                values=[
-                                    ast.Constant(value="!######"),
-                                    ast.FormattedValue(
-                                        value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1
-                                    ),
-                                    ast.Constant(value=":"),
-                                    ast.FormattedValue(
-                                        value=ast.IfExp(
-                                            test=ast.Name(id="test_class_name", ctx=ast.Load()),
-                                            body=ast.BinOp(
-                                                left=ast.Name(id="test_class_name", ctx=ast.Load()),
-                                                op=ast.Add(),
-                                                right=ast.Constant(value="."),
-                                            ),
-                                            orelse=ast.Constant(value=""),
-                                        ),
-                                        conversion=-1,
-                                    ),
-                                    ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1),
-                                    ast.Constant(value=":"),
-                                    ast.FormattedValue(
-                                        value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1
-                                    ),
-                                    ast.Constant(value=":"),
-                                    ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1),
-                                    ast.Constant(value=":"),
-                                    ast.FormattedValue(
-                                        value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1
-                                    ),
+                            ast.Constant(value="!######"),
+                            ast.FormattedValue(value=ast.Name(id="test_stdout_tag", ctx=ast.Load()), conversion=-1),
+                            *(
+                                [
                                     ast.Constant(value=":"),
                                     ast.FormattedValue(
                                         value=ast.Name(id="codeflash_duration", ctx=ast.Load()), conversion=-1
                                     ),
-                                    ast.Constant(value="######!"),
                                 ]
-                            )
-                        ],
-                        keywords=[],
+                                if mode == TestingMode.PERFORMANCE
+                                else []
+                            ),
+                            ast.Constant(value="######!"),
+                        ]
                     )
-                )
-            ]
-            if mode == TestingMode.PERFORMANCE
-            else []
+                ],
+                keywords=[],
+            )
         ),
         *(
             [
diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py
index c0f96c022..80c69bec9 100644
--- a/tests/test_instrument_tests.py
+++ b/tests/test_instrument_tests.py
@@ -38,7 +38,7 @@
     codeflash_test_index = codeflash_wrap.index[test_id]
     invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
     test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
-    print(f"!$######{test_stdout_tag}######$!")
+    print(f"!$######{{test_stdout_tag}}######$!")
     exception = None
     gc.disable()
     try:
@@ -49,7 +49,7 @@
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
-    print(f"!######{test_stdout_tag}######!")
+    print(f"!######{{test_stdout_tag}}######!")
     pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
     codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
     codeflash_con.commit()
@@ -68,9 +68,9 @@
         codeflash_wrap.index[test_id] = 0
     codeflash_test_index = codeflash_wrap.index[test_id]
     invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
-    exception = None
     test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
     print(f"!$######{{test_stdout_tag}}######$!")
+    exception = None
     gc.disable()
     try:
         counter = time.perf_counter_ns()
@@ -128,13 +128,12 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
         codeflash_wrap.index[test_id] = 0
     codeflash_test_index = codeflash_wrap.index[test_id]
     invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
-    test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
     """
     if sys.version_info < (3, 12):
         expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
     print(f"!$######{{test_stdout_tag}}######$!")"""
     else:
-        expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')
+        expected += """test_stdout_tag = f'{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}'
     print(f'!$######{{test_stdout_tag}}######$!')"""
     expected += """
     exception = None
@@ -147,7 +146,12 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
-    print(f"!######{{test_stdout_tag}}######!")
+    """
+    if sys.version_info < (3, 12):
+        expected += """print(f"!######{{test_stdout_tag}}######!")"""
+    else:
+        expected += """print(f'!######{{test_stdout_tag}}######!')"""
+    expected += """
     pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
     codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
     codeflash_con.commit()
@@ -235,9 +239,11 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
     invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
     """
     if sys.version_info < (3, 12):
-        expected += """print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")"""
+        expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
+    print(f"!$######{{test_stdout_tag}}######$!")"""
     else:
-        expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')"""
+        expected += """test_stdout_tag = f'{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}'
+    print(f'!$######{{test_stdout_tag}}######$!')"""
     expected += """
     exception = None
     gc.disable()
@@ -249,6 +255,12 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
+    """
+    if sys.version_info < (3, 12):
+        expected += """print(f"!######{{test_stdout_tag}}######!")"""
+    else:
+        expected += """print(f'!######{{test_stdout_tag}}######!')"""
+    expected += """
     pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
     codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
     codeflash_con.commit()
@@ -489,6 +501,12 @@ def test_sort():
         assert test_results_perf[0].runtime > 0
         assert test_results_perf[0].did_pass
         assert test_results_perf[0].return_value is None
+        assert (
+            test_results_perf[0].stdout
+            == """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5]
+"""
+        )
 
         assert test_results_perf[1].id.function_getting_tested == "sorter"
         assert test_results_perf[1].id.iteration_id == "4_0"
@@ -500,12 +518,11 @@ def test_sort():
         )
         assert test_results_perf[1].runtime > 0
         assert test_results_perf[1].did_pass
-        out_str = """codeflash stdout: Sorting list
-result: [0, 1, 2, 3, 4, 5]
 
-codeflash stdout: Sorting list
-result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
-        assert out_str == test_results_perf[1].stdout
+        out_str = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert test_results_perf[1].stdout == out_str
         ctx_result = func_optimizer.get_code_optimization_context()
         code_context: CodeOptimizationContext = ctx_result.unwrap()
         original_helper_code: dict[Path, str] = {}
@@ -515,8 +532,7 @@ def test_sort():
                 helper_code = f.read()
                 original_helper_code[helper_function_path] = helper_code
         computed_fn_opt = True
-        line_profiler_output_file = add_decorator_imports(
-            func_optimizer.function_to_optimize, code_context)
+        line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context)
         line_profile_results, _ = func_optimizer.run_and_parse_tests(
             testing_type=TestingMode.LINE_PROFILE,
             test_env=test_env,
@@ -525,14 +541,16 @@ def test_sort():
             pytest_min_loops=1,
             pytest_max_loops=1,
             testing_time=0.1,
-            line_profiler_output_file = line_profiler_output_file
+            line_profiler_output_file=line_profiler_output_file,
         )
         tmp_lpr = list(line_profile_results["timings"].keys())
-        assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1]==2
+        assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1] == 2
     finally:
         if computed_fn_opt:
             func_optimizer.write_code_and_helpers(
-                func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path
+                func_optimizer.function_to_optimize_source_code,
+                original_helper_code,
+                func_optimizer.function_to_optimize.file_path,
             )
         test_path.unlink(missing_ok=True)
         test_path_perf.unlink(missing_ok=True)
@@ -690,6 +708,12 @@ def test_sort_parametrized(input, expected_output):
         )
         assert test_results[0].runtime > 0
         assert test_results[0].did_pass
+        assert (
+            test_results[0].stdout
+            == """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5]
+"""
+        )
 
         assert test_results[1].id.function_getting_tested == "sorter"
         assert test_results[1].id.iteration_id == "0_1"
@@ -701,6 +725,12 @@ def test_sort_parametrized(input, expected_output):
         )
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
+        assert (
+            test_results[1].stdout
+            == """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        )
 
         assert test_results[2].id.function_getting_tested == "sorter"
         assert test_results[2].id.iteration_id == "0_2"
@@ -746,7 +776,8 @@ def test_sort_parametrized(input, expected_output):
         assert test_results_perf[1].did_pass
 
         out_str = """codeflash stdout: Sorting list
-result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
         assert out_str == test_results_perf[1].stdout
 
         assert test_results_perf[2].id.function_getting_tested == "sorter"
@@ -768,8 +799,7 @@ def test_sort_parametrized(input, expected_output):
                 helper_code = f.read()
                 original_helper_code[helper_function_path] = helper_code
         computed_fn_opt = True
-        line_profiler_output_file = add_decorator_imports(
-            func_optimizer.function_to_optimize, code_context)
+        line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context)
         line_profile_results, _ = func_optimizer.run_and_parse_tests(
             testing_type=TestingMode.LINE_PROFILE,
             test_env=test_env,
@@ -778,14 +808,16 @@ def test_sort_parametrized(input, expected_output):
             pytest_min_loops=1,
             pytest_max_loops=1,
             testing_time=0.1,
-            line_profiler_output_file = line_profiler_output_file
+            line_profiler_output_file=line_profiler_output_file,
         )
         tmp_lpr = list(line_profile_results["timings"].keys())
-        assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1]==3
+        assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1] == 3
     finally:
         if computed_fn_opt:
             func_optimizer.write_code_and_helpers(
-                func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path
+                func_optimizer.function_to_optimize_source_code,
+                original_helper_code,
+                func_optimizer.function_to_optimize.file_path,
             )
         test_path.unlink(missing_ok=True)
         test_path_perf.unlink(missing_ok=True)
@@ -962,7 +994,10 @@ def test_sort_parametrized_loop(input, expected_output):
         assert test_results[0].runtime > 0
         assert test_results[0].did_pass
         assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],)
-
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5]
+"""
+        assert test_results[0].stdout == out_str
         assert test_results[1].id.function_getting_tested == "sorter"
         assert test_results[1].id.iteration_id == "0_0_1"
         assert test_results[1].id.test_class_name is None
@@ -973,6 +1008,7 @@ def test_sort_parametrized_loop(input, expected_output):
         )
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
+        assert test_results[1].stdout == out_str
 
         assert test_results[2].id.function_getting_tested == "sorter"
         assert test_results[2].id.iteration_id == "0_0_2"
@@ -984,6 +1020,10 @@ def test_sort_parametrized_loop(input, expected_output):
         )
         assert test_results[2].runtime > 0
         assert test_results[2].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert test_results[2].stdout == out_str
 
         assert test_results[3].id.function_getting_tested == "sorter"
         assert test_results[3].id.iteration_id == "0_0_3"
@@ -996,6 +1036,8 @@ def test_sort_parametrized_loop(input, expected_output):
         assert test_results[3].runtime > 0
         assert test_results[3].did_pass
 
+        assert test_results[3].stdout == out_str
+
         assert test_results[4].id.function_getting_tested == "sorter"
         assert test_results[4].id.iteration_id == "0_0_4"
         assert test_results[4].id.test_class_name is None
@@ -1006,6 +1048,10 @@ def test_sort_parametrized_loop(input, expected_output):
         )
         assert test_results[4].runtime > 0
         assert test_results[4].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
+"""
+        assert test_results[4].stdout == out_str
 
         assert test_results[5].id.function_getting_tested == "sorter"
         assert test_results[5].id.iteration_id == "0_0_5"
@@ -1017,6 +1063,7 @@ def test_sort_parametrized_loop(input, expected_output):
         )
         assert test_results[5].runtime > 0
         assert test_results[5].did_pass
+        assert test_results[5].stdout == out_str
 
         test_results, _ = func_optimizer.run_and_parse_tests(
             testing_type=TestingMode.PERFORMANCE,
@@ -1107,8 +1154,7 @@ def test_sort_parametrized_loop(input, expected_output):
                 helper_code = f.read()
                 original_helper_code[helper_function_path] = helper_code
         computed_fn_opt = True
-        line_profiler_output_file = add_decorator_imports(
-            func_optimizer.function_to_optimize, code_context)
+        line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context)
         line_profile_results, _ = func_optimizer.run_and_parse_tests(
             testing_type=TestingMode.LINE_PROFILE,
             test_env=test_env,
@@ -1117,14 +1163,16 @@ def test_sort_parametrized_loop(input, expected_output):
             pytest_min_loops=1,
             pytest_max_loops=1,
             testing_time=0.1,
-            line_profiler_output_file = line_profiler_output_file
+            line_profiler_output_file=line_profiler_output_file,
         )
         tmp_lpr = list(line_profile_results["timings"].keys())
-        assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1]==6
+        assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1] == 6
     finally:
         if computed_fn_opt:
             func_optimizer.write_code_and_helpers(
-                func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path
+                func_optimizer.function_to_optimize_source_code,
+                original_helper_code,
+                func_optimizer.function_to_optimize.file_path,
             )
         test_path.unlink(missing_ok=True)
         test_path_behavior.unlink(missing_ok=True)
@@ -1343,13 +1391,9 @@ def test_sort():
         assert test_results[0].return_value is None
         out_str = """codeflash stdout: Sorting list
 result: [0, 1, 2, 3, 4, 5]
+"""
 
-codeflash stdout: Sorting list
-result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
-
-codeflash stdout: Sorting list
-result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]"""
-        assert test_results[1].stdout == out_str
+        assert test_results[0].stdout == out_str
         assert test_results[1].id.function_getting_tested == "sorter"
         assert test_results[1].id.iteration_id == "2_2_1"
         assert test_results[1].id.test_class_name is None
@@ -1360,6 +1404,10 @@ def test_sort():
         )
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
+        out_str2 = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert test_results[1].stdout == out_str2
 
         assert test_results[2].id.function_getting_tested == "sorter"
         assert test_results[2].id.iteration_id == "2_2_2"
@@ -1371,6 +1419,10 @@ def test_sort():
         )
         assert test_results[2].runtime > 0
         assert test_results[2].did_pass
+        out_str3 = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
+"""
+        assert test_results[2].stdout == out_str3
         ctx_result = func_optimizer.get_code_optimization_context()
         code_context: CodeOptimizationContext = ctx_result.unwrap()
         original_helper_code: dict[Path, str] = {}
@@ -1380,8 +1432,7 @@ def test_sort():
                 helper_code = f.read()
                 original_helper_code[helper_function_path] = helper_code
         computed_fn_opt = True
-        line_profiler_output_file = add_decorator_imports(
-            func_optimizer.function_to_optimize, code_context)
+        line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context)
         line_profile_results, _ = func_optimizer.run_and_parse_tests(
             testing_type=TestingMode.LINE_PROFILE,
             test_env=test_env,
@@ -1390,14 +1441,16 @@ def test_sort():
             pytest_min_loops=1,
             pytest_max_loops=1,
             testing_time=0.1,
-            line_profiler_output_file = line_profiler_output_file
+            line_profiler_output_file=line_profiler_output_file,
         )
         tmp_lpr = list(line_profile_results["timings"].keys())
-        assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1]==3
+        assert len(tmp_lpr) == 1 and line_profile_results["timings"][tmp_lpr[0]][0][1] == 3
     finally:
         if computed_fn_opt is True:
             func_optimizer.write_code_and_helpers(
-                func_optimizer.function_to_optimize_source_code, original_helper_code, func_optimizer.function_to_optimize.file_path
+                func_optimizer.function_to_optimize_source_code,
+                original_helper_code,
+                func_optimizer.function_to_optimize.file_path,
             )
         test_path.unlink(missing_ok=True)
         test_path_perf.unlink(missing_ok=True)
@@ -1602,6 +1655,10 @@ def test_sort(self):
         assert test_results[0].runtime > 0
         assert test_results[0].did_pass
         assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],)
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5]
+"""
+        assert test_results[0].stdout == out_str
 
         assert test_results[1].id.function_getting_tested == "sorter"
         assert test_results[1].id.iteration_id == "4_0"
@@ -1613,6 +1670,10 @@ def test_sort(self):
         )
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert test_results[1].stdout == out_str
 
         assert test_results[2].id.function_getting_tested == "sorter"
         assert test_results[2].id.iteration_id == "7_0"
@@ -1665,6 +1726,10 @@ def test_sort(self):
         )
         assert test_results[2].runtime > 0
         assert test_results[2].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
+"""
+        assert test_results[2].stdout == out_str
     finally:
         test_path.unlink(missing_ok=True)
         test_path_behavior.unlink(missing_ok=True)
@@ -1849,6 +1914,10 @@ def test_sort(self, input, expected_output):
         assert test_results[0].runtime > 0
         assert test_results[0].did_pass
         assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],)
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5]
+"""
+        assert test_results[0].stdout == out_str
 
         assert test_results[1].id.function_getting_tested == "sorter"
         assert test_results[1].id.iteration_id == "0_1"
@@ -1860,6 +1929,10 @@ def test_sort(self, input, expected_output):
         )
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert test_results[1].stdout == out_str
 
         assert test_results[2].id.function_getting_tested == "sorter"
         assert test_results[2].id.iteration_id == "0_2"
@@ -1871,6 +1944,10 @@ def test_sort(self, input, expected_output):
         )
         assert test_results[2].runtime > 0
         assert test_results[2].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
+"""
+        assert test_results[2].stdout == out_str
 
         test_results, coverage_data = func_optimizer.run_and_parse_tests(
             testing_type=TestingMode.PERFORMANCE,
@@ -2100,6 +2177,10 @@ def test_sort(self):
         assert test_results[0].runtime > 0
         assert test_results[0].did_pass
         assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],)
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5]
+"""
+        assert test_results[0].stdout == out_str
 
         assert test_results[1].id.function_getting_tested == "sorter"
         assert test_results[1].id.iteration_id == "2_2_1"
@@ -2111,6 +2192,10 @@ def test_sort(self):
         )
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert test_results[1].stdout == out_str
 
         assert test_results[2].id.function_getting_tested == "sorter"
         assert test_results[2].id.iteration_id == "2_2_2"
@@ -2122,6 +2207,10 @@ def test_sort(self):
         )
         assert test_results[2].runtime > 0
         assert test_results[2].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
+"""
+        assert test_results[2].stdout == out_str
 
         test_results, coverage_data = func_optimizer.run_and_parse_tests(
             test_env=test_env,
@@ -2349,6 +2438,10 @@ def test_sort(self, input, expected_output):
         assert test_results[0].runtime > 0
         assert test_results[0].did_pass
         assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],)
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5]
+"""
+        assert test_results[0].stdout == out_str
 
         assert test_results[1].id.function_getting_tested == "sorter"
         assert test_results[1].id.iteration_id == "0_0_1"
@@ -2360,6 +2453,10 @@ def test_sort(self, input, expected_output):
         )
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0, 1, 2, 3, 4, 5]
+"""
+        assert test_results[1].stdout == out_str
 
         assert test_results[2].id.function_getting_tested == "sorter"
         assert test_results[2].id.iteration_id == "0_0_2"
@@ -2371,6 +2468,10 @@ def test_sort(self, input, expected_output):
         )
         assert test_results[2].runtime > 0
         assert test_results[2].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert test_results[2].stdout == out_str
 
         assert test_results[3].id.function_getting_tested == "sorter"
         assert test_results[3].id.iteration_id == "0_0_3"
@@ -2829,7 +2930,8 @@ def test_code_replacement10() -> None:
         assert code_context.testgen_context_code == get_code_output
     """
 
-    expected = """import gc
+    expected = (
+        """import gc
 import os
 import sqlite3
 import time
@@ -2839,39 +2941,9 @@ def test_code_replacement10() -> None:
 from codeflash.optimization.optimizer import Optimizer
 
 
-def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, function_name, line_id, loop_index, codeflash_cur, codeflash_con, *args, **kwargs):
-    test_id = f'{{test_module_name}}:{{test_class_name}}:{{test_name}}:{{line_id}}:{{loop_index}}'
-    if not hasattr(codeflash_wrap, 'index'):
-        codeflash_wrap.index = {{}}
-    if test_id in codeflash_wrap.index:
-        codeflash_wrap.index[test_id] += 1
-    else:
-        codeflash_wrap.index[test_id] = 0
-    codeflash_test_index = codeflash_wrap.index[test_id]
-    invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
 """
-    if sys.version_info < (3, 12):
-        expected += """    print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")"""
-    else:
-        expected += """    print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')"""
-    expected += """
-    exception = None
-    gc.disable()
-    try:
-        counter = time.perf_counter_ns()
-        return_value = wrapped(*args, **kwargs)
-        codeflash_duration = time.perf_counter_ns() - counter
-    except Exception as e:
-        codeflash_duration = time.perf_counter_ns() - counter
-        exception = e
-    gc.enable()
-    pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
-    codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
-    codeflash_con.commit()
-    if exception:
-        raise exception
-    return return_value
-
+        + codeflash_wrap_string
+        + """
 def test_code_replacement10() -> None:
     codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX'])
     codeflash_iteration = os.environ['CODEFLASH_TEST_ITERATION']
@@ -2890,6 +2962,7 @@ def test_code_replacement10() -> None:
         assert code_context.testgen_context_code == get_code_output
     codeflash_con.close()
 """
+    )
 
     with tempfile.NamedTemporaryFile(mode="w") as f:
         f.write(code)
@@ -2907,7 +2980,7 @@ def test_code_replacement10() -> None:
         )
         os.chdir(original_cwd)
     assert success
-    assert new_test == expected.format(
+    assert new_test.replace('"', "'") == expected.replace('"', "'").format(
         module_path=Path(f.name).name, tmp_dir_path=get_run_tmp_file(Path("test_return_values"))
     )
 
diff --git a/tests/test_instrumentation_run_results_aiservice.py b/tests/test_instrumentation_run_results_aiservice.py
index c1a759681..ddfd8de8c 100644
--- a/tests/test_instrumentation_run_results_aiservice.py
+++ b/tests/test_instrumentation_run_results_aiservice.py
@@ -6,7 +6,6 @@
 from pathlib import Path
 
 import isort
-
 from code_to_optimize.bubble_sort_method import BubbleSorter
 from codeflash.code_utils.code_utils import get_run_tmp_file
 from codeflash.discovery.functions_to_optimize import FunctionToOptimize
@@ -16,17 +15,18 @@
 from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture
 
 # Used by aiservice instrumentation
-behavior_logging_code = """from __future__ import annotations
+behavior_logging_code = """
+from __future__ import annotations
 
 import gc
 import inspect
 import os
 import time
+import dill as pickle
 
 from pathlib import Path
 from typing import Any, Callable, Optional
 
-import dill as pickle
 
 def codeflash_wrap(
     wrapped: Callable[..., Any],
@@ -48,8 +48,9 @@ def codeflash_wrap(
         codeflash_wrap.index[test_id] = 0
     codeflash_test_index = codeflash_wrap.index[test_id]
     invocation_id = f"{line_id}_{codeflash_test_index}"
+    test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}"
     print(
-        f"!######{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}######!"
+        f"!$######{test_stdout_tag}######$!"
     )
     exception = None
     gc.disable()
@@ -61,6 +62,7 @@ def codeflash_wrap(
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
+    print(f"!######{test_stdout_tag}######!")
     iteration = os.environ["CODEFLASH_TEST_ITERATION"]
     with Path(
         "{codeflash_run_tmp_dir_client_side}", f"test_return_values_{iteration}.bin"
@@ -178,7 +180,7 @@ def test_single_element_list():
             testing_time=0.1,
         )
         assert test_results[0].id.function_getting_tested == "sorter"
-        assert test_results[0].stdout == "codeflash stdout : BubbleSorter.sorter() called"
+        assert test_results[0].stdout == "codeflash stdout : BubbleSorter.sorter() called\n"
         assert test_results[0].id.test_function_name == "test_single_element_list"
         assert test_results[0].did_pass
         assert test_results[0].return_value[1]["arr"] == [42]
@@ -222,7 +224,7 @@ def sorter(self, arr):
             test_results, test_results_mutated_attr
         )  # Without codeflash capture, the init state was not verified, and the results are verified as correct even with the attribute mutated
 
-        assert test_results_mutated_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called"
+        assert test_results_mutated_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called\n"
     finally:
         fto_path.write_text(original_code, "utf-8")
         test_path.unlink(missing_ok=True)
@@ -319,23 +321,32 @@ def test_single_element_list():
             testing_time=0.1,
         )
         # Verify instance_state result, which checks instance state right after __init__, using  codeflash_capture
-        assert test_results[0].id.function_getting_tested == "BubbleSorter.__init__"
-        assert test_results[0].id.test_function_name == "test_single_element_list"
-        assert test_results[0].did_pass
-        assert test_results[0].return_value[0] == {"x": 0}
-        assert test_results[0].stdout == "codeflash stdout : BubbleSorter.sorter() called"
 
         # Verify function_to_optimize result
-        assert test_results[1].id.function_getting_tested == "sorter"
-        assert test_results[1].id.test_function_name == "test_single_element_list"
-        assert test_results[1].did_pass
+        assert test_results[0].id.function_getting_tested == "sorter"
+        assert test_results[0].id.test_function_name == "test_single_element_list"
+        assert test_results[0].did_pass
 
         # Checks input values to the function to see if they have mutated
         # assert comparator(test_results[1].return_value[1]["self"], BubbleSorter()) TODO: add self as input
-        assert test_results[1].return_value[1]["arr"] == [1, 2, 3]
+        assert test_results[0].return_value[1]["arr"] == [1, 2, 3]
 
         # Check function return value
-        assert test_results[1].return_value[2] == [1, 2, 3]
+        assert test_results[0].return_value[2] == [1, 2, 3]
+        assert (
+            test_results[0].stdout
+            == """codeflash stdout : BubbleSorter.sorter() called
+"""
+        )
+        assert test_results[1].id.function_getting_tested == "BubbleSorter.__init__"
+        assert test_results[1].id.test_function_name == "test_single_element_list"
+        assert test_results[1].did_pass
+        assert test_results[1].return_value[0] == {"x": 0}
+        assert (
+            test_results[1].stdout
+            == """codeflash stdout : BubbleSorter.sorter() called
+"""
+        )
         # Replace with optimized code that mutated instance attribute
         optimized_code_mutated_attr = """
 import sys
@@ -390,10 +401,10 @@ def sorter(self, arr):
             testing_time=0.1,
         )
         # assert test_results_mutated_attr[0].return_value[0]["self"].x == 1 TODO: add self as input
-        assert test_results_mutated_attr[0].id.function_getting_tested == "BubbleSorter.__init__"
-        assert test_results_mutated_attr[0].return_value[0] == {"x": 1}
-        assert test_results_mutated_attr[0].verification_type == VerificationType.INIT_STATE_FTO
-        assert test_results_mutated_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called"
+        assert test_results_mutated_attr[1].id.function_getting_tested == "BubbleSorter.__init__"
+        assert test_results_mutated_attr[1].return_value[0] == {"x": 1}
+        assert test_results_mutated_attr[1].verification_type == VerificationType.INIT_STATE_FTO
+        assert test_results_mutated_attr[1].stdout == "codeflash stdout : BubbleSorter.sorter() called\n"
         assert not compare_test_results(
             test_results, test_results_mutated_attr
         )  # The test should fail because the instance attribute was mutated
@@ -442,10 +453,10 @@ def sorter(self, arr):
             pytest_max_loops=1,
             testing_time=0.1,
         )
-        assert test_results_new_attr[0].id.function_getting_tested == "BubbleSorter.__init__"
-        assert test_results_new_attr[0].return_value[0] == {"x": 0, "y": 2}
-        assert test_results_new_attr[0].verification_type == VerificationType.INIT_STATE_FTO
-        assert test_results_new_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called"
+        assert test_results_new_attr[1].id.function_getting_tested == "BubbleSorter.__init__"
+        assert test_results_new_attr[1].return_value[0] == {"x": 0, "y": 2}
+        assert test_results_new_attr[1].verification_type == VerificationType.INIT_STATE_FTO
+        assert test_results_new_attr[1].stdout == "codeflash stdout : BubbleSorter.sorter() called\n"
         # assert test_results_new_attr[1].return_value[1]["self"].x == 0 TODO: add self as input
         # assert test_results_new_attr[1].return_value[1]["self"].y == 2 TODO: add self as input
         assert compare_test_results(
diff --git a/tests/test_test_runner.py b/tests/test_test_runner.py
index 0e80b76e0..5dc6df678 100644
--- a/tests/test_test_runner.py
+++ b/tests/test_test_runner.py
@@ -1,5 +1,3 @@
-import re
-
 import os
 import tempfile
 from pathlib import Path
@@ -145,5 +143,5 @@ def test_sort():
             test_xml_file_path=result_file, test_files=test_files, test_config=config, run_result=process
         )
     match = ImportErrorPattern.search(process.stdout).group()
-    assert match=="ModuleNotFoundError: No module named 'torch'"
+    assert match == "ModuleNotFoundError: No module named 'torch'"
     result_file.unlink(missing_ok=True)

From bc47662b9bf493dcfebdd6d793765680468886c3 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 01:16:26 -0700
Subject: [PATCH 05/14] remove spare comments

---
 .../code_utils/instrument_existing_tests.py   | 43 -------------------
 1 file changed, 43 deletions(-)

diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py
index 9c071996a..6eac52809 100644
--- a/codeflash/code_utils/instrument_existing_tests.py
+++ b/codeflash/code_utils/instrument_existing_tests.py
@@ -508,50 +508,7 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun
                         keywords=[],
                     )
                 ),
-                # ast.Expr(
-                #     value=ast.Call(
-                #         func=ast.Name(id="print", ctx=ast.Load()),
-                #         args=[
-                #             ast.JoinedStr(
-                #                 values=[
-                #                     ast.Constant(value="!######"),
-                #                     ast.FormattedValue(
-                #                         value=ast.Name(id="test_module_name", ctx=ast.Load()), conversion=-1
-                #                     ),
-                #                     ast.Constant(value=":"),
-                #                     ast.FormattedValue(
-                #                         value=ast.IfExp(
-                #                             test=ast.Name(id="test_class_name", ctx=ast.Load()),
-                #                             body=ast.BinOp(
-                #                                 left=ast.Name(id="test_class_name", ctx=ast.Load()),
-                #                                 op=ast.Add(),
-                #                                 right=ast.Constant(value="."),
-                #                             ),
-                #                             orelse=ast.Constant(value=""),
-                #                         ),
-                #                         conversion=-1,
-                #                     ),
-                #                     ast.FormattedValue(value=ast.Name(id="test_name", ctx=ast.Load()), conversion=-1),
-                #                     ast.Constant(value=":"),
-                #                     ast.FormattedValue(
-                #                         value=ast.Name(id="function_name", ctx=ast.Load()), conversion=-1
-                #                     ),
-                #                     ast.Constant(value=":"),
-                #                     ast.FormattedValue(value=ast.Name(id="loop_index", ctx=ast.Load()), conversion=-1),
-                #                     ast.Constant(value=":"),
-                #                     ast.FormattedValue(
-                #                         value=ast.Name(id="invocation_id", ctx=ast.Load()), conversion=-1
-                #                     ),
-                #                     ast.Constant(value="######!"),
-                #                 ]
-                #             )
-                #         ],
-                #         keywords=[],
-                #     )
-                # )
             ]
-            # if mode == TestingMode.BEHAVIOR
-            # else []
         ),
         ast.Assign(
             targets=[ast.Name(id="exception", ctx=ast.Store())], value=ast.Constant(value=None), lineno=lineno + 10

From 80baa7857db0c5c17e2fee37f3ec672b97ad00f7 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 01:27:33 -0700
Subject: [PATCH 06/14] fix more tests

---
 tests/test_instrument_all_and_run.py | 69 ++++++++++------------------
 1 file changed, 23 insertions(+), 46 deletions(-)

diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py
index 5bc942fdd..15676aa7a 100644
--- a/tests/test_instrument_all_and_run.py
+++ b/tests/test_instrument_all_and_run.py
@@ -25,7 +25,8 @@
         codeflash_wrap.index[test_id] = 0
     codeflash_test_index = codeflash_wrap.index[test_id]
     invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
-    print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")
+    test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
+    print(f"!$######{{test_stdout_tag}}######$!")
     exception = None
     gc.disable()
     try:
@@ -36,6 +37,7 @@
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
+    print(f"!######{{test_stdout_tag}}######!")
     pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
     codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
     codeflash_con.commit()
@@ -170,9 +172,8 @@ def test_sort():
 
         out_str = """codeflash stdout: Sorting list
 result: [0, 1, 2, 3, 4, 5]
-
-codeflash stdout: Sorting list
-result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
+"""
+        assert test_results[0].stdout == out_str
         assert out_str == test_results[0].stdout
         assert test_results[0].id.function_getting_tested == "sorter"
         assert test_results[0].id.iteration_id == "1_0"
@@ -185,7 +186,10 @@ def test_sort():
         assert test_results[0].runtime > 0
         assert test_results[0].did_pass
         assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],)
-        assert out_str == test_results[1].stdout.strip()
+        out_str = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert out_str == test_results[1].stdout
 
         assert test_results[1].id.function_getting_tested == "sorter"
         assert test_results[1].id.iteration_id == "4_0"
@@ -197,6 +201,10 @@ def test_sort():
         )
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
+        out_str = """codeflash stdout: Sorting list
+result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+"""
+        assert test_results[1].stdout == out_str
         results2, _ = func_optimizer.run_and_parse_tests(
             testing_type=TestingMode.BEHAVIOR,
             test_env=test_env,
@@ -208,10 +216,8 @@ def test_sort():
         )
         out_str = """codeflash stdout: Sorting list
 result: [0, 1, 2, 3, 4, 5]
-
-codeflash stdout: Sorting list
-result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
-        assert out_str == results2[0].stdout.strip()
+"""
+        assert out_str == results2[0].stdout
         assert compare_test_results(test_results, results2)
     finally:
         fto_path.write_text(original_code, "utf-8")
@@ -234,7 +240,8 @@ def test_sort():
     output = sort_class.sorter(input)
     assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
 
-    expected = """import gc
+    expected = (
+        """import gc
 import os
 import sqlite3
 import time
@@ -242,42 +249,9 @@ def test_sort():
 import dill as pickle
 
 from code_to_optimize.bubble_sort_method import BubbleSorter
-
-
-def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, function_name, line_id, loop_index, codeflash_cur, codeflash_con, *args, **kwargs):
-    test_id = f'{{test_module_name}}:{{test_class_name}}:{{test_name}}:{{line_id}}:{{loop_index}}'
-    if not hasattr(codeflash_wrap, 'index'):
-        codeflash_wrap.index = {{}}
-    if test_id in codeflash_wrap.index:
-        codeflash_wrap.index[test_id] += 1
-    else:
-        codeflash_wrap.index[test_id] = 0
-    codeflash_test_index = codeflash_wrap.index[test_id]
-    invocation_id = f'{{line_id}}_{{codeflash_test_index}}'
-    """
-    if sys.version_info < (3, 12):
-        expected += """print(f"!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!")"""
-    else:
-        expected += """print(f'!######{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}######!')"""
-    expected += """
-    exception = None
-    gc.disable()
-    try:
-        counter = time.perf_counter_ns()
-        return_value = wrapped(*args, **kwargs)
-        codeflash_duration = time.perf_counter_ns() - counter
-    except Exception as e:
-        codeflash_duration = time.perf_counter_ns() - counter
-        exception = e
-    gc.enable()
-    pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
-    codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
-    codeflash_con.commit()
-    if exception:
-        raise exception
-    return return_value
 """
-    expected += """
+        + codeflash_wrap_string
+        + """
 def test_sort():
     codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX'])
     codeflash_iteration = os.environ['CODEFLASH_TEST_ITERATION']
@@ -294,6 +268,7 @@ def test_sort():
     assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
     codeflash_con.close()
 """
+    )
     fto_path = (Path(__file__).parent.resolve() / "../code_to_optimize/bubble_sort_method.py").resolve()
     original_code = fto_path.read_text("utf-8")
     fto = FunctionToOptimize(
@@ -379,7 +354,9 @@ def test_sort():
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
         assert test_results[1].return_value == ([0, 1, 2, 3, 4, 5],)
-        out_str = """codeflash stdout : BubbleSorter.sorter() called\n\n\ncodeflash stdout : BubbleSorter.sorter() called"""
+        out_str = (
+            """codeflash stdout : BubbleSorter.sorter() called\n\n\ncodeflash stdout : BubbleSorter.sorter() called"""
+        )
         assert test_results[1].stdout == out_str
         assert compare_test_results(test_results, test_results)
         assert test_results[2].id.function_getting_tested == "BubbleSorter.__init__"

From 29b2f2172e174914a904af1d2f8a8a8f425bd721 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 02:04:30 -0700
Subject: [PATCH 07/14] fix more tests

---
 tests/test_instrument_all_and_run.py |  3 ++-
 tests/test_instrument_tests.py       | 27 ++++++++++++++++++---------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py
index 15676aa7a..812533521 100644
--- a/tests/test_instrument_all_and_run.py
+++ b/tests/test_instrument_all_and_run.py
@@ -247,8 +247,9 @@ def test_sort():
 import time
 
 import dill as pickle
-
 from code_to_optimize.bubble_sort_method import BubbleSorter
+
+
 """
         + codeflash_wrap_string
         + """
diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py
index 80c69bec9..c5fd19383 100644
--- a/tests/test_instrument_tests.py
+++ b/tests/test_instrument_tests.py
@@ -307,10 +307,12 @@ def test_prepare_image_for_yolo():
 def test_perfinjector_bubble_sort_results() -> None:
     computed_fn_opt = False
     code = """from code_to_optimize.bubble_sort import sorter
+import datetime
 
 
 def test_sort():
     input = [5, 4, 3, 2, 1, 0]
+    print(datetime.datetime.now().isoformat())
     output = sorter(input)
     assert output == [0, 1, 2, 3, 4, 5]
 
@@ -319,7 +321,8 @@ def test_sort():
     assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]"""
 
     expected = (
-        """import gc
+        """import datetime
+import gc
 import os
 import sqlite3
 import time
@@ -339,17 +342,19 @@ def test_sort():
     codeflash_cur = codeflash_con.cursor()
     codeflash_cur.execute('CREATE TABLE IF NOT EXISTS test_results (test_module_path TEXT, test_class_name TEXT, test_function_name TEXT, function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, runtime INTEGER, return_value BLOB, verification_type TEXT)')
     input = [5, 4, 3, 2, 1, 0]
-    output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '1', codeflash_loop_index, codeflash_cur, codeflash_con, input)
+    print(datetime.datetime.now().isoformat())
+    output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '2', codeflash_loop_index, codeflash_cur, codeflash_con, input)
     assert output == [0, 1, 2, 3, 4, 5]
     input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
-    output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '4', codeflash_loop_index, codeflash_cur, codeflash_con, input)
+    output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '5', codeflash_loop_index, codeflash_cur, codeflash_con, input)
     assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
     codeflash_con.close()
 """
     )
 
     expected_perfonly = (
-        """import gc
+        """import datetime
+import gc
 import os
 import time
 
@@ -362,10 +367,10 @@ def test_sort():
 def test_sort():
     codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX'])
     input = [5, 4, 3, 2, 1, 0]
-    output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '1', codeflash_loop_index, input)
+    output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '2', codeflash_loop_index, input)
     assert output == [0, 1, 2, 3, 4, 5]
     input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
-    output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '4', codeflash_loop_index, input)
+    output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '5', codeflash_loop_index, input)
     assert output == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
 """
     )
@@ -390,7 +395,7 @@ def test_sort():
         os.chdir(run_cwd)
         success, new_test = inject_profiling_into_existing_test(
             test_path,
-            [CodePosition(6, 13), CodePosition(10, 13)],
+            [CodePosition(8, 13), CodePosition(12, 13)],
             func,
             project_root_path,
             "pytest",
@@ -560,6 +565,7 @@ def test_perfinjector_bubble_sort_parametrized_results() -> None:
     computed_fn_opt = False
     code = """from code_to_optimize.bubble_sort import sorter
 import pytest
+import datetime
 
 
 @pytest.mark.parametrize(
@@ -571,6 +577,7 @@ def test_perfinjector_bubble_sort_parametrized_results() -> None:
     ],
 )
 def test_sort_parametrized(input, expected_output):
+    print(datetime.datetime.now().isoformat())
     output = sorter(input)
     assert output == expected_output
 """
@@ -606,6 +613,7 @@ def test_sort_parametrized(input, expected_output):
         """import gc
 import os
 import time
+import datetime
 
 import pytest
 
@@ -618,6 +626,7 @@ def test_sort_parametrized(input, expected_output):
 @pytest.mark.parametrize('input, expected_output', [([5, 4, 3, 2, 1, 0], [0, 1, 2, 3, 4, 5]), ([5.0, 4.0, 3.0, 2.0, 1.0, 0.0], [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]), (list(reversed(range(50))), list(range(50)))])
 def test_sort_parametrized(input, expected_output):
     codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX'])
+    print(datetime.datetime.now().isoformat())
     output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort_parametrized', 'sorter', '0', codeflash_loop_index, input)
     assert output == expected_output
 """
@@ -643,11 +652,11 @@ def test_sort_parametrized(input, expected_output):
         func = FunctionToOptimize(function_name="sorter", parents=[], file_path=code_path)
         os.chdir(run_cwd)
         success, new_test = inject_profiling_into_existing_test(
-            test_path, [CodePosition(14, 13)], func, project_root_path, "pytest", mode=TestingMode.BEHAVIOR
+            test_path, [CodePosition(16, 13)], func, project_root_path, "pytest", mode=TestingMode.BEHAVIOR
         )
         assert success
         success, new_test_perf = inject_profiling_into_existing_test(
-            test_path, [CodePosition(14, 13)], func, project_root_path, "pytest", mode=TestingMode.PERFORMANCE
+            test_path, [CodePosition(16, 13)], func, project_root_path, "pytest", mode=TestingMode.PERFORMANCE
         )
 
         os.chdir(original_cwd)

From 1070815f972aa07b708178f1ee1d33d0afa1db43 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 02:09:08 -0700
Subject: [PATCH 08/14] make ruff happy

---
 codeflash/verification/parse_test_output.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
index 480b40630..ed9ebf1c8 100644
--- a/codeflash/verification/parse_test_output.py
+++ b/codeflash/verification/parse_test_output.py
@@ -265,7 +265,7 @@ def parse_test_xml(
                         timed_out = True
 
             sys_stdout = testcase.system_out or ""
-            begin_matches = [match for match in matches_re_start.finditer(sys_stdout)]
+            begin_matches = list(matches_re_start.finditer(sys_stdout))
             end_matches = {}
             for match in matches_re_end.finditer(sys_stdout):
                 groups = match.groups()

From 38f1ab55cc72a06e71cf189dd0e3428589c93ab7 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 13:42:44 -0700
Subject: [PATCH 09/14] try one fix attempt

---
 tests/test_instrument_all_and_run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py
index 812533521..75b1de3a3 100644
--- a/tests/test_instrument_all_and_run.py
+++ b/tests/test_instrument_all_and_run.py
@@ -247,6 +247,7 @@ def test_sort():
 import time
 
 import dill as pickle
+
 from code_to_optimize.bubble_sort_method import BubbleSorter
 
 

From 3a6e4816788eb6f4f892140cb016d3d278e73ec9 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 15:46:58 -0700
Subject: [PATCH 10/14] fix one test

---
 codeflash/verification/codeflash_capture.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/codeflash/verification/codeflash_capture.py b/codeflash/verification/codeflash_capture.py
index 3d28a027e..45d046cf6 100644
--- a/codeflash/verification/codeflash_capture.py
+++ b/codeflash/verification/codeflash_capture.py
@@ -112,9 +112,8 @@ def wrapper(*args, **kwargs) -> None:  # noqa: ANN002, ANN003
 
             # Generate invocation id
             invocation_id = f"{line_id}_{codeflash_test_index}"
-            print(
-                f"!######{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}######!"
-            )
+            test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}"
+            print(f"!$######{test_stdout_tag}######$!")
             # Connect to sqlite
             codeflash_con = sqlite3.connect(f"{tmp_dir_path}_{codeflash_iteration}.sqlite")
             codeflash_cur = codeflash_con.cursor()
@@ -131,6 +130,7 @@ def wrapper(*args, **kwargs) -> None:  # noqa: ANN002, ANN003
                 exception = e
             finally:
                 gc.enable()
+            print(f"!######{test_stdout_tag}######!")
 
             # Capture instance state after initialization
             if hasattr(args[0], "__dict__"):

From 537ca0139ca6d24e04dd8490eec9c5ed155c20b3 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 15:55:48 -0700
Subject: [PATCH 11/14] fix one test

---
 tests/test_instrument_all_and_run.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py
index 75b1de3a3..7e1a20f49 100644
--- a/tests/test_instrument_all_and_run.py
+++ b/tests/test_instrument_all_and_run.py
@@ -356,9 +356,7 @@ def test_sort():
         assert test_results[1].runtime > 0
         assert test_results[1].did_pass
         assert test_results[1].return_value == ([0, 1, 2, 3, 4, 5],)
-        out_str = (
-            """codeflash stdout : BubbleSorter.sorter() called\n\n\ncodeflash stdout : BubbleSorter.sorter() called"""
-        )
+        out_str = """codeflash stdout : BubbleSorter.sorter() called\n"""
         assert test_results[1].stdout == out_str
         assert compare_test_results(test_results, test_results)
         assert test_results[2].id.function_getting_tested == "BubbleSorter.__init__"
@@ -376,6 +374,7 @@ def test_sort():
         )
         assert test_results[3].runtime > 0
         assert test_results[3].did_pass
+        assert test_results[3].stdout == """codeflash stdout : BubbleSorter.sorter() called\n"""
 
         results2, _ = func_optimizer.run_and_parse_tests(
             testing_type=TestingMode.BEHAVIOR,

From 3cd3f0a111fd6975a3fb13e7ff0127d42bbff3ac Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 16:09:35 -0700
Subject: [PATCH 12/14] fix one more test

---
 tests/test_instrument_tests.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py
index c5fd19383..254352073 100644
--- a/tests/test_instrument_tests.py
+++ b/tests/test_instrument_tests.py
@@ -367,6 +367,7 @@ def test_sort():
 def test_sort():
     codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX'])
     input = [5, 4, 3, 2, 1, 0]
+    print(datetime.datetime.now().isoformat())
     output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort', 'sorter', '2', codeflash_loop_index, input)
     assert output == [0, 1, 2, 3, 4, 5]
     input = [5.0, 4.0, 3.0, 2.0, 1.0, 0.0]
@@ -395,7 +396,7 @@ def test_sort():
         os.chdir(run_cwd)
         success, new_test = inject_profiling_into_existing_test(
             test_path,
-            [CodePosition(8, 13), CodePosition(12, 13)],
+            [CodePosition(8, 14), CodePosition(12, 14)],
             func,
             project_root_path,
             "pytest",
@@ -411,7 +412,7 @@ def test_sort():
 
         success, new_perf_test = inject_profiling_into_existing_test(
             test_path,
-            [CodePosition(6, 13), CodePosition(10, 13)],
+            [CodePosition(8, 14), CodePosition(12, 14)],
             func,
             project_root_path,
             "pytest",
@@ -461,7 +462,7 @@ def test_sort():
             testing_time=0.1,
         )
         assert test_results[0].id.function_getting_tested == "sorter"
-        assert test_results[0].id.iteration_id == "1_0"
+        assert test_results[0].id.iteration_id == "2_0"
         assert test_results[0].id.test_class_name is None
         assert test_results[0].id.test_function_name == "test_sort"
         assert (
@@ -473,7 +474,7 @@ def test_sort():
         assert test_results[0].return_value == ([0, 1, 2, 3, 4, 5],)
 
         assert test_results[1].id.function_getting_tested == "sorter"
-        assert test_results[1].id.iteration_id == "4_0"
+        assert test_results[1].id.iteration_id == "5_0"
         assert test_results[1].id.test_class_name is None
         assert test_results[1].id.test_function_name == "test_sort"
         assert (
@@ -496,7 +497,7 @@ def test_sort():
             testing_time=0.1,
         )
         assert test_results_perf[0].id.function_getting_tested == "sorter"
-        assert test_results_perf[0].id.iteration_id == "1_0"
+        assert test_results_perf[0].id.iteration_id == "2_0"
         assert test_results_perf[0].id.test_class_name is None
         assert test_results_perf[0].id.test_function_name == "test_sort"
         assert (
@@ -514,7 +515,7 @@ def test_sort():
         )
 
         assert test_results_perf[1].id.function_getting_tested == "sorter"
-        assert test_results_perf[1].id.iteration_id == "4_0"
+        assert test_results_perf[1].id.iteration_id == "5_0"
         assert test_results_perf[1].id.test_class_name is None
         assert test_results_perf[1].id.test_function_name == "test_sort"
         assert (
@@ -565,7 +566,6 @@ def test_perfinjector_bubble_sort_parametrized_results() -> None:
     computed_fn_opt = False
     code = """from code_to_optimize.bubble_sort import sorter
 import pytest
-import datetime
 
 
 @pytest.mark.parametrize(
@@ -577,7 +577,6 @@ def test_perfinjector_bubble_sort_parametrized_results() -> None:
     ],
 )
 def test_sort_parametrized(input, expected_output):
-    print(datetime.datetime.now().isoformat())
     output = sorter(input)
     assert output == expected_output
 """
@@ -613,7 +612,6 @@ def test_sort_parametrized(input, expected_output):
         """import gc
 import os
 import time
-import datetime
 
 import pytest
 
@@ -626,7 +624,6 @@ def test_sort_parametrized(input, expected_output):
 @pytest.mark.parametrize('input, expected_output', [([5, 4, 3, 2, 1, 0], [0, 1, 2, 3, 4, 5]), ([5.0, 4.0, 3.0, 2.0, 1.0, 0.0], [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]), (list(reversed(range(50))), list(range(50)))])
 def test_sort_parametrized(input, expected_output):
     codeflash_loop_index = int(os.environ['CODEFLASH_LOOP_INDEX'])
-    print(datetime.datetime.now().isoformat())
     output = codeflash_wrap(sorter, '{module_path}', None, 'test_sort_parametrized', 'sorter', '0', codeflash_loop_index, input)
     assert output == expected_output
 """
@@ -652,11 +649,11 @@ def test_sort_parametrized(input, expected_output):
         func = FunctionToOptimize(function_name="sorter", parents=[], file_path=code_path)
         os.chdir(run_cwd)
         success, new_test = inject_profiling_into_existing_test(
-            test_path, [CodePosition(16, 13)], func, project_root_path, "pytest", mode=TestingMode.BEHAVIOR
+            test_path, [CodePosition(14, 13)], func, project_root_path, "pytest", mode=TestingMode.BEHAVIOR
         )
         assert success
         success, new_test_perf = inject_profiling_into_existing_test(
-            test_path, [CodePosition(16, 13)], func, project_root_path, "pytest", mode=TestingMode.PERFORMANCE
+            test_path, [CodePosition(14, 13)], func, project_root_path, "pytest", mode=TestingMode.PERFORMANCE
         )
 
         os.chdir(original_cwd)

From 6c00fb3b5c98c48c59902feb9175707df2bf1b88 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 16:18:02 -0700
Subject: [PATCH 13/14] fix one more test

---
 ...t_instrumentation_run_results_aiservice.py | 37 +++++++++----------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/tests/test_instrumentation_run_results_aiservice.py b/tests/test_instrumentation_run_results_aiservice.py
index ddfd8de8c..78d9973f1 100644
--- a/tests/test_instrumentation_run_results_aiservice.py
+++ b/tests/test_instrumentation_run_results_aiservice.py
@@ -323,30 +323,27 @@ def test_single_element_list():
         # Verify instance_state result, which checks instance state right after __init__, using  codeflash_capture
 
         # Verify function_to_optimize result
-        assert test_results[0].id.function_getting_tested == "sorter"
+        assert test_results[0].id.function_getting_tested == "BubbleSorter.__init__"
         assert test_results[0].id.test_function_name == "test_single_element_list"
         assert test_results[0].did_pass
+        assert test_results[0].return_value[0] == {"x": 0}
+        assert test_results[0].stdout == ""
+        assert test_results[1].id.function_getting_tested == "sorter"
+        assert test_results[1].id.test_function_name == "test_single_element_list"
+        assert test_results[1].did_pass
 
         # Checks input values to the function to see if they have mutated
         # assert comparator(test_results[1].return_value[1]["self"], BubbleSorter()) TODO: add self as input
-        assert test_results[0].return_value[1]["arr"] == [1, 2, 3]
+        assert test_results[1].return_value[1]["arr"] == [1, 2, 3]
 
         # Check function return value
-        assert test_results[0].return_value[2] == [1, 2, 3]
-        assert (
-            test_results[0].stdout
-            == """codeflash stdout : BubbleSorter.sorter() called
-"""
-        )
-        assert test_results[1].id.function_getting_tested == "BubbleSorter.__init__"
-        assert test_results[1].id.test_function_name == "test_single_element_list"
-        assert test_results[1].did_pass
-        assert test_results[1].return_value[0] == {"x": 0}
+        assert test_results[1].return_value[2] == [1, 2, 3]
         assert (
             test_results[1].stdout
             == """codeflash stdout : BubbleSorter.sorter() called
 """
         )
+
         # Replace with optimized code that mutated instance attribute
         optimized_code_mutated_attr = """
 import sys
@@ -401,10 +398,10 @@ def sorter(self, arr):
             testing_time=0.1,
         )
         # assert test_results_mutated_attr[0].return_value[0]["self"].x == 1 TODO: add self as input
-        assert test_results_mutated_attr[1].id.function_getting_tested == "BubbleSorter.__init__"
-        assert test_results_mutated_attr[1].return_value[0] == {"x": 1}
-        assert test_results_mutated_attr[1].verification_type == VerificationType.INIT_STATE_FTO
-        assert test_results_mutated_attr[1].stdout == "codeflash stdout : BubbleSorter.sorter() called\n"
+        assert test_results_mutated_attr[0].id.function_getting_tested == "BubbleSorter.__init__"
+        assert test_results_mutated_attr[0].return_value[0] == {"x": 1}
+        assert test_results_mutated_attr[0].verification_type == VerificationType.INIT_STATE_FTO
+        assert test_results_mutated_attr[0].stdout == ""
         assert not compare_test_results(
             test_results, test_results_mutated_attr
         )  # The test should fail because the instance attribute was mutated
@@ -453,10 +450,10 @@ def sorter(self, arr):
             pytest_max_loops=1,
             testing_time=0.1,
         )
-        assert test_results_new_attr[1].id.function_getting_tested == "BubbleSorter.__init__"
-        assert test_results_new_attr[1].return_value[0] == {"x": 0, "y": 2}
-        assert test_results_new_attr[1].verification_type == VerificationType.INIT_STATE_FTO
-        assert test_results_new_attr[1].stdout == "codeflash stdout : BubbleSorter.sorter() called\n"
+        assert test_results_new_attr[0].id.function_getting_tested == "BubbleSorter.__init__"
+        assert test_results_new_attr[0].return_value[0] == {"x": 0, "y": 2}
+        assert test_results_new_attr[0].verification_type == VerificationType.INIT_STATE_FTO
+        assert test_results_new_attr[0].stdout == ""
         # assert test_results_new_attr[1].return_value[1]["self"].x == 0 TODO: add self as input
         # assert test_results_new_attr[1].return_value[1]["self"].y == 2 TODO: add self as input
         assert compare_test_results(

From 0e5f79fab3add6c92c2b488df9502991b318a144 Mon Sep 17 00:00:00 2001
From: Saurabh Misra <misra.saurabh1@gmail.com>
Date: Tue, 3 Jun 2025 16:29:40 -0700
Subject: [PATCH 14/14] fix one more test

---
 tests/test_instrument_tests.py | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py
index 254352073..a117c2205 100644
--- a/tests/test_instrument_tests.py
+++ b/tests/test_instrument_tests.py
@@ -131,11 +131,11 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
     """
     if sys.version_info < (3, 12):
         expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
-    print(f"!$######{{test_stdout_tag}}######$!")"""
+    """
     else:
         expected += """test_stdout_tag = f'{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}'
-    print(f'!$######{{test_stdout_tag}}######$!')"""
-    expected += """
+    """
+    expected += """print(f'!$######{{test_stdout_tag}}######$!')
     exception = None
     gc.disable()
     try:
@@ -146,12 +146,7 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
-    """
-    if sys.version_info < (3, 12):
-        expected += """print(f"!######{{test_stdout_tag}}######!")"""
-    else:
-        expected += """print(f'!######{{test_stdout_tag}}######!')"""
-    expected += """
+    print(f'!######{{test_stdout_tag}}######!')
     pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
     codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
     codeflash_con.commit()
@@ -240,11 +235,11 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
     """
     if sys.version_info < (3, 12):
         expected += """test_stdout_tag = f"{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}"
-    print(f"!$######{{test_stdout_tag}}######$!")"""
+    """
     else:
         expected += """test_stdout_tag = f'{{test_module_name}}:{{(test_class_name + '.' if test_class_name else '')}}{{test_name}}:{{function_name}}:{{loop_index}}:{{invocation_id}}'
-    print(f'!$######{{test_stdout_tag}}######$!')"""
-    expected += """
+    """
+    expected += """print(f'!$######{{test_stdout_tag}}######$!')
     exception = None
     gc.disable()
     try:
@@ -255,12 +250,7 @@ def codeflash_wrap(wrapped, test_module_name, test_class_name, test_name, functi
         codeflash_duration = time.perf_counter_ns() - counter
         exception = e
     gc.enable()
-    """
-    if sys.version_info < (3, 12):
-        expected += """print(f"!######{{test_stdout_tag}}######!")"""
-    else:
-        expected += """print(f'!######{{test_stdout_tag}}######!')"""
-    expected += """
+    print(f'!######{{test_stdout_tag}}######!')
     pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps(return_value)
     codeflash_cur.execute('INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', (test_module_name, test_class_name, test_name, function_name, loop_index, invocation_id, codeflash_duration, pickled_return_value, 'function_call'))
     codeflash_con.commit()