In [10]:
# ---------------------------------------------------------------------- #
#  Global constants & Configuration
# ---------------------------------------------------------------------- #

from pathlib import Path
import importlib

def find_project_root():
    """Locate the project root by walking upwards from the current working directory.

    A directory counts as the project root when it contains an entry named
    ``.git``. The entry may be a directory (a regular clone) or a plain file
    (git worktrees and submodules store a ``gitdir:`` pointer file instead).

    Returns:
        Path: The closest ancestor of the working directory (the working
        directory itself included) that contains ``.git``.

    Raises:
        FileNotFoundError: If no ancestor, up to and including the filesystem
            root, contains a ``.git`` entry.
    """
    start = Path.cwd()
    # `Path.parents` ends with the filesystem root, so the root itself is
    # checked as well instead of being skipped by the loop condition.
    for candidate in (start, *start.parents):
        # `.exists()` rather than `.is_dir()`: in worktrees/submodules `.git` is a file.
        if (candidate / ".git").exists():
            return candidate
    raise FileNotFoundError("Could not find project root. Is this a git repository?")


# All data locations are anchored at the repository root so the notebook does
# not depend on the directory it was launched from.
PROJECT_ROOT = find_project_root()
BASE_INPUT_DIR = PROJECT_ROOT.joinpath('data', 'code_with_error')
BASE_OUTPUT_DIR = PROJECT_ROOT.joinpath('data', 'code_with_error_traced')

# Create the output tree on first use and say so; stay silent when it already exists.
if not BASE_OUTPUT_DIR.exists():
    BASE_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print(f"Created output directory: {BASE_OUTPUT_DIR}")

# Echo the resolved locations so a reader can verify them at a glance.
print(
    f"Project root found: {PROJECT_ROOT}",
    f"Base input directory set to: {BASE_INPUT_DIR}",
    f"Base output directory set to: {BASE_OUTPUT_DIR}",
    sep="\n",
)

# Provider -> model identifiers whose generated code files are processed below.
MODEL_DICT = {
    "anthropic": ["claude-3-5-haiku-20241022"],
    "openai": ["gpt-4.1-mini"],
    "google": [
        "gemini-2.0-flash-thinking-exp",
        "gemini-2.5-flash-lite-preview-06-17",
        "gemini-2.5-flash",
    ],
}

# Flattened "<provider>_<model>" names; these double as the file stems on disk.
MODELS = [
    "_".join((vendor, model_id))
    for vendor, model_ids in MODEL_DICT.items()
    for model_id in model_ids
]
print(f"Available models: {MODELS}")

# Problem indices to process: 0 through 99.
INDICES = [*range(100)]

Project root found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math
Base input directory set to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error
Base output directory set to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced
Available models: ['anthropic_claude-3-5-haiku-20241022', 'openai_gpt-4.1-mini', 'google_gemini-2.0-flash-thinking-exp', 'google_gemini-2.5-flash-lite-preview-06-17', 'google_gemini-2.5-flash']


In [11]:
def get_code_lines_dict(
        problem_index: int,
        model_name: str,
        base_input_dir: Path = BASE_INPUT_DIR):
    """
    Read ``<base_input_dir>/<problem_index>/<model_name>.py`` and index its lines.

    Args:
        problem_index: Problem number; used as the sub-directory name.
        model_name: File stem of the model's code file (without ``.py``).
        base_input_dir: Directory that contains the per-problem folders.

    Returns:
        A dict mapping 0-based line numbers to the line text with the trailing
        newline removed, or ``None`` (after printing an error to stderr) when
        the file does not exist.
    """
    import sys

    source_path = base_input_dir / str(problem_index) / f"{model_name}.py"
    if not source_path.exists():
        print(f"[Error] File not found: {source_path}", file=sys.stderr)
        return None

    numbered_lines = {}
    with open(source_path, "r", encoding="utf-8") as handle:
        # Iterating the handle yields the same lines as readlines().
        for number, raw_line in enumerate(handle):
            numbered_lines[number] = raw_line.rstrip("\n")
    return numbered_lines


def execution_trace(func):
    """Replay a function's top-level assignments and return the resulting variables.

    The function is never called. Its source is parsed, the default values of
    its positional parameters are evaluated, and every plain assignment
    statement (``ast.Assign``) found directly in the function body is executed
    in order. Other statements (``return``, loops, augmented or annotated
    assignments, ...) are skipped.

    Names are resolved against ``func.__globals__`` so that assignments which
    use module-level imports or constants (e.g. ``math.sqrt``) work.

    NOTE(security): this compiles and executes code taken from ``func``'s
    source file via ``eval``/``exec``; only use it on trusted code.

    Args:
        func: A function whose source is retrievable with ``inspect.getsource``.

    Returns:
        dict: Variable name -> value, including parameters that have defaults.

    Raises:
        TypeError: If the retrieved source does not start with a function definition.
        OSError: Propagated from ``inspect.getsource`` when the source is unavailable.
        Exception: Anything raised while evaluating a default or an assignment.
    """
    import ast
    import inspect
    import textwrap

    # Methods and nested functions come back indented; dedent so they parse.
    src = textwrap.dedent(inspect.getsource(func))
    func_def = ast.parse(src).body[0]
    if not isinstance(func_def, (ast.FunctionDef, ast.AsyncFunctionDef)):
        raise TypeError("execution_trace expects a function defined with 'def'")

    # Evaluate against the function's own module namespace, not an empty one.
    scope = getattr(func, "__globals__", {})
    env = {}

    # Defaults align with the *last* positional parameters, hence the reversed zip.
    for arg, default in zip(
        func_def.args.args[::-1],
        func_def.args.defaults[::-1]
    ):
        expression = compile(ast.Expression(default), '<execution_trace>', 'eval')
        env[arg.arg] = eval(expression, scope)

    for stmt in func_def.body:
        if isinstance(stmt, ast.Assign):
            code = compile(
                ast.Module(body=[stmt], type_ignores=[]), '<execution_trace>', 'exec'
            )
            # Reads fall back to `scope`; writes land in `env`.
            exec(code, scope, env)

    # Return all variables, including arguments
    return env

def _docstring_end_index(lines):
    """Return the index of the line closing the first triple-double-quoted string, or -1."""
    opened = False
    for index, text in enumerate(lines):
        quote_count = text.count('"""')
        if quote_count == 0:
            continue
        # Either a one-line docstring (two delimiters on this line) or the
        # closing delimiter of a docstring opened on an earlier line.
        if opened or quote_count >= 2:
            return index
        opened = True
    return -1


def _compile_variable_pattern(names):
    """Build a regex matching any of `names` as a whole identifier; None when `names` is empty."""
    import re

    ordered = sorted(names, key=len, reverse=True)
    if not ordered:
        return None
    alternatives = "|".join(re.escape(name) for name in ordered)
    # Not preceded by a word character or '.', not followed by a word character:
    # this keeps `a` from matching inside `apples`, `return`, or `obj.a`.
    return re.compile(r"(?<![\w.])(?:" + alternatives + r")(?!\w)")


def augment_code_with_trace(
    problem_index: int,
    model_name: str,
    input_dir: Path,
    output_dir: Path,
    verbose: bool = True
):
    """
    Augment a single Python file with ``# eval:`` trace comments.

    The file ``<input_dir>/<problem_index>/<model_name>.py`` is loaded as a
    module, the variables of its ``solve`` function are obtained with
    ``execution_trace``, and every code line after the docstring gets a
    trailing ``# eval: ...`` comment showing the line with variable names
    replaced by their traced values. Comment lines are kept and preceded by a
    blank line, ``answer =`` lines are kept unchanged, and blank lines are
    dropped. The result is written to
    ``<output_dir>/<problem_index>/<model_name>.py``.

    NOTE(security): the input file is imported, i.e. its top-level code is
    executed. Only run this on files you trust.

    Args:
        problem_index: Problem number; used as the sub-directory name.
        model_name: File stem of the model's code file (without ``.py``).
        input_dir: Directory containing the per-problem input folders.
        output_dir: Directory receiving the per-problem output folders.
        verbose: When True, print progress and error messages.

    Returns:
        bool: True when the augmented file was written, False on any failure
        (missing file, unloadable module, no docstring found, or an exception).
    """
    # `import importlib` alone does not guarantee that the `util` submodule is
    # loaded, so import it explicitly where it is used.
    import importlib.util

    # Step 1: Get the individual lines of code
    code_lines_dict = get_code_lines_dict(problem_index, model_name, input_dir)
    if code_lines_dict is None:
        if verbose:
            print(f"Could not get code lines for problem {problem_index}, model {model_name}")
        return False

    try:
        # Load the module and get execution trace
        input_file = input_dir / str(problem_index) / f"{model_name}.py"
        spec = importlib.util.spec_from_file_location("module.name", input_file)
        if spec is None or spec.loader is None:
            if verbose:
                print(f"Could not load module spec for {input_file}")
            return False

        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        trace_dict = execution_trace(module.solve)

        # Step 2: Find the end of the function signature (including docstring).
        # Multi-line docstrings end on their closing delimiter, not the opening one.
        lines = list(code_lines_dict.values())
        sig_end = _docstring_end_index(lines)
        if sig_end == -1:
            if verbose:
                print(f"Could not find end of docstring for {model_name}")
            return False

        signature_lines = lines[:sig_end + 1]
        body_lines = lines[sig_end + 1:]

        # Steps 3-5: Process body lines. The pattern is built once and applied
        # in a single pass per line, so an inserted value is never re-substituted.
        pattern = _compile_variable_pattern(trace_dict)

        def _traced_value(match):
            return str(trace_dict[match.group(0)])

        processed = []
        for line in body_lines:
            stripped = line.strip()
            if not stripped:
                # Blank lines are not carried over to the output.
                continue
            if stripped.startswith('#'):
                processed.append('')  # Blank line before comments
                processed.append(line)
                continue
            if stripped.startswith('answer ='):
                processed.append(line)  # No eval comment for answer lines
                continue

            if pattern is None:
                eval_line, hits = line, 0
            else:
                eval_line, hits = pattern.subn(_traced_value, line)

            if hits:
                processed.append(line + " # eval: " + eval_line.lstrip())
            else:
                processed.append(line)

        # Step 6: Concatenate and save
        signature_code = '\n'.join(signature_lines) + '\n'
        final_code = signature_code + ''.join(entry + '\n' for entry in processed)

        output_file = output_dir / str(problem_index) / f"{model_name}.py"
        output_file.parent.mkdir(parents=True, exist_ok=True)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(final_code)

        if verbose:
            print(f"Augmented {model_name} for problem {problem_index}")
        return True

    except Exception as e:
        # Best-effort batch worker: report the failure and let the caller count it.
        if verbose:
            print(f"Error processing problem {problem_index}, model {model_name}: {e}")
        return False

In [12]:
import traceback

def batch_augment_errors_with_trace(
    indices: list,
    models: list,
    base_input_dir: Path = BASE_INPUT_DIR,
    base_output_dir: Path = BASE_OUTPUT_DIR,
    error_types: list = None,
    verbose: bool = True
):
    """
    Batch-processes error-injected code files to add execution trace comments.

    Iterates through each error type, problem index, and model, calling the
    augment_code_with_trace function for each file, then prints how many
    files succeeded and how many failed or were skipped.

    Args:
        indices: A list of problem indices to process.
        models: A list of model names to process.
        base_input_dir: The root directory of the error-injected code.
        base_output_dir: The root directory to save the traced code.
        error_types: Sub-directory names (one per error type) to process.
            Defaults to the four error types: 'incorrect_operation',
            'computational_error', 'incorrect_operand', 'skipped_step'.
        verbose: Forwarded to augment_code_with_trace; set to False to
            suppress its per-file progress and error messages.
    """
    if error_types is None:
        error_types = [
            'incorrect_operation',
            'computational_error',
            'incorrect_operand',
            'skipped_step'
        ]

    success_count = 0
    failure_count = 0

    for error_type in error_types:
        print(f"\n--- Processing Traces for Error Type: {error_type} ---")

        # Define the specific input/output directories for this error type
        current_input_dir = base_input_dir / error_type
        current_output_dir = base_output_dir / error_type

        for index in indices:
            for model_name in models:
                # The worker reports its own problems (when verbose) and
                # signals the outcome through its True/False return value.
                success = augment_code_with_trace(
                    problem_index=index,
                    model_name=model_name,
                    input_dir=current_input_dir,
                    output_dir=current_output_dir,
                    verbose=verbose
                )

                if success:
                    # Construct the output path again just for the success message
                    output_file = current_output_dir / str(index) / f"{model_name}.py"
                    print(f"✓ Saved traced file: {output_file}")
                    success_count += 1
                else:
                    failure_count += 1

    print("\n--- Batch Tracing Complete ---")
    print(f"Successfully traced files: {success_count}")
    print(f"Failed or skipped files: {failure_count}")

In [13]:
# Trace every configured problem index for every configured model,
# using the default input and output directories.
batch_augment_errors_with_trace(indices=INDICES, models=MODELS)


--- Processing Traces for Error Type: incorrect_operation ---
Augmented anthropic_claude-3-5-haiku-20241022 for problem 0
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operation/0/anthropic_claude-3-5-haiku-20241022.py
Augmented openai_gpt-4.1-mini for problem 0
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operation/0/openai_gpt-4.1-mini.py
Augmented google_gemini-2.0-flash-thinking-exp for problem 0
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operation/0/google_gemini-2.0-flash-thinking-exp.py
Augmented google_gemini-2.5-flash-lite-preview-06-17 for problem 0
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operation/0/google_gemini-2.5-flash-lite-preview-06-17.py
Augmented google_gemini-2.5-flash for pro

[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operation/10/openai_gpt-4.1-mini.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operation/29/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operation/29/openai_gpt-4.1-mini.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operation/52/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operation/59/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operation/71/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsures

Augmented google_gemini-2.5-flash for problem 75
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operation/75/google_gemini-2.5-flash.py
Could not get code lines for problem 76, model anthropic_claude-3-5-haiku-20241022
Augmented openai_gpt-4.1-mini for problem 76
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operation/76/openai_gpt-4.1-mini.py
Augmented google_gemini-2.0-flash-thinking-exp for problem 76
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operation/76/google_gemini-2.0-flash-thinking-exp.py
Augmented google_gemini-2.5-flash-lite-preview-06-17 for problem 76
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operation/76/google_gemini-2.5-flash-lite-preview-06-17.py
Augmented google_gemini-2.5-flash for

[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operation/98/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/computational_error/29/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/computational_error/52/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/computational_error/59/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/computational_error/71/anthropic_claude-3-5-haiku-20241022.py


Augmented openai_gpt-4.1-mini for problem 52
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/52/openai_gpt-4.1-mini.py
Augmented google_gemini-2.0-flash-thinking-exp for problem 52
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/52/google_gemini-2.0-flash-thinking-exp.py
Augmented google_gemini-2.5-flash-lite-preview-06-17 for problem 52
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/52/google_gemini-2.5-flash-lite-preview-06-17.py
Augmented google_gemini-2.5-flash for problem 52
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/52/google_gemini-2.5-flash.py
Augmented anthropic_claude-3-5-haiku-20241022 for problem 53
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/E

[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/computational_error/76/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/computational_error/82/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/computational_error/85/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/computational_error/98/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operand/29/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operand/29/openai_gpt-4.1-mini.py


Augmented google_gemini-2.0-flash-thinking-exp for problem 22
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operand/22/google_gemini-2.0-flash-thinking-exp.py
Augmented google_gemini-2.5-flash-lite-preview-06-17 for problem 22
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operand/22/google_gemini-2.5-flash-lite-preview-06-17.py
Augmented google_gemini-2.5-flash for problem 22
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operand/22/google_gemini-2.5-flash.py
Augmented anthropic_claude-3-5-haiku-20241022 for problem 23
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operand/23/anthropic_claude-3-5-haiku-20241022.py
Augmented openai_gpt-4.1-mini for problem 23
✓ Saved traced file: /Users/arvindsuresh/Documents/

[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operand/52/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operand/59/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operand/71/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operand/76/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operand/82/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/incorrect_operand/85/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found

Augmented anthropic_claude-3-5-haiku-20241022 for problem 90
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operand/90/anthropic_claude-3-5-haiku-20241022.py
Augmented openai_gpt-4.1-mini for problem 90
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operand/90/openai_gpt-4.1-mini.py
Augmented google_gemini-2.0-flash-thinking-exp for problem 90
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operand/90/google_gemini-2.0-flash-thinking-exp.py
Augmented google_gemini-2.5-flash-lite-preview-06-17 for problem 90
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/incorrect_operand/90/google_gemini-2.5-flash-lite-preview-06-17.py
Augmented google_gemini-2.5-flash for problem 90
✓ Saved traced file: /Users/arvindsuresh/Documents/Gith

[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/skipped_step/29/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/skipped_step/52/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/skipped_step/59/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/skipped_step/71/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/skipped_step/76/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/skipped_step/82/anthropic_claude-3-5-haiku-20241022.py


Augmented google_gemini-2.0-flash-thinking-exp for problem 58
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/skipped_step/58/google_gemini-2.0-flash-thinking-exp.py
Augmented google_gemini-2.5-flash-lite-preview-06-17 for problem 58
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/skipped_step/58/google_gemini-2.5-flash-lite-preview-06-17.py
Augmented google_gemini-2.5-flash for problem 58
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/skipped_step/58/google_gemini-2.5-flash.py
Could not get code lines for problem 59, model anthropic_claude-3-5-haiku-20241022
Augmented openai_gpt-4.1-mini for problem 59
✓ Saved traced file: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/skipped_step/59/openai_gpt-4.1-mini.py
Augmented google_gemini-2.0-flash-thinking-exp for problem 59
✓ S

[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/skipped_step/85/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error/skipped_step/98/anthropic_claude-3-5-haiku-20241022.py


In [14]:
import shutil
import traceback

def batch_copy_metadata_files(
    base_input_dir: Path = BASE_INPUT_DIR,
    base_output_dir: Path = BASE_OUTPUT_DIR
):
    """
    Finds all 'metadata*.json' files in the input directory structure and
    copies them to the corresponding location in the output directory structure.

    Each copy is attempted independently: a failure is reported with its
    traceback and counted, and the remaining files are still processed.

    Args:
        base_input_dir: The root directory to search for metadata files.
        base_output_dir: The root directory to copy the metadata files to.
    """
    # Use rglob to find all metadata JSON files recursively.
    source_json_files = list(base_input_dir.rglob("metadata*.json"))

    if not source_json_files:
        print("No metadata files found to copy in:", base_input_dir)
        return

    print(f"Found {len(source_json_files)} metadata files to copy.")

    success_count = 0
    failure_count = 0

    for source_path in source_json_files:
        # Reset per file so the error message below never refers to an
        # unbound name or to the destination of a previous iteration.
        dest_path = None
        try:
            # Determine the destination path by replicating the sub-directory structure.
            relative_path = source_path.relative_to(base_input_dir)
            dest_path = base_output_dir / relative_path

            # Ensure the destination directory exists.
            dest_path.parent.mkdir(parents=True, exist_ok=True)

            # Copy the file, preserving metadata like timestamps.
            shutil.copy2(source_path, dest_path)

            print(f"✓ Copied: {dest_path}")
            success_count += 1

        except Exception:
            target = dest_path if dest_path is not None else "<destination not resolved>"
            print(f"❌ ERROR copying {source_path} to {target}:")
            traceback.print_exc()
            failure_count += 1

    print("\n--- Metadata Copying Complete ---")
    print(f"Successfully copied files: {success_count}")
    print(f"Failed copies: {failure_count}")

# Copy every metadata*.json file using the function's default input and
# output directories (bound when the function was defined).
batch_copy_metadata_files()

Found 400 metadata files to copy.
✓ Copied: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/61/metadata_61_computational_error.json
✓ Copied: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/95/metadata_95_computational_error.json
✓ Copied: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/59/metadata_59_computational_error.json
✓ Copied: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/92/metadata_92_computational_error.json
✓ Copied: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/66/metadata_66_computational_error.json
✓ Copied: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_with_error_traced/computational_error/50/metadata_50_computational_error.json
✓ Copied: /Users/arvindsuresh/