In [83]:
import ast
import sys
from pathlib import Path
import shutil

# ---------------------------------------------------------------------- #
#  Global constants & Configuration
# ---------------------------------------------------------------------- #

def find_project_root():
    """Locate the enclosing git checkout by walking up from the working directory.

    Starting at ``Path.cwd()``, every directory up to and including the
    filesystem root is checked for a ``.git`` entry; the first directory that
    has one is returned.

    Returns:
        Path: the working directory or its nearest ancestor containing ``.git``.

    Raises:
        FileNotFoundError: if no directory on the way up contains ``.git``.
    """
    start = Path.cwd()
    for candidate in (start, *start.parents):
        # exists() rather than is_dir(): linked worktrees and submodules keep
        # ".git" as a plain file that points at the real git directory.
        if (candidate / ".git").exists():
            return candidate
    raise FileNotFoundError("Could not find project root. Is this a git repository?")


# Every data location is derived from the repository root, so the notebook
# does not depend on which sub-directory the kernel was started in.
PROJECT_ROOT = find_project_root()
BASE_INPUT_DIR = PROJECT_ROOT.joinpath('data', 'code_gen_outputs_cleaned')
BASE_OUTPUT_DIR = PROJECT_ROOT.joinpath('data', 'code_gen_outputs_formatted', 'formatted')
print(f"Project root found: {PROJECT_ROOT}")
print(f"Base input directory set to: {BASE_INPUT_DIR}")
print(f"Base output directory set to: {BASE_OUTPUT_DIR}")

# Provider name -> model identifiers available for that provider.
MODEL_DICT = {
    "anthropic": ["claude-3-5-haiku-20241022"],
    "openai": ["gpt-4.1-mini"],
    "google": [
        "gemini-2.0-flash-thinking-exp",
        "gemini-2.5-flash-lite-preview-06-17",
        "gemini-2.5-flash",
    ],
}

# Flattened "<provider>_<model>" names; these are the per-problem file stems
# ("<name>.py") read and written by the functions in this notebook.
MODELS = [
    f"{provider}_{model}"
    for provider, provider_models in MODEL_DICT.items()
    for model in provider_models
]
print(f"Available models: {MODELS}")

Project root found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math
Base input directory set to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_cleaned
Base output directory set to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_formatted/formatted
Available models: ['anthropic_claude-3-5-haiku-20241022', 'openai_gpt-4.1-mini', 'google_gemini-2.0-flash-thinking-exp', 'google_gemini-2.5-flash-lite-preview-06-17', 'google_gemini-2.5-flash']


In [84]:
import re

def get_code_lines_dict(
        problem_index: int,
        model_name: str,
        base_input_dir: Path = BASE_INPUT_DIR):
    """
    Load ``{base_input_dir}/{problem_index}/{model_name}.py`` as a line table.

    Returns a dict whose keys are 0-based line numbers and whose values are
    the corresponding lines with the trailing newline removed. If the file
    does not exist, an error is printed to stderr and None is returned.
    """
    import sys
    file_path = base_input_dir / str(problem_index) / f"{model_name}.py"
    if file_path.exists():
        with open(file_path, "r", encoding="utf-8") as handle:
            raw_lines = handle.readlines()
        return {number: text.rstrip("\n") for number, text in enumerate(raw_lines)}
    print(f"[Error] File not found: {file_path}", file=sys.stderr)
    return None

def format_comment(comment_line):
    """
    Normalize a step comment to the bare form '#: Ln'.

    The input is stripped and must start with '#:' and end with 'L' followed
    by digits (optionally trailed by whitespace); whatever sits between is
    discarded. Any other input yields '\n', which callers treat as a blank line.
    """
    m = re.match(r"^#:(.*)L(\d+)\s*$", comment_line.strip())
    if m is None:
        return "\n"
    return f"#: L{m.group(2)}"

def remove_trailing_comment(code_line):
    """
    Strip the trailing comment from a single line of code.

    A line without '#' is returned untouched. Otherwise the line is tokenized
    so that a '#' inside a string literal is not mistaken for the start of a
    comment. If the comment contains 'FINAL ANSWER', the whole line is kept
    (right-stripped); any other comment is cut off and the remaining code is
    right-stripped. A line whose only '#' characters sit inside string
    literals is returned unchanged.

    If the line cannot be tokenized on its own (for example an unterminated
    string), the first '#' is treated as the start of the comment.
    """
    if "#" not in code_line:
        return code_line

    import io
    import tokenize

    first_hash = code_line.index("#")
    comment_col = None
    try:
        for tok in tokenize.generate_tokens(io.StringIO(code_line).readline):
            if tok.type == tokenize.COMMENT:
                # Column offsets are only meaningful for the first physical
                # line; anything else falls back to the plain '#' split.
                comment_col = tok.start[1] if tok.start[0] == 1 else first_hash
                break
    except (tokenize.TokenError, SyntaxError):
        # An unclosed bracket only fails at EOF, after the comment token was
        # already seen; other failures fall back to the plain '#' split.
        comment_col = first_hash

    if comment_col is None:
        # Every '#' belongs to a string literal: there is no comment to strip.
        return code_line
    if "FINAL ANSWER" in code_line[comment_col + 1:]:
        return code_line.rstrip()
    return code_line[:comment_col].rstrip()

def remove_redundant_comments(lines):
    """
    Drop '#:' comment lines that do not introduce a code step.

    A line starting with '#:' is removed when the line right after it either
    is another '#:' comment or (ignoring leading whitespace) starts with
    'answer'. All other lines, including a '#:' comment in last position, are
    kept in their original order. Returns a new list.
    """
    kept = []
    last_index = len(lines) - 1
    for position, current in enumerate(lines):
        if current.startswith("#:") and position < last_index:
            following = lines[position + 1]
            if following.startswith("#:") or following.lstrip().startswith("answer"):
                continue
        kept.append(current)
    return kept

def format_generated_code(code_lines_dict):
    """
    Clean and re-layout one generated solution.

    Args:
        code_lines_dict: mapping whose values, in iteration order, are the
            lines of the source file (keys are not used).

    Returns:
        The formatted source as one string, without a trailing newline.

    Processing:
        * Header: everything up to and including the first line whose stripped
          text ends with triple double-quotes is copied verbatim. If no such
          line exists, only the first line is treated as the header.
        * Body comment lines go through ``format_comment``; comments that do
          not reduce to '#: Ln' are dropped, as are blank lines.
        * Trailing comments on code lines are removed by
          ``remove_trailing_comment`` ('# FINAL ANSWER' is kept).
        * ``remove_redundant_comments`` drops '#:' comments that precede
          another comment or an answer line.
        * Every body line is emitted at a single 4-space indent level, with a
          blank line before each kept comment and before any line starting
          with 'answer'. Nested blocks in the input are therefore flattened.
    """
    lines = list(code_lines_dict.values())

    # The header (signature + docstring) ends at the first line closing with
    # triple quotes; default to the first line when there is none.
    sig_end = next(
        (i for i, line in enumerate(lines) if line.strip().endswith('"""')),
        0,
    )
    signature = lines[:sig_end + 1]
    body = lines[sig_end + 1:]

    # First pass: normalize comments, mark blanks, strip trailing comments.
    processed = []
    for line in body:
        stripped = line.strip()
        if stripped.startswith("#"):
            processed.append(format_comment(stripped))
        elif stripped == "":
            processed.append("\n")
        else:
            processed.append(remove_trailing_comment(line))

    # Second pass: drop blank markers (blank lines and rejected comments).
    processed = [entry for entry in processed if entry.strip() != ""]

    # Third pass: drop comments that do not introduce a code step.
    processed = remove_redundant_comments(processed)

    # Assemble: header verbatim, then the body at one uniform indent level.
    final_lines = signature.copy()
    indent = "    "
    for line in processed:
        if line.startswith("#:"):
            final_lines.append("")  # blank line before comment
            final_lines.append(f"{indent}{line}")
        elif line.lstrip().startswith("answer"):
            final_lines.append("")  # blank line before answer
            # Re-indent like every other body line; keeping the source
            # indentation here would mix indent widths whenever the input is
            # not indented by exactly four spaces.
            final_lines.append(f"{indent}{line.strip()}")
        else:
            final_lines.append(f"{indent}{line.strip()}")

    # Remove any extra blank lines at the end
    while final_lines and final_lines[-1].strip() == "":
        final_lines.pop()

    return "\n".join(final_lines)

import os

def batch_format_and_write(
    indices,
    models = MODELS,
    base_input_dir = BASE_INPUT_DIR,
    base_output_dir = BASE_OUTPUT_DIR,
    get_code_lines_dict=get_code_lines_dict,
    format_generated_code=format_generated_code
):
    """
    Format every (index, model) combination and save the result.

    For each pair the source lines are loaded with ``get_code_lines_dict``,
    formatted with ``format_generated_code`` and written as UTF-8 to
    {base_output_dir}/{index}/{model}.py, creating directories as needed.
    Pairs for which the loader returns None are reported with a warning and
    skipped. The loader and formatter can be replaced through the keyword
    arguments of the same name.
    """
    for idx in indices:
        for model in models:
            code_lines_dict = get_code_lines_dict(idx, model, base_input_dir)
            if code_lines_dict is not None:
                formatted_code = format_generated_code(code_lines_dict)
                out_path = base_output_dir / str(idx) / f"{model}.py"
                out_path.parent.mkdir(parents=True, exist_ok=True)
                out_path.write_text(formatted_code, encoding="utf-8")
                print(f"Wrote formatted code to {out_path}")
            else:
                print(f"[Warning] No code for index={idx}, model={model}")

In [85]:
# Format problems 0-99 for every model in MODELS; (index, model) pairs with no
# input file are reported and skipped.
INDICES = [*range(100)]
batch_format_and_write(indices=INDICES)

Wrote formatted code to /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_formatted/formatted/0/anthropic_claude-3-5-haiku-20241022.py
Wrote formatted code to /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_formatted/formatted/0/openai_gpt-4.1-mini.py
Wrote formatted code to /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_formatted/formatted/0/google_gemini-2.0-flash-thinking-exp.py
Wrote formatted code to /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_formatted/formatted/0/google_gemini-2.5-flash-lite-preview-06-17.py
Wrote formatted code to /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_formatted/formatted/0/google_gemini-2.5-flash.py
Wrote formatted code to /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_formatted/formatted/1/anthropic_claude-3-5-haiku-20241022.py
Wrote formatted code to /Users

[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_cleaned/52/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_cleaned/59/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_cleaned/71/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_cleaned/76/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_cleaned/82/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_cleaned/85/anthropic_claude-3-5-haiku-20241022.py
[Error] File not found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25

In [86]:
def wrapper(index: int):
    """
    Print the formatted code of every model in MODELS for one problem index.

    Each available solution is printed to stdout under a banner naming the
    model; models without an input file for this index are reported on stderr.
    """
    for model in MODELS:
        lines_dict = get_code_lines_dict(index, model)
        if lines_dict is None:
            print(f"[Error] No code found for {model} at index {index}", file=sys.stderr)
            continue
        print("="*50)
        print(f"Cleaned code for {model}")
        print("-"*50)
        print(format_generated_code(lines_dict))

In [87]:
# Spot-check: print the formatted code of every model for problem 10.
wrapper(10)

Cleaned code for anthropic_claude-3-5-haiku-20241022
--------------------------------------------------
def solve(
    total_people_consumed: int = 847,  # Over three hundred years, it has consumed 847 people
    num_periods: int = 3  # Monster rises once every hundred years over three hundred years
):
    """Index: 10.
    Returns: the number of people on the first ship the monster consumed."""

    #: L4
    total_ship_multiplier = 1 + 2 + 4

    #: L5
    people_on_first_ship = total_people_consumed / total_ship_multiplier

    answer = people_on_first_ship  # FINAL ANSWER
    return answer
Cleaned code for openai_gpt-4.1-mini
--------------------------------------------------
def solve(
    total_people: int = 847,  # it has consumed 847 people over three hundred years
):
    """Index: 10.
    Returns: the number of people on the ship the monster ate in the first hundred years.
    """

    #: L4
    total_ships_factor = 7

    #: L5
    first_ship_people = total_people / total_shi

In [88]:
# Spot-check: print the formatted code of every model for problem 16.
wrapper(16)

Cleaned code for anthropic_claude-3-5-haiku-20241022
--------------------------------------------------
def solve(
    johnson_share_ratio: int = 5,  # Johnson gets 5 parts
    mike_share_ratio: int = 2,  # Mike gets 2 parts
    johnson_amount: int = 2500,  # Johnson got $2500
    shirt_cost: int = 200  # shirt costs $200
):
    """Index: 16.
    Returns: the amount of money Mike has left after buying a shirt."""

    #: L2
    part_value = johnson_amount / johnson_share_ratio

    #: L3
    mike_total_share = mike_share_ratio * part_value

    #: L4
    mike_remaining_amount = mike_total_share - shirt_cost

    answer = mike_remaining_amount  # FINAL ANSWER
    return answer
Cleaned code for openai_gpt-4.1-mini
--------------------------------------------------
def solve(
    johnson_share: int = 2500,  # Johnson got $2500
    shirt_cost: int = 200  # shirt costs $200
):
    """Index: 16.
    Returns: the amount Mike has left after buying the shirt.
    """

    #: L2
    value_per_pa

In [89]:
# Spot-check: print the formatted code of every model for problem 22.
wrapper(22)

Cleaned code for anthropic_claude-3-5-haiku-20241022
--------------------------------------------------
def solve(
    jamie_last_name: str = "Grey", # Jamie's full name is Jamie Grey
    letters_difference: int = 3 # Samantha's last name has three fewer letters than Bobbie's
):
    """Index: 22.
    Returns: the number of letters in Samantha's last name."""

    #: L1
    jamie_last_name_length = len(jamie_last_name)
    bobbie_last_name_length = jamie_last_name_length * 2 + 2

    #: L2
    samantha_last_name_length = bobbie_last_name_length - letters_difference

    answer = samantha_last_name_length  # FINAL ANSWER
    return answer
Cleaned code for openai_gpt-4.1-mini
--------------------------------------------------
def solve(
    jamies_last_name_length: int = 4,  # Jamie’s last name is Grey, which has 4 letters
    letters_removed_from_bobbie: int = 2,  # Bobbie took two letters off her last name
    difference_samantha_bobbie: int = 3  # Samantha’s last name has three fewer l

In [90]:
# Spot-check: print the formatted code of every model for problem 29.
wrapper(29)

Cleaned code for anthropic_claude-3-5-haiku-20241022
--------------------------------------------------
def solve(
    previous_rent_percentage: float = 0.4,  # Mrs. Snyder used to spend 40% of her monthly income on rent and utilities
    new_rent_percentage: float = 0.25,  # now her rent and utilities only amount to 25% of her monthly income
    income_increase: int = 600  # Her salary was recently increased by $600
):
    """Index: 29.
    Returns: Mrs. Snyder's previous monthly income."""

    answer = 1000  # FINAL ANSWER
    return answer
Cleaned code for openai_gpt-4.1-mini
--------------------------------------------------
def solve(
    increase_amount: int = 600,  # salary was recently increased by $600
    old_rent_fraction: float = 0.40,  # used to spend 40% of income on rent and utilities
    new_rent_fraction: float = 0.25  # now rent and utilities amount to 25% of income
):
    """Index: 29.
    Returns: the previous monthly income before the salary increase.
    """

   