In [1]:
import sys
from pathlib import Path
import ast
import importlib.util

# ---------------------------------------------------------------------- #
#  Global constants & Configuration
# ---------------------------------------------------------------------- #

def find_project_root(start=None):
    """Walk upwards from a directory until one containing ``.git`` is found.

    Args:
        start: Directory to begin the search from. Defaults to the current
            working directory, which is what the notebook relies on.

    Returns:
        The first directory, starting with ``start`` itself and moving towards
        the filesystem root, that contains a ``.git`` entry.

    Raises:
        FileNotFoundError: If no directory on the way up contains ``.git``.
    """
    origin = Path.cwd() if start is None else Path(start).resolve()
    # `origin.parents` ends with the filesystem root, so the root itself is
    # inspected as well instead of being skipped by a loop-termination check.
    for candidate in (origin, *origin.parents):
        # `.git` is a directory in a regular clone but a plain file in git
        # worktrees and submodules, so test for existence rather than is_dir().
        if (candidate / ".git").exists():
            return candidate
    raise FileNotFoundError("Could not find project root. Is this a git repository?")


# Locate the repository once; every other path in the notebook hangs off it.
PROJECT_ROOT = find_project_root()
print(f"Project root found: {PROJECT_ROOT}")

# Directory holding one sub-folder per problem index.
BASE_OUTPUT_DIR = PROJECT_ROOT.joinpath('data', 'code_gen_outputs_formatted')
print(f"Base output directory set to: {BASE_OUTPUT_DIR}")

# Provider name -> model identifiers generated for that provider.
MODEL_DICT = {
    "anthropic": [
        "claude-3-5-haiku-20241022",
    ],
    "openai": [
        "gpt-4.1-mini",
    ],
    "google": [
        "gemini-2.0-flash-thinking-exp",
        "gemini-2.5-flash-lite-preview-06-17",
        "gemini-2.5-flash",
    ],
}

# Flattened "<provider>_<model>" names, in MODEL_DICT order.
MODELS = [
    f"{provider}_{model}"
    for provider, provider_models in MODEL_DICT.items()
    for model in provider_models
]
print(f"Available models: {MODELS}")

Project root found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math
Base output directory set to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/code_gen_outputs_formatted
Available models: ['anthropic_claude-3-5-haiku-20241022', 'openai_gpt-4.1-mini', 'google_gemini-2.0-flash-thinking-exp', 'google_gemini-2.5-flash-lite-preview-06-17', 'google_gemini-2.5-flash']


In [2]:
import ast
import inspect

def execution_trace(func):
    """Replay the top-level assignments of ``func`` and return their values.

    The function's source is parsed, its parameters are bound to their default
    values, and every assignment statement that sits directly in the function
    body (plain ``=``, augmented such as ``+=``, and annotated with a value) is
    executed in order. Other statements (loops, conditionals, ``return``,
    calls) are ignored.

    NOTE: this runs code taken from ``func``'s source through ``eval``/``exec``;
    only use it on functions you are prepared to execute.

    Args:
        func: A Python function whose source is retrievable by
            ``inspect.getsource``. Parameters without a default are left
            unbound, so assignments that read them raise ``NameError``.

    Returns:
        dict mapping each assigned variable name to its final value, excluding
        the function's own parameters.

    Raises:
        TypeError: If the retrieved source does not start with a function
            definition (for example a lambda bound by assignment).
    """
    import textwrap  # local import so this cell does not depend on another cell's imports

    target = inspect.unwrap(func)
    # Methods and nested functions come back indented; dedent so ast.parse accepts them.
    src = textwrap.dedent(inspect.getsource(target))
    func_def = ast.parse(src).body[0]
    if not isinstance(func_def, (ast.FunctionDef, ast.AsyncFunctionDef)):
        raise TypeError("execution_trace expects a function defined with 'def'")

    # Resolve names against the function's own module so statements that use
    # module-level imports or helpers (e.g. `math.sqrt`) do not raise NameError.
    module_globals = target.__globals__

    def _evaluate(expr):
        return eval(compile(ast.Expression(expr), '<execution_trace>', 'eval'), module_globals)

    signature = func_def.args
    positional = signature.posonlyargs + signature.args
    env = {}
    # Defaults align with the *last* positional parameters, hence the reversed zip.
    for arg, default in zip(reversed(positional), reversed(signature.defaults)):
        env[arg.arg] = _evaluate(default)
    # Keyword-only parameters carry `None` in kw_defaults when they have no default.
    for arg, default in zip(signature.kwonlyargs, signature.kw_defaults):
        if default is not None:
            env[arg.arg] = _evaluate(default)

    for stmt in func_def.body:
        if isinstance(stmt, ast.AnnAssign):
            if stmt.value is None:
                continue  # bare annotation, nothing is assigned
            # Drop the annotation so exec neither evaluates it nor records
            # annotation bookkeeping inside `env`.
            stmt = ast.copy_location(
                ast.Assign(targets=[stmt.target], value=stmt.value), stmt
            )
        elif not isinstance(stmt, (ast.Assign, ast.AugAssign)):
            continue
        code = compile(ast.Module(body=[stmt], type_ignores=[]), '<execution_trace>', 'exec')
        exec(code, module_globals, env)

    # Report only the assigned variables, not the parameters.
    param_names = {arg.arg for arg in positional + signature.kwonlyargs}
    for variadic in (signature.vararg, signature.kwarg):
        if variadic is not None:
            param_names.add(variadic.arg)
    return {name: value for name, value in env.items() if name not in param_names}

In [4]:
def wrapper(index: int):
    """Show the execution trace of every model's solution for one problem.

    For each entry of ``MODELS`` this loads
    ``BASE_OUTPUT_DIR/<index>/<model>.py`` as a module, runs
    ``execution_trace`` on its ``solve`` function and displays the resulting
    dict followed by a separator line.

    A model whose file is missing, fails to import, has no ``solve`` attribute
    or cannot be traced is reported and skipped, so one bad file never prevents
    the remaining models from being shown.

    Relies on ``display`` being available, as it is inside an IPython/Jupyter
    session.

    Args:
        index: Name of the problem sub-directory under ``BASE_OUTPUT_DIR``.
    """
    for model in MODELS:
        print(f"Execution trace for {model} with index {index}:")
        problem_dir = BASE_OUTPUT_DIR / f"{index}"
        py_file_path = problem_dir / f"{model}.py"

        if not py_file_path.is_file():
            print(f"File not found: {py_file_path}, skipping.")
            continue

        try:
            spec = importlib.util.spec_from_file_location("module.name", py_file_path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            # Kept inside the try: a missing `solve` or a statement that cannot
            # be replayed should skip this model, not abort the whole loop.
            trace = execution_trace(module.solve)
        except Exception as e:
            print(f"Error processing {py_file_path}: {e}, skipping.")
            continue

        display(trace)
        print("\n" + "="*50 + "\n")

In [6]:
# Compare all models' traces for problem directory 29.
wrapper(index=29)

Execution trace for anthropic_claude-3-5-haiku-20241022 with index 29:


{'answer': 1000}



Execution trace for openai_gpt-4.1-mini with index 29:


{'old_rent_cost_expr': 0.4,
 'new_rent_cost_expr': 0.25,
 'previous_income': 1000.0,
 'answer': 1000.0}



Execution trace for google_gemini-2.0-flash-thinking-exp with index 29:


{'lhs_step_6': 8.0,
 'rhs_multiplier_step_6': 5.0,
 'rhs_constant_step_6': 3000.0,
 'lhs_step_7': 3.0,
 'previous_income': 1000.0,
 'answer': 1000.0}



Execution trace for google_gemini-2.5-flash-lite-preview-06-17 with index 29:


{'previous_monthly_income': 999.9999999999999, 'answer': 999.9999999999999}



Execution trace for google_gemini-2.5-flash with index 29:


{'right_side_constant_term': 3000,
 'coefficient_of_p': 3,
 'previous_monthly_income': 1000.0,
 'answer': 1000.0}





In [7]:
# Compare all models' traces for problem directory 44.
wrapper(index=44)

Execution trace for anthropic_claude-3-5-haiku-20241022 with index 44:


{'accommodation_cost': 150.0,
 'food_cost': 300.0,
 'entertainment_cost': 250.0,
 'coursework_materials_cost': 300.0,
 'answer': 300.0}



Execution trace for openai_gpt-4.1-mini with index 44:


{'accommodation_cost': 150.0,
 'food_cost': 300.0,
 'entertainment_cost': 250.0,
 'coursework_materials_cost': 300.0,
 'answer': 300.0}



Execution trace for google_gemini-2.0-flash-thinking-exp with index 44:


{'accommodation_cost': 150.0,
 'food_cost': 300.0,
 'entertainment_cost': 250.0,
 'coursework_materials_cost': 300.0,
 'answer': 300.0}



Execution trace for google_gemini-2.5-flash-lite-preview-06-17 with index 44:


{'accommodation_cost': 150.0,
 'food_cost': 300.0,
 'entertainment_cost': 250.0,
 'coursework_materials_cost': 300.0,
 'answer': 300.0}



Execution trace for google_gemini-2.5-flash with index 44:


{'accommodation_cost': 150.0,
 'food_cost': 300.0,
 'entertainment_cost': 250.0,
 'coursework_materials_cost': 300.0,
 'answer': 300.0}





In [8]:
# Compare all models' traces for problem directory 16.
wrapper(index=16)

Execution trace for anthropic_claude-3-5-haiku-20241022 with index 16:


{'part_value': 500.0,
 'mike_total_share': 1000.0,
 'mike_remaining_amount': 800.0,
 'answer': 800.0}



Execution trace for openai_gpt-4.1-mini with index 16:


{'value_per_part': 500.0,
 'mike_share': 1000.0,
 'mike_after_shirt': 800.0,
 'answer': 800.0}



Execution trace for google_gemini-2.0-flash-thinking-exp with index 16:


{'value_per_part': 500.0,
 'mike_share': 1000.0,
 'mike_remaining': 800.0,
 'answer': 800.0}



Execution trace for google_gemini-2.5-flash-lite-preview-06-17 with index 16:


{'value_per_part': 500.0,
 'mike_share': 1000.0,
 'mike_remaining_share': 800.0,
 'answer': 800.0}



Execution trace for google_gemini-2.5-flash with index 16:


{'value_per_part': 500.0,
 'mike_share': 1000.0,
 'mike_remaining_share': 800.0,
 'answer': 800.0}





In [9]:
# Compare all models' traces for problem directory 99.
wrapper(index=99)

Execution trace for anthropic_claude-3-5-haiku-20241022 with index 99:


{'second_tank_water': 24.0,
 'fish_in_second_tank': 12.0,
 'fish_in_first_tank': 16.0,
 'fish_in_first_tank_after_eating': 15.0,
 'difference_in_fish': 3.0,
 'answer': 3.0}



Execution trace for openai_gpt-4.1-mini with index 99:


{'second_tank_gallons': 24.0,
 'second_tank_fish_count': 12.0,
 'first_tank_fish_count': 16.0,
 'first_tank_fish_after_eating': 15.0,
 'fish_difference': 3.0,
 'answer': 3.0}



Execution trace for google_gemini-2.0-flash-thinking-exp with index 99:


{'gallons_tank2': 24.0,
 'fish_tank2': 12.0,
 'initial_fish_tank1': 16.0,
 'fish_tank1_after_eating': 15.0,
 'difference_in_fish': 3.0,
 'answer': 3.0}



Execution trace for google_gemini-2.5-flash-lite-preview-06-17 with index 99:


{'gallons_in_second_tank': 24.0,
 'num_fish_second_tank': 12.0,
 'num_fish_first_tank': 16.0,
 'num_fish_first_tank_after_eating': 15.0,
 'difference_in_fish': 3.0,
 'answer': 3.0}



Execution trace for google_gemini-2.5-flash with index 99:


{'second_tank_gallons': 24.0,
 'fish_in_second_tank': 12.0,
 'initial_fish_in_first_tank': 16.0,
 'final_fish_in_first_tank': 15.0,
 'more_fish_in_first_tank': 3.0,
 'answer': 3.0}



