In [2]:
# ---------------------------------------------------------------------- #
#  Global constants & Configuration
# ---------------------------------------------------------------------- #

from pathlib import Path
import importlib

def find_project_root():
    """Return the nearest ancestor of the working directory that holds a ``.git`` entry.

    The search starts at ``Path.cwd()`` and walks towards the filesystem root,
    checking the starting directory first and the root itself last.

    ``.git`` is accepted whether it is a directory (regular clone) or a plain
    file (the layout Git uses for linked worktrees and submodules).

    Returns:
        Path: The first directory on the way up that contains ``.git``.

    Raises:
        FileNotFoundError: If no directory from the working directory up to and
            including the filesystem root contains a ``.git`` entry.
    """
    start = Path.cwd()
    # `start.parents` ends with the filesystem root, so the root is checked too.
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists():
            return candidate
    raise FileNotFoundError("Could not find project root. Is this a git repository?")


# Resolve the repository root once and derive the manifest input directory from it.
PROJECT_ROOT = find_project_root()
BASE_INPUT_DIR = PROJECT_ROOT.joinpath('data', 'oracle_manifests')

# Output-directory handling is currently disabled (BASE_OUTPUT_DIR is not defined in this cell):
# if not BASE_OUTPUT_DIR.exists():
#     BASE_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
#     print(f"Created output directory: {BASE_OUTPUT_DIR}")

# Echo the resolved locations so a wrong working directory is spotted early.
print(f"Project root found: {PROJECT_ROOT}")
print(f"Base input directory set to: {BASE_INPUT_DIR}")
# print(f"Base output directory set to: {BASE_OUTPUT_DIR}")

# Provider name -> list of model identifiers for that provider.
MODEL_DICT = {
    "anthropic": ["claude-3-5-haiku-20241022"],
    "openai": ["gpt-4.1-mini"],
    "google": [
        "gemini-2.0-flash-thinking-exp",
        "gemini-2.5-flash-lite-preview-06-17",
        "gemini-2.5-flash",
    ],
}

# Flat "<provider>_<model>" labels, in the dictionary's insertion order.
MODELS = [
    "_".join((provider, model))
    for provider, sublist in MODEL_DICT.items()
    for model in sublist
]
print(f"Available models: {MODELS}")

# Integer indices 0..99.
INDICES = [*range(100)]

Project root found: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math
Base input directory set to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests
Available models: ['anthropic_claude-3-5-haiku-20241022', 'openai_gpt-4.1-mini', 'google_gemini-2.0-flash-thinking-exp', 'google_gemini-2.5-flash-lite-preview-06-17', 'google_gemini-2.5-flash']


In [3]:
import json
import re
from pathlib import Path
from typing import Dict, Any
import pandas as pd
from IPython.display import display, Markdown
from datasets import load_dataset

# ---------------------------------------------------------------------- #
#  Helper Functions & Configuration
# ---------------------------------------------------------------------- #

def find_project_root():
    """Return the nearest ancestor of the working directory that holds a ``.git`` entry.

    The search starts at ``Path.cwd()`` and walks towards the filesystem root,
    checking the starting directory first and the root itself last.

    ``.git`` is accepted whether it is a directory (regular clone) or a plain
    file (the layout Git uses for linked worktrees and submodules).

    Returns:
        Path: The first directory on the way up that contains ``.git``.

    Raises:
        FileNotFoundError: If no directory from the working directory up to and
            including the filesystem root contains a ``.git`` entry.
    """
    start = Path.cwd()
    # `start.parents` ends with the filesystem root, so the root is checked too.
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists():
            return candidate
    raise FileNotFoundError("Could not find project root. Is this a git repository?")

def build_solution_mapping(index: int, dataset: Any) -> Dict[str, str]:
    """
    Build a line-keyed dictionary from the solution text of one dataset row.

    The ``"answer"`` field of ``dataset[index]`` is split into lines; every
    line is stripped and blank lines are dropped.  If the last remaining line
    has the form ``#### <number>`` (optionally negative, with ``.``/``,``
    separators) it is stored under ``"FA"`` with the ``####`` marker kept.
    The other lines are stored under ``"L1"``, ``"L2"``, ... in order, with
    every ``<<...>>`` span rewritten to ``[[...]]``.

    Args:
        index: Position of the row inside ``dataset``.
        dataset: Indexable collection whose rows expose an ``"answer"`` string.

    Returns:
        Dictionary with an optional ``"FA"`` entry followed by the ``"L<n>"``
        entries.  Empty when the answer text has no non-blank lines.
    """
    solution_mapping = {}
    solution_text = dataset[index]["answer"]
    lines = [ln.strip() for ln in solution_text.splitlines() if ln.strip()]

    # The leading `-?` keeps a negative final answer from being filed as an
    # ordinary "L<n>" step line.
    if lines and re.match(r"^####\s*-?[\d\.,]+$", lines[-1]):
        solution_mapping["FA"] = lines.pop()

    # Normalize calculator annotation brackets for consistent parsing
    angle = re.compile(r"<<([^>]+)>>")
    lines = [angle.sub(r"[[\1]]", ln) for ln in lines]

    for i, line in enumerate(lines, 1):
        solution_mapping[f"L{i}"] = line

    return solution_mapping

# Load the GSM8K training split a single time.
# gsm8k_train stays None when loading fails, which the functions below check for.
gsm8k_train = None
try:
    gsm8k_train = load_dataset("gsm8k", "main", split="train")
except Exception as e:
    print(f"Could not load dataset. Please ensure 'datasets' is installed and you have an internet connection. Error: {e}")

# ---------------------------------------------------------------------- #
#  Main Display Function
# ---------------------------------------------------------------------- #

def display_manifest(index: int):
    """
    Render the Oracle Manifest stored for one problem index.

    The manifest is read from ``<project root>/data/oracle_manifests/_<index>.json``
    and shown as Markdown (title, question, function code) followed by a table
    of its ``logical_steps``.  Each step row gets an ``original_solution_line``
    column looked up by ``line_number`` in the mapping returned by
    ``build_solution_mapping``; rows without a match (or manifests whose steps
    carry no ``line_number`` field) show ``"N/A"``.

    The function only reports problems by printing: it returns early when the
    dataset is not loaded or the manifest file is missing, and any other
    exception is caught and printed.

    The full-width column option is applied only while the table is rendered,
    so the pandas display settings of the session are left untouched.

    Args:
        index (int): The problem index to display (e.g., 310).
    """
    if gsm8k_train is None:
        print("Cannot display manifest because the dataset failed to load.")
        return

    try:
        PROJECT_ROOT = find_project_root()
        manifest_path = PROJECT_ROOT / 'data' / 'oracle_manifests' / f'_{index}.json'

        if not manifest_path.exists():
            print(f"Error: Manifest for index {index} not found at {manifest_path}")
            return

        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)

        # --- Display Top-Level Information ---
        display(Markdown(f"# Oracle Manifest for Index: **{manifest.get('problem_index', 'N/A')}**"))
        display(Markdown("## Question"))
        display(Markdown(f"> {manifest.get('question', 'N/A')}"))

        # --- Display Function Code with Syntax Highlighting ---
        display(Markdown("## Function Code"))
        code = manifest.get('function_code', '# Code not found')
        display(Markdown(f"```python\n{code}\n```"))

        # --- Display Logical Steps in a Formatted Table ---
        display(Markdown("## Logical Steps"))
        steps = manifest.get('logical_steps', [])
        if steps:
            df_steps = pd.DataFrame(steps)

            # Line-keyed view of the dataset's own solution text for this index
            original_solution = build_solution_mapping(index, gsm8k_train)

            # Pair every step with the dataset line it refers to.  Without a
            # 'line_number' column there is nothing to look up, so fall back
            # to the same "N/A" placeholder used for unmatched keys.
            if 'line_number' in df_steps.columns:
                df_steps['original_solution_line'] = df_steps['line_number'].apply(
                    lambda ln: original_solution.get(ln, "N/A")
                )
            else:
                df_steps['original_solution_line'] = "N/A"

            # Preferred left-to-right column order for side-by-side comparison
            column_order = [
                'line_number',
                'original_solution_line',
                'output_variable',
                'nl_template',
                'calculator_annotation_template'
            ]

            # Keep only the preferred columns that are actually present
            existing_columns_ordered = [col for col in column_order if col in df_steps.columns]
            df_steps = df_steps[existing_columns_ordered]

            # Show untruncated cell text for this table only
            with pd.option_context('display.max_colwidth', None):
                display(df_steps)
        else:
            print("No logical steps found in the manifest.")

    except Exception as e:
        print(f"An error occurred: {e}")

# ====================================================================
#              --== Interactively Display a Manifest ==--
#  Change the index number below and re-run this cell to view another.
# ====================================================================

INDEX_TO_DISPLAY = 3822
display_manifest(INDEX_TO_DISPLAY)

Error: Manifest for index 3822 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_3822.json


In [4]:
build_solution_mapping(3822, gsm8k_train)

{'FA': '#### 5',
 'L1': "To calculate Alec's goal number of votes, we need to know that 60 students / 4 = [[60/4=15]]15 students is equal to one-quarter of the class students.",
 'L2': "Alec's goal is therefore 15 students * 3 quarters = [[15*3=45]]45 votes.",
 'L3': 'Half of the class said they will vote for him, so there are already 60 students / 2 = [[60/2=30]]30 votes.',
 'L4': 'Another 5 students are thinking about voting for him which leaves a total so far of 30 + 5 = [[30+5=35]]35 votes.',
 'L5': 'This means there are 60 students - 35 voting for Alec = [[60-35=25]]25 students not voting for Alec.',
 'L6': 'A fifth of these decided to vote, so this is a further 25 students / 5 = [[25/5=5]]5 votes.',
 'L7': 'Alec is therefore receiving a total of 35 + 5 = [[35+5=40]]40 votes.',
 'L8': 'So he has missed his goal by 45 goal votes - 40 actual votes = [[45-40=5]]5 votes.'}

In [5]:
display_manifest(310)

Error: Manifest for index 310 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_310.json


In [6]:
display_manifest(7371)

Error: Manifest for index 7371 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_7371.json


In [7]:
for index in [49, 54, 81, 92]:
    display_manifest(index)

Error: Manifest for index 49 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_49.json
Error: Manifest for index 54 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_54.json
Error: Manifest for index 81 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_81.json
Error: Manifest for index 92 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_92.json


In [8]:
index = 48
def wrapper(index: int):
    """Print the question and the line-keyed solution mapping of one GSM8K row.

    Args:
        index: Position of the row in ``gsm8k_train``.
    """
    record = gsm8k_train[index]
    print(f"Index: {index}")
    print("Question:")
    print(record['question'])
    print("Solution mapping:")
    solution_by_line = build_solution_mapping(index, gsm8k_train)
    print(solution_by_line)
    print()

In [13]:
for index in [4, 5, 8, 54, 72, 310, 3822]:
    wrapper(index)

Index: 4
Question:
James writes a 3-page letter to 2 different friends twice a week.  How many pages does he write a year?
Solution mapping:
{'FA': '#### 624', 'L1': 'He writes each friend 3*2=<<3*2=6>>6 pages a week', 'L2': 'So he writes 6*2=<<6*2=12>>12 pages every week', 'L3': 'That means he writes 12*52=<<12*52=624>>624 pages a year'}

Index: 5
Question:
Mark has a garden with flowers. He planted plants of three different colors in it. Ten of them are yellow, and there are 80% more of those in purple. There are only 25% as many green flowers as there are yellow and purple flowers. How many flowers does Mark have in his garden?
Solution mapping:
{'FA': '#### 35', 'L1': 'There are 80/100 * 10 = <<80/100*10=8>>8 more purple flowers than yellow flowers.', 'L2': "So in Mark's garden, there are 10 + 8 = <<10+8=18>>18 purple flowers.", 'L3': 'Purple and yellow flowers sum up to 10 + 18 = <<10+18=28>>28 flowers.', 'L4': "That means in Mark's garden there are 25/100 * 28 = <<25/100*28=7>>7 

In [10]:
import json
import re
from pathlib import Path
from typing import Dict, Any
import pandas as pd
from IPython.display import display, Markdown
from datasets import load_dataset

# ---------------------------------------------------------------------- #
#  Helper Functions & Configuration
# ---------------------------------------------------------------------- #

def find_project_root():
    """Return the nearest ancestor of the working directory that holds a ``.git`` entry.

    The search starts at ``Path.cwd()`` and walks towards the filesystem root,
    checking the starting directory first and the root itself last.

    ``.git`` is accepted whether it is a directory (regular clone) or a plain
    file (the layout Git uses for linked worktrees and submodules).

    Returns:
        Path: The first directory on the way up that contains ``.git``.

    Raises:
        FileNotFoundError: If no directory from the working directory up to and
            including the filesystem root contains a ``.git`` entry.
    """
    start = Path.cwd()
    # `start.parents` ends with the filesystem root, so the root is checked too.
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists():
            return candidate
    raise FileNotFoundError("Could not find project root. Is this a git repository?")

def build_solution_mapping(index: int, dataset: Any) -> Dict[str, str]:
    """
    Build a line-keyed dictionary from the solution text of one dataset row.

    The ``"answer"`` field of ``dataset[index]`` is split into lines; every
    line is stripped and blank lines are dropped.  If the last remaining line
    has the form ``#### <number>`` (optionally negative, with ``.``/``,``
    separators) it is stored under ``"FA"`` with the ``####`` marker kept.
    The other lines are stored unchanged under ``"L1"``, ``"L2"``, ... in
    order; ``<<...>>`` annotations are left exactly as they appear.

    Args:
        index: Position of the row inside ``dataset``.
        dataset: Indexable collection whose rows expose an ``"answer"`` string.

    Returns:
        Dictionary with an optional ``"FA"`` entry followed by the ``"L<n>"``
        entries.  Empty when the answer text has no non-blank lines.
    """
    solution_mapping = {}
    solution_text = dataset[index]["answer"]
    lines = [ln.strip() for ln in solution_text.splitlines() if ln.strip()]

    # The leading `-?` keeps a negative final answer from being filed as an
    # ordinary "L<n>" step line.
    if lines and re.match(r"^####\s*-?[\d\.,]+$", lines[-1]):
        solution_mapping["FA"] = lines.pop()

    for i, line in enumerate(lines, 1):
        solution_mapping[f"L{i}"] = line

    return solution_mapping

# Load the GSM8K training split a single time.
# gsm8k_train stays None when loading fails, which the functions below check for.
gsm8k_train = None
try:
    gsm8k_train = load_dataset("gsm8k", "main", split="train")
except Exception as e:
    print(f"Could not load dataset. Please ensure 'datasets' is installed and you have an internet connection. Error: {e}")

# ---------------------------------------------------------------------- #
#  Main Display Function for Alternative Manifests
# ---------------------------------------------------------------------- #

def display_alt_manifest(index: int):
    """
    Render the 'Just-in-Time' Oracle Manifest stored for one problem index.

    The manifest is read from
    ``<project root>/data/oracle_manifests/_<index>_alt.json`` and shown as
    Markdown (title, question, function code) followed by a table of its
    ``logical_steps``.  Each step row gets an ``original_solution_line`` column
    looked up by ``line_number`` in the mapping returned by
    ``build_solution_mapping``; rows without a match (or manifests whose steps
    carry no ``line_number`` field) show ``"N/A"``.  The table also includes
    the ``new_inputs`` column when the steps provide it.

    The function only reports problems by printing: it returns early when the
    dataset is not loaded or the manifest file is missing, and any other
    exception is caught and printed.

    The full-width column option is applied only while the table is rendered,
    so the pandas display settings of the session are left untouched.

    Args:
        index (int): The problem index to display (e.g., 3822).
    """
    if gsm8k_train is None:
        print("Cannot display manifest because the dataset failed to load.")
        return

    try:
        PROJECT_ROOT = find_project_root()
        # Alternative manifests carry an "_alt" suffix in the file name
        manifest_path = PROJECT_ROOT / 'data' / 'oracle_manifests' / f'_{index}_alt.json'

        if not manifest_path.exists():
            print(f"Error: Manifest for index {index} not found at {manifest_path}")
            return

        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)

        # --- Display Top-Level Information ---
        display(Markdown(f"# Just-in-Time Manifest for Index: **{manifest.get('problem_index', 'N/A')}**"))
        display(Markdown("## Question"))
        display(Markdown(f"> {manifest.get('question', 'N/A')}"))

        # --- Display Function Code with Syntax Highlighting ---
        display(Markdown("## Function Code"))
        code = manifest.get('function_code', '# Code not found')
        display(Markdown(f"```python\n{code}\n```"))

        # --- Display Logical Steps in a Formatted Table ---
        display(Markdown("## Logical Steps"))
        steps = manifest.get('logical_steps', [])
        if steps:
            df_steps = pd.DataFrame(steps)

            original_solution = build_solution_mapping(index, gsm8k_train)

            # Without a 'line_number' column there is nothing to look up, so
            # fall back to the same "N/A" placeholder used for unmatched keys.
            if 'line_number' in df_steps.columns:
                df_steps['original_solution_line'] = df_steps['line_number'].apply(
                    lambda ln: original_solution.get(ln, "N/A")
                )
            else:
                df_steps['original_solution_line'] = "N/A"

            # Preferred column order for the "Just-in-Time" format
            column_order = [
                'line_number',
                'original_solution_line',
                'new_inputs',
                'output_variable',
                'nl_template',
                'calculator_annotation_template'
            ]

            # Keep only the preferred columns that are actually present
            existing_columns_ordered = [col for col in column_order if col in df_steps.columns]
            df_steps = df_steps[existing_columns_ordered]

            # Show untruncated cell text for this table only
            with pd.option_context('display.max_colwidth', None):
                display(df_steps)
        else:
            print("No logical steps found in the manifest.")

    except Exception as e:
        print(f"An error occurred: {e}")

# ====================================================================
#              --== Interactively Display a Manifest ==--
#  Change the index number below and re-run this cell to view another.
# ====================================================================

for index in [49, 54, 72, 310, 3822, 7371]:
    display_alt_manifest(index)

Error: Manifest for index 49 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_49_alt.json
Error: Manifest for index 54 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_54_alt.json
Error: Manifest for index 72 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_72_alt.json
Error: Manifest for index 310 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_310_alt.json
Error: Manifest for index 3822 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_3822_alt.json
Error: Manifest for index 7371 not found at /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/oracle_manifests/_7371_alt.json


In [11]:
import json
import re
import math
import tempfile
import sys
from pathlib import Path
from typing import Dict, Any
import importlib.util
from datasets import load_dataset

# ---------------------------------------------------------------------- #
#  Helper Functions & Configuration
# ---------------------------------------------------------------------- #

def find_project_root():
    """Return the nearest ancestor of the working directory that holds a ``.git`` entry.

    The search starts at ``Path.cwd()`` and walks towards the filesystem root,
    checking the starting directory first and the root itself last.

    ``.git`` is accepted whether it is a directory (regular clone) or a plain
    file (the layout Git uses for linked worktrees and submodules).

    Returns:
        Path: The first directory on the way up that contains ``.git``.

    Raises:
        FileNotFoundError: If no directory from the working directory up to and
            including the filesystem root contains a ``.git`` entry.
    """
    start = Path.cwd()
    # `start.parents` ends with the filesystem root, so the root is checked too.
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists():
            return candidate
    raise FileNotFoundError("Could not find project root. Is this a git repository?")

# Load the GSM8K training split a single time.
# gsm8k_train stays None when loading fails, which the validation function checks for.
gsm8k_train = None
try:
    gsm8k_train = load_dataset("gsm8k", "main", split="train")
except Exception as e:
    print(f"Could not load dataset. Please ensure 'datasets' is installed. Error: {e}")

# ---------------------------------------------------------------------- #
#  Main Validation Function
# ---------------------------------------------------------------------- #

def validate_manifest_answer(index: int) -> Dict[str, Any]:
    """
    Reads a JIT manifest, executes its function, and compares the result
    to the ground-truth answer from the GSM8K dataset.

    The manifest is read from
    ``<project root>/data/oracle_manifests/_<index>_alt.json``.  Its
    ``function_code`` string is written to a temporary module, imported, and
    the module's ``solve()`` is called without arguments.  The ground truth is
    the number after ``####`` in ``gsm8k_train[index]['answer']`` (commas
    removed, optional leading minus sign), compared with ``math.isclose``.

    WARNING: this executes code taken verbatim from the manifest file with the
    full privileges of the current process.  Only run it on manifests you trust.

    Args:
        index: The problem index to validate.

    Returns:
        A dictionary containing the validation result.  ``"is_correct"`` and
        ``"error"`` are always present; ``"manifest_answer"`` and
        ``"gsm8k_answer"`` are present once ``solve()`` has returned.
        Failures are reported through ``"error"`` rather than raised.
    """
    if gsm8k_train is None:
        return {"is_correct": False, "error": "GSM8K dataset not loaded."}

    try:
        # --- 1. Load the Manifest ---
        PROJECT_ROOT = find_project_root()
        manifest_path = PROJECT_ROOT / 'data' / 'oracle_manifests' / f'_{index}_alt.json'
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)
        function_code = manifest.get('function_code')
        if not function_code:
            return {"is_correct": False, "error": "No 'function_code' in manifest."}

        # --- 2. Get Ground-Truth Answer ---
        answer_text = gsm8k_train[index]['answer']
        # `-?` so that a negative final answer is parsed instead of rejected
        match = re.search(r'####\s*(-?[\d\.,]+)', answer_text)
        if not match:
            return {"is_correct": False, "error": "Could not parse ground-truth answer from dataset."}

        # Convert to float, removing commas
        gsm8k_answer = float(match.group(1).replace(',', ''))

        # --- 3. Dynamically Execute the Function Code ---
        # Create a temporary directory to write the module to
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            module_name = f"temp_module_{index}"
            module_path = temp_path / f"{module_name}.py"
            module_path.write_text(function_code, encoding='utf-8')

            # Add the temp directory to the system path to allow import
            path_entry = str(temp_path)
            sys.path.insert(0, path_entry)
            try:
                spec = importlib.util.spec_from_file_location(module_name, module_path)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)

                # --- 4. Run the Function ---
                manifest_answer = module.solve()
            finally:
                # Always undo the sys.path change, even when the manifest code
                # raises.  Remove by value: the executed code may itself have
                # touched sys.path, so position 0 is not guaranteed to be ours.
                try:
                    sys.path.remove(path_entry)
                except ValueError:
                    pass

    except FileNotFoundError:
        # NOTE: also reached when find_project_root() or the executed manifest
        # code raises FileNotFoundError.
        return {"is_correct": False, "error": f"Manifest file not found for index {index}."}
    except Exception as e:
        # Catch any error during execution (SyntaxError, NameError, etc.)
        return {"is_correct": False, "error": f"Execution failed: {type(e).__name__}: {e}"}

    # --- 5. Compare Answers and Return Result ---
    try:
        is_correct = math.isclose(manifest_answer, gsm8k_answer)
    except TypeError:
        # solve() returned something math.isclose cannot treat as a number
        # (e.g. None or a string); report it instead of crashing the caller.
        return {
            "is_correct": False,
            "manifest_answer": manifest_answer,
            "gsm8k_answer": gsm8k_answer,
            "error": f"solve() returned a non-numeric value: {manifest_answer!r}",
        }

    return {
        "is_correct": is_correct,
        "manifest_answer": manifest_answer,
        "gsm8k_answer": gsm8k_answer,
        "error": None if is_correct else "Mismatch between manifest and ground-truth answer."
    }


indices_to_test = [49, 54, 72, 310, 3822, 7371]

# Validate each listed manifest and print a one-line verdict per index.
for idx in indices_to_test:
    result = validate_manifest_answer(idx)

    if not result["is_correct"]:
        status = "❌ Incorrect"
        details = f"Error: {result['error']}"
    else:
        status = "✅ Correct"
        details = f"Manifest Answer: {result['manifest_answer']}, GSM8K Answer: {result['gsm8k_answer']}"

    print(f"Index {idx}: {status} -> {details}")

Index 49: ❌ Incorrect -> Error: Manifest file not found for index 49.
Index 54: ❌ Incorrect -> Error: Manifest file not found for index 54.
Index 72: ❌ Incorrect -> Error: Manifest file not found for index 72.
Index 310: ❌ Incorrect -> Error: Manifest file not found for index 310.
Index 3822: ❌ Incorrect -> Error: Manifest file not found for index 3822.
Index 7371: ❌ Incorrect -> Error: Manifest file not found for index 7371.


In [12]:
import os
import openai
from dotenv import load_dotenv

def print_available_openai_models():
    """
    Print the model IDs returned by the OpenAI models endpoint, sorted by name.

    The API key is taken from the ``OPENAI_API_KEY`` environment variable after
    ``load_dotenv()`` has been called.  Nothing is returned; a missing key, an
    authentication failure, and any other exception are each reported with a
    printed message.
    """
    try:
        # Pick up variables from a .env file, if any, before reading the key
        load_dotenv()
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            print("❌ Error: OPENAI_API_KEY not found in environment variables or .env file.")
            return

        client = openai.OpenAI(api_key=api_key)

        print("Fetching model list from OpenAI API...")
        listing = client.models.list()

        # Only the IDs are needed; sort them for a stable, readable listing
        model_ids = sorted(entry.id for entry in listing.data)

        print("\n--- Available OpenAI Models ---")
        for name in model_ids:
            print(f"- {name}")
        print("\n--- End of List ---")

    except openai.AuthenticationError:
        print("❌ Authentication Error: The provided OpenAI API key is invalid or has expired.")
    except Exception as e:
        print(f"❌ An unexpected error occurred: {e}")

# ====================================================================
#              --== Run the function to list models ==--
# ====================================================================
print_available_openai_models()

Fetching model list from OpenAI API...

--- Available OpenAI Models ---
- babbage-002
- chatgpt-4o-latest
- codex-mini-latest
- dall-e-2
- dall-e-3
- davinci-002
- gpt-3.5-turbo
- gpt-3.5-turbo-0125
- gpt-3.5-turbo-1106
- gpt-3.5-turbo-16k
- gpt-3.5-turbo-instruct
- gpt-3.5-turbo-instruct-0914
- gpt-4
- gpt-4-0125-preview
- gpt-4-0613
- gpt-4-1106-preview
- gpt-4-turbo
- gpt-4-turbo-2024-04-09
- gpt-4-turbo-preview
- gpt-4.1
- gpt-4.1-2025-04-14
- gpt-4.1-mini
- gpt-4.1-mini-2025-04-14
- gpt-4.1-nano
- gpt-4.1-nano-2025-04-14
- gpt-4.5-preview
- gpt-4.5-preview-2025-02-27
- gpt-4o
- gpt-4o-2024-05-13
- gpt-4o-2024-08-06
- gpt-4o-2024-11-20
- gpt-4o-audio-preview
- gpt-4o-audio-preview-2024-10-01
- gpt-4o-audio-preview-2024-12-17
- gpt-4o-audio-preview-2025-06-03
- gpt-4o-mini
- gpt-4o-mini-2024-07-18
- gpt-4o-mini-audio-preview
- gpt-4o-mini-audio-preview-2024-12-17
- gpt-4o-mini-realtime-preview
- gpt-4o-mini-realtime-preview-2024-12-17
- gpt-4o-mini-search-preview
- gpt-4o-mini-searc