### Basic confuguration and setup

In [None]:
import json
import re
from pathlib import Path
from typing import Dict, Any, List
import pandas as pd
from IPython.display import display, Markdown
from datasets import load_dataset

import os
import openai
import google.generativeai as genai
import anthropic
import asyncio
import nest_asyncio
from openai import AsyncOpenAI
from anthropic import AsyncClient
from dotenv import load_dotenv

# --- API Client and Concurrency Configuration --- #

# This must be done once per kernel to allow asyncio to run in a Jupyter notebook..
nest_asyncio.apply()

# Load API Keys from .env file
load_dotenv()
print("Loaded environment variables from .env file.")

# Initialize Asynchronous API Clients
try:
    openai_client_async = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    anthropic_client_async = AsyncClient(api_key=os.getenv("ANTHROPIC_API_KEY"))
    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    print("API clients initialized successfully.")
except TypeError:
    print("API key not found for one or more services. Please check your .env file.")
    # Assign None to prevent errors in subsequent cells
    openai_client_async = None
    anthropic_client_async = None

# Define API Concurrency Limits to prevent 429 "Too Many Requests" errors.
API_CONCURRENCY_LIMITS = {
    "google": 2,    # Gemini API has low RPM limits, so keep this low.
    "anthropic": 2, # Anthropic's token-based limits are complex, 2 is a safe start.
    "openai": 2,    # OpenAI APIs are generally more permissive.
}
print(f"API concurrency limits set to: {API_CONCURRENCY_LIMITS}")


# --- Set directories and paths --- #

def find_project_root():
    """Traverse upwards to find the project root, marked by the .git folder."""
    current_path = Path.cwd()
    while current_path != current_path.parent:
        if (current_path / ".git").is_dir():
            return current_path
        current_path = current_path.parent
    raise FileNotFoundError("Could not find project root. Is this a git repository?")

PROJECT_ROOT = find_project_root()
DATA_DIR = PROJECT_ROOT / 'data'
SAMPLE_MANIFESTS_DIR = DATA_DIR / 'sample_manifests'
MANIFEST_OUTPUT_DIR = DATA_DIR / 'generated_manifests_raw'
MANIFEST_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print(f"Project root found at: {PROJECT_ROOT}")
print(f"Data directory found at: {DATA_DIR}")
print(f"Sample manifests directory found at: {SAMPLE_MANIFESTS_DIR}")
print(f"Raw manifest output directory set to: {MANIFEST_OUTPUT_DIR}")

# Load the GSM8K dataset
GSM8K_TRAIN = load_dataset("gsm8k", "main", split="train")

# Few shot example indices (chosen for maximum diversity)
EXAMPLE_INDICES = [54, 72, 310]


# --- Helper Functions for Manifest loading and displaying --- #

def build_solution_mapping(
        index: int, 
        dataset: 'datasets.Dataset' = GSM8K_TRAIN,
        exclude_FA: bool = True
    ) -> Dict[str, str]:
    """
    Extracts the natural language solution for a given problem index,
    cleans it, and structures it into a line-numbered dictionary.
    """
    solution_mapping = {}
    solution_text = dataset[index]["answer"]
    lines = [ln.strip() for ln in solution_text.splitlines() if ln.strip()]

    # Improved regex to handle commas in the final answer
    if lines and re.match(r"^####\s*[\d\.,]+$", lines[-1]):
        solution_mapping["FA"] = lines.pop(-1).strip()

    for i, line in enumerate(lines, 1):
        solution_mapping[f"L{i}"] = line

    if exclude_FA and "FA" in solution_mapping:
        del solution_mapping["FA"]

    return solution_mapping

def load_manifest(index: int, manifest_dir: Path):
    """Loads the manifest for a given index."""
    manifest_path = manifest_dir / f'_{index}_alt.json'
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest for index {index} not found at {manifest_path}")
    with open(manifest_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def display_manifest(
        index: int, 
        manifest_dir: Path, 
        dataset: 'datasets.Dataset' = GSM8K_TRAIN):
    """Loads and displays the final, streamlined Formalization Manifest."""
    try:
        manifest = load_manifest(index, manifest_dir)

        # --- Extract info and display Top-Level ---
        code = manifest.get('function_code', '# Code not found')
        display(Markdown(f"# Manifest for Index: **{index}**"))
        display(Markdown("## Question"))
        display(Markdown(f"> {dataset[index]['question']}"))

        # --- Display Function Code ---
        display(Markdown("## Function Code"))
        display(Markdown(f"```python\n{code}\n```"))

        # --- Display Logical Steps in a DataFrame ---
        display(Markdown("## Logical Steps"))
        steps = manifest.get('logical_steps', [])
        if not steps:
            print("No logical steps found in the manifest.")
            return
        
        df_steps = pd.DataFrame(steps)
        original_solution = build_solution_mapping(
            index=index, 
            dataset=dataset, 
            exclude_FA=True
        )
        
        df_steps['original_solution_line'] = df_steps['line_number'].apply(
            lambda ln: original_solution.get(ln, "N/A")
        )
        
        column_order = [
            'line_number', 
            'original_solution_line',
            'solution_line_template',
            'new_inputs',
            'output_variable'
        ]
        
        existing_columns_ordered = [col for col in column_order if col in df_steps.columns]
        df_steps = df_steps[existing_columns_ordered]
        
        pd.set_option('display.max_colwidth', None)
        display(df_steps)
    except Exception as e:
        print(f"An error occurred: {e}")

Loaded environment variables from .env file.
API clients initialized successfully.
API concurrency limits set to: {'google': 2, 'anthropic': 2, 'openai': 4}
Project root found at: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math
Data directory found at: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data
Sample manifests directory found at: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/sample_manifests
Raw manifest output directory set to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/generated_manifests_raw


### Visualization of complete manifests for chosen examples

In [3]:
for index in EXAMPLE_INDICES:
    display_manifest(index, SAMPLE_MANIFESTS_DIR)
    print("\n" + "="*80 + "\n")

# Manifest for Index: **54**

## Question

> Leah earned $28 working odd jobs around the neighborhood. She spent a seventh of it on a milkshake and put half of the rest in her savings account. She left the remaining money in her wallet. Her dog got ahold of her wallet and shredded all the money inside but $1. How many dollars did Leah lose?

## Function Code

```python
def solve():
    """Index: 54.
    Returns: the amount of money Leah lost.
    """
    # L1
    initial_earnings = 28 # Leah earned $28
    milkshake_denominator = 7 # spent a seventh of it
    milkshake_cost = initial_earnings / milkshake_denominator

    # L2
    money_after_milkshake = initial_earnings - milkshake_cost

    # L3
    savings_denominator = 2 # put half of the rest
    money_in_wallet = money_after_milkshake / savings_denominator

    # L4
    money_left_intact = 1 # shredded all the money inside but $1
    money_lost = money_in_wallet - money_left_intact

    # FA
    answer = money_lost
    return answer
```

## Logical Steps

Unnamed: 0,line_number,original_solution_line,solution_line_template,new_inputs,output_variable
0,L1,Leah spent 28 / 7 = $<<28/7=4>>4 on a milkshake.,Leah spent {initial_earnings} / {milkshake_denominator} = $<<{initial_earnings}/{milkshake_denominator}={milkshake_cost}>>{milkshake_cost} on a milkshake.,"[initial_earnings, milkshake_denominator]",milkshake_cost
1,L2,She had 28 - 4 = $<<28-4=24>>24 left.,She had {initial_earnings} - {milkshake_cost} = $<<{initial_earnings}-{milkshake_cost}={money_after_milkshake}>>{money_after_milkshake} left.,[],money_after_milkshake
2,L3,"She put half in her savings account and half in her wallet, so she had 24 / 2 = $<<24/2=12>>12 in her wallet.","She put half in her savings account and half in her wallet, so she had {money_after_milkshake} / {savings_denominator} = $<<{money_after_milkshake}/{savings_denominator}={money_in_wallet}>>{money_in_wallet} in her wallet.",[savings_denominator],money_in_wallet
3,L4,"Her dog shredded all the money in her wallet but $1, so Leah lost 12 - 1 = $<<12-1=11>>11.","Her dog shredded all the money in her wallet but ${money_left_intact}, so Leah lost {money_in_wallet} - {money_left_intact} = $<<{money_in_wallet}-{money_left_intact}={money_lost}>>{money_lost}.",[money_left_intact],money_lost






# Manifest for Index: **72**

## Question

> Nancy is filling an aquarium for her fish. She fills it halfway and goes to answer the door. While she's gone, her cat knocks the aquarium over and spills half the water in it. Then Nancy comes back and triples the amount of water in the aquarium. If the aquarium is 4 feet long, 6 feet wide, and 3 feet high, how many cubic feet of water are in the aquarium?

## Function Code

```python
from fractions import Fraction

def solve():
    """Index: 72.
    Returns: how many cubic feet of water are in the aquarium.
    """
    # L1
    length = 4 # 4 feet long
    width = 6 # 6 feet wide
    height = 3 # 3 feet high
    total_volume = length * width * height

    # L2
    initial_fill_proportion = Fraction(1, 2) # fills it halfway
    spilled_proportion = Fraction(1, 2) # spills half the water
    proportion_after_spill = initial_fill_proportion * spilled_proportion

    # L3
    refill_multiplier = 3 # triples the amount of water
    final_proportion = proportion_after_spill * refill_multiplier

    # L4
    water_volume = total_volume * final_proportion

    # FA
    answer = water_volume
    return answer
```

## Logical Steps

Unnamed: 0,line_number,original_solution_line,solution_line_template,new_inputs,output_variable
0,L1,"First calculate the volume of the aquarium by multiplying its length, width and height: 4 ft * 6 ft * 3 ft = <<4*6*3=72>>72 cubic ft","First calculate the volume of the aquarium by multiplying its length, width and height: {length} ft * {width} ft * {height} ft = <<{length}*{width}*{height}={total_volume}>>{total_volume} cubic ft","[length, width, height]",total_volume
1,L2,Then figure out what proportion of the aquarium is full after the cat knocks it over: 1/2 * 1/2 = 1/4,Then figure out what proportion of the aquarium is full after the cat knocks it over: {initial_fill_proportion} * {spilled_proportion} = {proportion_after_spill},"[initial_fill_proportion, spilled_proportion]",proportion_after_spill
2,L3,Then figure out what proportion of the aquarium is full after Nancy refills it: 3 * 1/4 = 3/4,Then figure out what proportion of the aquarium is full after Nancy refills it: {refill_multiplier} * {proportion_after_spill} = {final_proportion},[refill_multiplier],final_proportion
3,L4,Now multiply the proportion of the aquarium that's full by the aquarium's volume to find out how much water is in it: 72 cubic ft * 3/4 = <<72*3/4=54>>54 cubic ft,Now multiply the proportion of the aquarium that's full by the aquarium's volume to find out how much water is in it: {total_volume} cubic ft * {final_proportion} = <<{total_volume}*{final_proportion}={water_volume}>>{water_volume} cubic ft,[],water_volume






# Manifest for Index: **310**

## Question

> Janet hires six employees. Four of them are warehouse workers who make $15/hour, and the other two are managers who make $20/hour. Janet has to pay 10% of her workers' salaries in FICA taxes. If everyone works 25 days a month and 8 hours a day, how much does Janet owe total for their wages and taxes for one month?

## Function Code

```python
def solve():
    """Index: 310.
    Returns: the total amount Janet owes for wages and taxes for one month.
    """
    # L1
    days_per_month = 25 # works 25 days a month
    hours_per_day = 8 # and 8 hours a day
    hours_per_worker_per_month = days_per_month * hours_per_day

    # L2
    warehouse_worker_wage = 15 # make $15/hour
    wage_per_warehouse_worker = hours_per_worker_per_month * warehouse_worker_wage

    # L3
    num_warehouse_workers = 4 # Four of them are warehouse workers
    total_warehouse_wages = wage_per_warehouse_worker * num_warehouse_workers

    # L4
    manager_wage = 20 # make $20/hour
    wage_per_manager = hours_per_worker_per_month * manager_wage

    # L5
    num_managers = 2 # the other two are managers
    total_manager_wages = wage_per_manager * num_managers

    # L6
    total_wages = total_manager_wages + total_warehouse_wages

    # L7
    fica_tax_rate = 0.1 # pay 10% of her workers' salaries in FICA taxes
    fica_taxes = total_wages * fica_tax_rate

    # L8
    grand_total = total_wages + fica_taxes

    # FA
    answer = grand_total
    return answer
```

## Logical Steps

Unnamed: 0,line_number,original_solution_line,solution_line_template,new_inputs,output_variable
0,L1,First figure out how many hours each worker works per month by multiplying the number of days they work by the number of hours a day they work: 25 days * 8 hours/day = <<25*8=200>>200 hours,First figure out how many hours each worker works per month by multiplying the number of days they work by the number of hours a day they work: {days_per_month} days * {hours_per_day} hours/day = <<{days_per_month}*{hours_per_day}={hours_per_worker_per_month}>>{hours_per_worker_per_month} hours,"[days_per_month, hours_per_day]",hours_per_worker_per_month
1,L2,Then calculate how much one warehouse worker makes per month by multiplying their hourly rate by the number of hours they work: 200 hours * $15/hour = $<<200*15=3000>>3000,Then calculate how much one warehouse worker makes per month by multiplying their hourly rate by the number of hours they work: {hours_per_worker_per_month} hours * ${warehouse_worker_wage}/hour = $<<{hours_per_worker_per_month}*{warehouse_worker_wage}={wage_per_warehouse_worker}>>{wage_per_warehouse_worker},[warehouse_worker_wage],wage_per_warehouse_worker
2,L3,"Then multiply that number by 4 to find out how much all the warehouse workers make: $3000/worker * 4 workers = $<<3000*4=12000>>12,000",Then multiply that number by {num_warehouse_workers} to find out how much all the warehouse workers make: ${wage_per_warehouse_worker}/worker * {num_warehouse_workers} workers = $<<{wage_per_warehouse_worker}*{num_warehouse_workers}={total_warehouse_wages}>>{total_warehouse_wages},[num_warehouse_workers],total_warehouse_wages
3,L4,"Now multiply the hours each manager works (also 200) by their hourly wage to find out how much one manager makes per month: 200 hours * $20/hour = $<<200*20=4000>>4,000",Now multiply the hours each manager works (also {hours_per_worker_per_month}) by their hourly wage to find out how much one manager makes per month: {hours_per_worker_per_month} hours * ${manager_wage}/hour = $<<{hours_per_worker_per_month}*{manager_wage}={wage_per_manager}>>{wage_per_manager},[manager_wage],wage_per_manager
4,L5,"Now multiply one manager's wages by the number of managers (2) to find their total wage amount: $4,000/manager * 2 managers = $<<4000*2=8000>>8,000",Now multiply one manager's wages by the number of managers (2) to find their total wage amount: ${wage_per_manager}/manager * {num_managers} managers = $<<{wage_per_manager}*{num_managers}={total_manager_wages}>>{total_manager_wages},[num_managers],total_manager_wages
5,L6,"Now add the wages for the managers and the workers to find the total cost of the wages: $8,000 + $12,000 = $<<8000+12000=20000>>20,000",Now add the wages for the managers and the workers to find the total cost of the wages: ${total_manager_wages} + ${total_warehouse_wages} = $<<{total_manager_wages}+{total_warehouse_wages}={total_wages}>>{total_wages},[],total_wages
6,L7,"Now multiply the total wage bill by 10% to find how much the FICA taxes are: $20,000 * .1 = $<<20000*.1=2000>>2,000",Now multiply the total wage bill by 10% to find how much the FICA taxes are: ${total_wages} * {fica_tax_rate} = $<<{total_wages}*{fica_tax_rate}={fica_taxes}>>{fica_taxes},[fica_tax_rate],fica_taxes
7,L8,"Now add the total wage bill to the total tax amount to find the grand total: $2,000 + $20,000 = $<<2000+20000=22000>>22,000",Now add the total wage bill to the total tax amount to find the grand total: ${fica_taxes} + ${total_wages} = $<<{fica_taxes}+{total_wages}={grand_total}>>{grand_total},[],grand_total






### Assembly of user prompts

In [4]:
SYSTEM_PROMPT = """You are a data formalization expert who excels in mathematical reasoning and writing python code. You will be presented with math word problems accompanied by step-by-step natural language solutions. You goal is to carefully and meticulously analyze the given question and solution, and formalize it by converting it into a structured json object that deconstructs the logic of the solution.

You MUST follow all rules and formatting instructions provided in the user prompt without deviation. Your entire output MUST be a single JSON object wrapped in ```json ... ```. Do not include any text or explanation before or after the JSON object."""

FORMAT_GUIDELINES = """In the TASK below, you will be given a math problem and its corresponding step-by-step solution. Each step in the solution is numbered (e.g. "L1", "L2" and so on), and many of the steps include calculator annotations (e.g. "<<20*0.1=2>>"). Your goal is to convert this information into a structured JSON object according to the following schema and detailed instructions.

# JSON Schema Definition

Your output must adhere to the following JSON structure:

```json
{
  "function_code": "A single string containing a complete, self-contained Python function that constitutes an end-to-end formalization of the solution.",
  "logical_steps": [
    {
      "line_number": "The line number from the original solution (e.g., 'L1', 'L2').",
      "new_inputs": "A (possibly empty) list of strings, where each string is the name of a variable being defined for the first time in this step.",
      "output_variable": "The name of the variable being assigned as the result of the main computation in this step.",
      "solution_line_template": "The complete original line from the solution, including the calculator annotation, with all computational numbers replaced by `{variable_name}` placeholders."
    }
  ]
}
```

# Detailed Field Instructions

## "function_code"

This string must contain a Python function with the following characteristics:

1. **Conditional Imports:** The function_code string should contain no imports, with one exception: if the function body uses the Fraction class (e.g., rate = Fraction(1, 10)), then the very first line of the function_code string MUST be from fractions import Fraction. If not, then the very first line MUST be the function definition (i.e. `def solve():`).

2.  **Function Naming & Docstring:** The function must be named `solve`, and it should not have any args. It must begin with a docstring that has exactly two lines:
    *   The first line must be: "Index: [Index]." using the index from the task header.
    *   The second line must be a succinct, one-sentence description of what the function returns (e.g., "Returns: the total cost of wages and taxes.").

3. **Line comments:** For each solution line that is used to compute the final answer, include a comment of the form `# L1`, `# L2` and so on, which references the line number. 
    *   Such a comment must immediately be followed by a code block that precisely formalizes the corresponding solution line. 
    *   If a solution line does not contain any computation relevant to the final answer, then omit it completely from the function code and do NOT add a corresponding line comment.

4. **Code blocks:** Each code block must consist of the following:
    *   First, define the `new_inputs`. These are the variables (if any) needed for the computation in the solution line, which have not already been defined, and whose values are stated in (or can be extracted from) the `question`. Each new input variable definition MUST be followed on the same line by a comment (`#`) that quotes or refers to the phrase in the `question` from which it is extracted.
    *   Second, there should be EXACTLY ONE line of code which formalizes the computation in the solution line and assigns the resulting value to a new variable (this is the `output_variable`).

5. **The Direct Substitution Rule:** This is the most important rule, which ensures that the `nl_template` is purely identical to the original solution line except that numerical values in computations have been replaced with variable placeholders: You MUST define variables in such a way that they can be DIRECTLY SUBSTITUTED into the solution text without changing any operators. For example:
    *   If the solution line has a computation like `... / 5`, you MUST define a variable like `var = 5`.
    *   If the solution line has a computation like `... * 1/5`, you MUST define a variable like `var = Fraction(1, 5)`. 
    *   If the solution line has a computation like `... * 0.2`, you MUST define a variable like `var = 0.2`.

6. **Final Answer:** The line that assigns the final result to the `answer` variable must be immediately preceded by a line containing only the comment `# FA`. The last line of the function must always return the `answer` variable.

## "solution_line_template"

These artifacts will serve as precise links between the solution line and the code line. 
*   The template should be EXACTLY identical to the original solution line, with the ONLY CHANGES being that every numerical value used in a computation is replaced by its corresponding `{variable_name}` placeholder. This applies to the entire content of the solution line, including the inside and outside of the calculator annotations. 
*   In particular, EVERY SINGLE numerical value appearing inside the calculator annotation MUST be replaced with a `{variable_name}` placeholder.
*   Note: The Direct Substitution Rule will ensure that for correctly defined variables, it will be possible to replace the numerical values with variable name placeholders while leaving all surrounding text, symbols, and operators unchanged. 
*   Thus, in a correct `nl_template`, the calculator annotation will not contain any numerical values, and moreover, replacing each `{variable_name}` by its value should exactly recover the original solution line, including the original calculator annotation. 
*   Note: If a number appears in different forms (e.g., as "10%" in the narrative and as ".1" in the calculation), only the form that appears in the calculation should be replaced with a placeholder.
"""

def assemble_example(
    index: int, 
    manifest_dir: Path, 
    dataset: 'datasets.Dataset'
) -> str:
    """
    Assembles a single, complete few-shot example string (Input + Output). 
    Returns a formatted string for one few-shot example.
    """
    try:
        # 1. Construct the Input block
        question = dataset[index]['question']
        solution_map = build_solution_mapping(index, dataset, exclude_FA=True)
        
        input_data = {
            "index": index,
            "question": question,
            "solution_mapping": solution_map
        }
        
        input_json_str = json.dumps(input_data, indent=2)
        input_block = f"**Input:**\n\n{input_json_str}\n"

        # 2. Construct the Output block
        manifest_path = manifest_dir / f'_{index}_alt.json'
        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest_content = json.load(f)
        
        output_json_str = json.dumps(manifest_content, indent=2)
        output_block = f"**Output:**\n```json\n{output_json_str}\n```"

        return f"{input_block}\n\n{output_block}"

    except Exception as e:
        print(f"Warning: Failed to assemble example for index {index}. Error: {e}")
        return "" # Return empty string on failure

def assemble_static_prefix(
    format_guidelines: str = FORMAT_GUIDELINES,
    example_indices: List[int] = EXAMPLE_INDICES,
    manifest_dir: Path = SAMPLE_MANIFESTS_DIR,
    dataset: 'datasets.Dataset' = GSM8K_TRAIN
) -> str:
    """
    Constructs the static user prompt prefix, including guidelines and few-shot examples.

    Args:
        format_guidelines: The string containing the rules and schema.
        example_indices: A list of integer indices for the few-shot examples.
        manifest_dir: The path to the manifest directory.
        dataset: The loaded Hugging Face dataset.

    Returns:
        A single string containing the complete user prompt.
    """
    # 1. Start with the guidelines
    prompt_parts = [format_guidelines, "\n---\n", "\n### Examples"]

    # 2. Assemble and append each few-shot example
    for index in example_indices:
        example_str = assemble_example(index, manifest_dir, dataset)
        if example_str:
            prompt_parts.append(f"\n\n---\n\n{example_str}")
            
    return "".join(prompt_parts) # Use join for efficiency

def assemble_user_prompt(
    index_to_generate: int,
    static_prefix: str,
    dataset: 'datasets.Dataset' = GSM8K_TRAIN
) -> str:
    """
    Appends the final task block to the static prefix to create a complete user prompt.

    Args:
        index_to_generate: The new problem index to generate a manifest for.
        static_prefix: The pre-computed string containing guidelines and few-shot examples.
        dataset: The loaded Hugging Face dataset.

    Returns:
        A single string containing the complete user prompt, ready for an API call.
    """
    # 1. Assemble the final input block for the new task
    try:
        question = dataset[index_to_generate]['question']
        solution_map = build_solution_mapping(
            index=index_to_generate, 
            dataset=dataset, 
            exclude_FA=True
        )
        
        task_input_data = {
            "index": index_to_generate,
            "question": question,
            "solution_mapping": solution_map
        }
        task_input_json_str = json.dumps(task_input_data, indent=2)
        
        # This is the final part of the prompt that asks the model for the new output
        task_block = f"\n\n--- TASK ---\n\n**Input:**\n\n{task_input_json_str}\n\n**Output:**"

    except Exception as e:
        print(f"Error: Could not assemble task block for index {index_to_generate}. Error: {e}")
        return "" # Return empty string on failure

    # 2. Combine the static prefix with the new task block
    return static_prefix + task_block

# Generate the static prefix ONCE.
STATIC_USER_PROMPT_PREFIX = assemble_static_prefix()
print(f"Static user prompt prefix generated successfully. Length: {len(STATIC_USER_PROMPT_PREFIX)} characters.")

Static user prompt prefix generated successfully. Length: 18019 characters.


### Visualizing a sample full user prompt

In [5]:
INDEX_TO_GENERATE = 49

full_user_prompt = assemble_user_prompt(
    index_to_generate=INDEX_TO_GENERATE,
    static_prefix=STATIC_USER_PROMPT_PREFIX
)

print(f"Assembled user prompt for Index {INDEX_TO_GENERATE}. Length: {len(full_user_prompt)} characters.")
print("\n--- Full User Prompt Below ---\n")
print(full_user_prompt)

Assembled user prompt for Index 49. Length: 18480 characters.

--- Full User Prompt Below ---

In the TASK below, you will be given a math problem and its corresponding step-by-step solution. Each step in the solution is numbered (e.g. "L1", "L2" and so on), and many of the steps include calculator annotations (e.g. "<<20*0.1=2>>"). Your goal is to convert this information into a structured JSON object according to the following schema and detailed instructions.

# JSON Schema Definition

Your output must adhere to the following JSON structure:

```json
{
  "function_code": "A single string containing a complete, self-contained Python function that constitutes an end-to-end formalization of the solution.",
  "logical_steps": [
    {
      "line_number": "The line number from the original solution (e.g., 'L1', 'L2').",
      "new_inputs": "A (possibly empty) list of strings, where each string is the name of a variable being defined for the first time in this step.",
      "output_variab

### Setting the models to be used for generation

In [6]:
MODEL_DICT = {
  "openai": "gpt-4.1",
  "google": "gemini-2.5-flash"
}

MODELS = [f"{provider}_{model}" for provider, model in MODEL_DICT.items()]
print(f"Available models: {MODELS}")

Available models: ['openai_gpt-4.1', 'google_gemini-2.5-flash']


### Main code for making API calls to generate manifests

In [7]:
import time
import datetime
import pandas as pd
from tqdm.notebook import tqdm
from typing import List, Dict, Any

# --- 1. API Call Retry Wrapper (Unchanged) ---
# This helper function is robust and does not need modification.
async def _with_429_retries(send_coroutine_factory, *, max_attempts=3, default_wait=15):
    attempt = 0
    while True:
        try:
            return await send_coroutine_factory()
        except Exception as e:
            if ("429" in str(e)) and attempt < max_attempts:
                wait_seconds = default_wait
                if hasattr(e, "retry_delay") and hasattr(e.retry_delay, "seconds"):
                    wait_seconds = e.retry_delay.seconds
                attempt += 1
                print(f"🕒 429 Error received. Retrying in {wait_seconds}s (Attempt {attempt}/{max_attempts})")
                await asyncio.sleep(wait_seconds)
            else:
                raise


# --- 2. Core API Calling Logic (Updated to return usage) ---

async def call_single_model_async(
    provider: str,
    model: str,
    system_prompt: str,
    user_prompt: str,
    semaphore: asyncio.Semaphore
) -> (str, Dict[str, int]):
    """
    Calls a single LLM API and returns the text response and a token usage dictionary.
    """
    async with semaphore:
        text_response = ""
        usage = {"input_tokens": 0, "output_tokens": 0, "cached_tokens": 0}

        if provider == "google":
            gemini = genai.GenerativeModel(model_name=model, system_instruction=system_prompt)
            cfg = genai.types.GenerationConfig(temperature=0.1, max_output_tokens=4000)
            response = await _with_429_retries(lambda: gemini.generate_content_async(user_prompt, generation_config=cfg))
            text_response = response.text
            if response.usage_metadata:
                usage["input_tokens"] = response.usage_metadata.prompt_token_count
                usage["output_tokens"] = response.usage_metadata.candidates_token_count

        elif provider == "openai":
            messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
            response = await openai_client_async.chat.completions.create(
                model=model, messages=messages, temperature=0.1, max_tokens=4000, response_format={"type": "json_object"}
            )
            text_response = response.choices[0].message.content
            if response.usage:
                usage["input_tokens"] = response.usage.prompt_tokens
                usage["output_tokens"] = response.usage.completion_tokens
                # Safely get cached tokens if the details are available
                if hasattr(response.usage, 'prompt_tokens_details') and response.usage.prompt_tokens_details:
                     usage["cached_tokens"] = response.usage.prompt_tokens_details.get("cached_tokens", 0)

        else:
            raise ValueError(f"Unknown provider specified: {provider}")

        return text_response, usage


# --- 3. Per-Problem Orchestration (Updated to handle usage data) ---

async def run_one_problem_async(
    index: int, 
    static_prefix: str,
    system_prompt: str,
    model_dict: Dict[str, str],
    provider_sems: Dict[str, asyncio.Semaphore], 
    output_dir: Path,
    results_list: List[Dict],
    pbar: tqdm
):
    """
    Generates manifests for a single problem index and appends performance results to a shared list.
    """
    user_prompt = assemble_user_prompt(index, static_prefix=static_prefix)
    
    tasks = []
    for provider, model in model_dict.items():
        task = asyncio.create_task(
            call_single_model_async(provider, model, system_prompt, user_prompt, provider_sems[provider])
        )
        task.meta = {"provider": provider, "model": model, "index": index, "start_time": time.time()}
        tasks.append(task)
        
    task_results = await asyncio.gather(*tasks, return_exceptions=True)
    
    for task, result in zip(tasks, task_results):
        meta = task.meta
        elapsed = time.time() - meta["start_time"]
        status = "Failed"
        usage = {"input_tokens": 0, "output_tokens": 0, "cached_tokens": 0}
        
        output_path = output_dir / str(meta['index']) / f"{meta['provider']}_{meta['model']}.txt"
        output_path.parent.mkdir(parents=True, exist_ok=True)

        if isinstance(result, Exception):
            error_message = f"--- ERROR ---\nIndex: {meta['index']}, Model: {meta['model']}\n{type(result).__name__}: {result}"
            output_path.write_text(error_message, encoding='utf-8')
            print(f"❌ Error for Index {meta['index']}, Model {meta['model']}: {type(result).__name__}")
        else:
            text_response, usage = result
            output_path.write_text(text_response, encoding='utf-8')
            status = "Success"
        
        # Append detailed results for final DataFrame
        results_list.append({
            "provider": meta["provider"], "model": meta["model"], "index": meta["index"],
            "status": status, "time_s": round(elapsed, 2),
            "input_tokens": usage["input_tokens"], "output_tokens": usage["output_tokens"],
            "cached_tokens": usage["cached_tokens"],
            "utc_completed": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds")
        })
    
    pbar.update(1)


# --- 4. Main Batch Generation Function (Updated to return DataFrame) ---

async def generate_manifests_parallel(
    indices_to_generate: List[int],
    model_dict: Dict[str, str] = MODEL_DICT,
    system_prompt: str = SYSTEM_PROMPT,
    output_dir: Path = MANIFEST_OUTPUT_DIR,
    concurrency_limits: Dict[str, int] = API_CONCURRENCY_LIMITS
) -> pd.DataFrame:
    """
    Runs the manifest generation process and returns a DataFrame with performance stats.
    """
    print("--- Starting Manifest Generation ---")
    start_time = time.time()
    
    static_prefix = assemble_static_prefix()
    provider_semaphores = {prov: asyncio.Semaphore(limit) for prov, limit in concurrency_limits.items()}
    results_data = [] # Shared list to collect results

    with tqdm(total=len(indices_to_generate), desc="Generating Manifests") as pbar:
        problem_tasks = [
            run_one_problem_async(
                index=idx, static_prefix=static_prefix, system_prompt=system_prompt,
                model_dict=model_dict, provider_sems=provider_semaphores,
                output_dir=output_dir, results_list=results_data, pbar=pbar
            )
            for idx in indices_to_generate
        ]
        await asyncio.gather(*problem_tasks)

    # Create and save the performance DataFrame
    df = pd.DataFrame(results_data)
    run_ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")
    csv_path = output_dir / f"generation_performance_{run_ts}.csv"
    df.to_csv(csv_path, index=False)
    
    end_time = time.time()
    print(f"\n--- Manifest Generation Complete ---")
    print(f"Processed {len(indices_to_generate)} indices in {end_time - start_time:.2f} seconds.")
    print(f"Performance log saved to: {csv_path}")
    
    return df

### Run the generation for a chosen set of indices

In [8]:
INDICES_TO_GENERATE = list(range(10)) 
performance_df = await generate_manifests_parallel(indices_to_generate=INDICES_TO_GENERATE)

# Display the first few rows of the performance log
print("\n--- Performance Summary ---")
display(performance_df.head())

--- Starting Manifest Generation ---


Generating Manifests:   0%|          | 0/10 [00:00<?, ?it/s]

I0000 00:00:1751827963.234069 11037641 fork_posix.cc:71] Other threads are currently calling into gRPC, skipping fork() handlers


❌ Error for Index 7, Model gpt-4.1: RateLimitError
❌ Error for Index 8, Model gpt-4.1: RateLimitError
❌ Error for Index 9, Model gpt-4.1: RateLimitError

--- Manifest Generation Complete ---
Processed 10 indices in 43.18 seconds.
Performance log saved to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/generated_manifests_raw/generation_performance_20250706_185326.csv

--- Performance Summary ---


Unnamed: 0,provider,model,index,status,time_s,input_tokens,output_tokens,cached_tokens,utc_completed
0,openai,gpt-4.1,1,Success,4.51,4895,332,4736,2025-07-06T18:52:47+00:00
1,google,gemini-2.5-flash,1,Success,4.51,5623,402,0,2025-07-06T18:52:47+00:00
2,openai,gpt-4.1,0,Success,5.89,4894,314,4736,2025-07-06T18:52:49+00:00
3,google,gemini-2.5-flash,0,Success,5.89,5622,401,0,2025-07-06T18:52:49+00:00
4,openai,gpt-4.1,2,Success,9.94,4956,467,0,2025-07-06T18:52:53+00:00


In [9]:
performance_df

Unnamed: 0,provider,model,index,status,time_s,input_tokens,output_tokens,cached_tokens,utc_completed
0,openai,gpt-4.1,1,Success,4.51,4895,332,4736,2025-07-06T18:52:47+00:00
1,google,gemini-2.5-flash,1,Success,4.51,5623,402,0,2025-07-06T18:52:47+00:00
2,openai,gpt-4.1,0,Success,5.89,4894,314,4736,2025-07-06T18:52:49+00:00
3,google,gemini-2.5-flash,0,Success,5.89,5622,401,0,2025-07-06T18:52:49+00:00
4,openai,gpt-4.1,2,Success,9.94,4956,467,0,2025-07-06T18:52:53+00:00
5,google,gemini-2.5-flash,2,Success,9.94,5696,556,0,2025-07-06T18:52:53+00:00
6,openai,gpt-4.1,3,Success,12.75,4977,568,4736,2025-07-06T18:52:55+00:00
7,google,gemini-2.5-flash,3,Success,12.75,5722,724,0,2025-07-06T18:52:55+00:00
8,openai,gpt-4.1,4,Success,16.25,4909,451,4736,2025-07-06T18:52:59+00:00
9,google,gemini-2.5-flash,4,Success,16.25,5638,636,0,2025-07-06T18:52:59+00:00


In [10]:
performance_df_2 = await generate_manifests_parallel(
    indices_to_generate=[7,8,9],
    model_dict={'openai': 'gpt-4.1'}
)

# Display the first few rows of the performance log
print("\n--- Performance Summary ---")
display(performance_df_2)

--- Starting Manifest Generation ---


Generating Manifests:   0%|          | 0/3 [00:00<?, ?it/s]


--- Manifest Generation Complete ---
Processed 3 indices in 10.35 seconds.
Performance log saved to: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/generated_manifests_raw/generation_performance_20250706_185713.csv

--- Performance Summary ---


Unnamed: 0,provider,model,index,status,time_s,input_tokens,output_tokens,cached_tokens,utc_completed
0,openai,gpt-4.1,8,Success,3.79,5027,508,4736,2025-07-06T18:57:07+00:00
1,openai,gpt-4.1,7,Success,4.6,5031,552,4736,2025-07-06T18:57:07+00:00
2,openai,gpt-4.1,9,Success,10.34,5208,1162,4736,2025-07-06T18:57:13+00:00


In [12]:
# merge the two DataFrames
merged_df = pd.concat([performance_df, performance_df_2], ignore_index=True)

# save to csv and over-write the previous one
run_ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")
merged_df.to_csv(MANIFEST_OUTPUT_DIR / f"generation_performance_{run_ts}.csv", index=False)

# display
display(merged_df)

Unnamed: 0,provider,model,index,status,time_s,input_tokens,output_tokens,cached_tokens,utc_completed
0,openai,gpt-4.1,1,Success,4.51,4895,332,4736,2025-07-06T18:52:47+00:00
1,google,gemini-2.5-flash,1,Success,4.51,5623,402,0,2025-07-06T18:52:47+00:00
2,openai,gpt-4.1,0,Success,5.89,4894,314,4736,2025-07-06T18:52:49+00:00
3,google,gemini-2.5-flash,0,Success,5.89,5622,401,0,2025-07-06T18:52:49+00:00
4,openai,gpt-4.1,2,Success,9.94,4956,467,0,2025-07-06T18:52:53+00:00
5,google,gemini-2.5-flash,2,Success,9.94,5696,556,0,2025-07-06T18:52:53+00:00
6,openai,gpt-4.1,3,Success,12.75,4977,568,4736,2025-07-06T18:52:55+00:00
7,google,gemini-2.5-flash,3,Success,12.75,5722,724,0,2025-07-06T18:52:55+00:00
8,openai,gpt-4.1,4,Success,16.25,4909,451,4736,2025-07-06T18:52:59+00:00
9,google,gemini-2.5-flash,4,Success,16.25,5638,636,0,2025-07-06T18:52:59+00:00


In [13]:
# ---------------------------------------------------------------------- #
#  Configuration
# ---------------------------------------------------------------------- #

# Define the source and destination directories
# Assumes DATA_DIR has been defined in a previous cell
SOURCE_DIR = DATA_DIR / 'generated_manifests_raw'
DEST_DIR = DATA_DIR / 'generated_manifests_json'

# Create the destination directory if it doesn't exist
DEST_DIR.mkdir(parents=True, exist_ok=True)
print(f"Source directory: {SOURCE_DIR}")
print(f"Destination directory for cleaned JSON: {DEST_DIR}")

# Regex to find a JSON block fenced by ```json ... ``` or ``` ... ```
# It is non-greedy and handles potential whitespace.
_JSON_FENCE_RE = re.compile(r"```(?:json)?\s*\r?\n(.*?)\r?\n```", re.DOTALL)


# ---------------------------------------------------------------------- #
#  Cleaning and Processing Function
# ---------------------------------------------------------------------- #

def process_and_clean_manifests(
    source_dir: Path = SOURCE_DIR,
    dest_dir: Path = DEST_DIR
):
    """
    Traverses the source directory, cleans raw .txt model outputs,
    validates them as JSON, and writes them as .json files to the destination.

    - If a ```json ... ``` fence is found, its content is extracted.
    - Otherwise, the entire file content is used.
    - The extracted string is validated as JSON before being written.
    """
    if not source_dir.is_dir():
        print(f"❌ Error: Source directory '{source_dir}' not found.")
        return

    files_processed = 0
    files_written = 0
    files_failed = 0
    
    # Recursively find all .txt files in the source directory and its subfolders
    for txt_path in sorted(source_dir.rglob("*.txt")):
        files_processed += 1
        cleaned_json_str = ""
        
        try:
            # Determine the corresponding output path, preserving subdirectories
            relative_path = txt_path.relative_to(source_dir)
            dest_path = dest_dir / relative_path.with_suffix('.json')
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            
            # Read the raw content from the source file
            raw_content = txt_path.read_text(encoding="utf-8")

            # Try to find and extract a fenced JSON block
            match = _JSON_FENCE_RE.search(raw_content)
            if match:
                cleaned_json_str = match.group(1).strip()
                print(f"✓ Extracted fenced JSON from: {relative_path}")
            else:
                # If no fence is found, assume the whole file is the JSON string
                cleaned_json_str = raw_content.strip()
                print(f"⚠ No fence found, using full file: {relative_path}")

            # Validate that the extracted string is valid JSON before writing
            json.loads(cleaned_json_str) # This will raise an error if invalid
            
            # Write the clean, valid JSON to the destination file
            dest_path.write_text(cleaned_json_str, encoding="utf-8")
            files_written += 1

        except json.JSONDecodeError:
            print(f"❌ Invalid JSON content in: {relative_path}. File not written.")
            files_failed += 1
        except Exception as e:
            print(f"❌ An unexpected error occurred while processing {relative_path}: {e}")
            files_failed += 1
    
    print("\n--- Processing Complete ---")
    print(f"Total files found: {files_processed}")
    print(f"Successfully written: {files_written}")
    print(f"Failed to process: {files_failed}")

# ====================================================================
#              --== Run the Cleaning Process ==--
# ====================================================================

process_and_clean_manifests()

Source directory: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/generated_manifests_raw
Destination directory for cleaned JSON: /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/generated_manifests_json
✓ Extracted fenced JSON from: 0/google_gemini-2.5-flash-lite-preview-06-17.txt
✓ Extracted fenced JSON from: 0/google_gemini-2.5-flash.txt
⚠ No fence found, using full file: 0/openai_gpt-4.1-mini.txt
⚠ No fence found, using full file: 0/openai_gpt-4.1.txt
✓ Extracted fenced JSON from: 1/google_gemini-2.5-flash-lite-preview-06-17.txt
✓ Extracted fenced JSON from: 1/google_gemini-2.5-flash.txt
⚠ No fence found, using full file: 1/openai_gpt-4.1-mini.txt
⚠ No fence found, using full file: 1/openai_gpt-4.1.txt
✓ Extracted fenced JSON from: 2/google_gemini-2.5-flash-lite-preview-06-17.txt
✓ Extracted fenced JSON from: 2/google_gemini-2.5-flash.txt
⚠ No fence found, using full file: 2/openai_gpt-4.1-mini.txt
⚠ No fence found, using full file: 2/openai_gpt-4.1.txt

In [14]:
import pandas as pd

# ---------------------------------------------------------------------- #
#  Display Function for Generated Manifests
# ---------------------------------------------------------------------- #

def display_generated_manifest(
        index: int, 
        model_name: str,
        manifest_dir: Path = DEST_DIR):
    """
    Loads and displays a generated manifest from a specific model and index.

    Args:
        index: The problem index.
        model_name: The name of the model (e.g., 'openai_gpt-4.1-mini').
    """
    try:
        # Construct the path to the specific JSON file
        manifest_path = manifest_dir / str(index) / f"{model_name}.json"
        
        if not manifest_path.exists():
            print(f"❌ Error: Manifest file not found at {manifest_path}")
            return

        with open(manifest_path, 'r', encoding='utf-8') as f:
            manifest = json.load(f)

        # --- Display Top-Level Information ---
        display(Markdown(f"# Generated Manifest for Index: `{index}` | Model: `{model_name}`"))
        display(Markdown("## Question"))
        display(Markdown(f"> {GSM8K_TRAIN[index]['question']}"))

        # --- Display Function Code ---
        code = manifest.get('function_code', '# Code not found')
        display(Markdown("## Function Code"))
        display(Markdown(f"```python\n{code}\n```"))

        # --- Display Logical Steps in a DataFrame ---
        display(Markdown("## Logical Steps"))
        steps = manifest.get('logical_steps', [])
        if not steps:
            print("No logical steps found in the manifest.")
            return
        
        df_steps = pd.DataFrame(steps)
        
        # Get the original solution text for comparison
        original_solution = build_solution_mapping(index=index, exclude_FA=True)
        
        df_steps['original_solution_line'] = df_steps['line_number'].apply(
            lambda ln: original_solution.get(ln, "N/A")
        )
        
        column_order = [
            'line_number', 
            'original_solution_line',
            'solution_line_template',
            'new_inputs',
            'output_variable'
        ]
        
        # Ensure columns exist before reordering
        existing_columns = [col for col in column_order if col in df_steps.columns]
        df_steps = df_steps[existing_columns]
        
        pd.set_option('display.max_colwidth', None)
        display(df_steps)

    except Exception as e:
        print(f"An error occurred while displaying the manifest for index {index}, model {model_name}: {e}")


# ====================================================================
#              --== Interactively Display a Manifest ==--
#   Change the index and model_name below to view a specific output.
# ====================================================================

# These variables must be defined in your notebook environment
# MODEL_DICT = {
#   "openai": "gpt-4.1-mini",
#   "google": "gemini-2.5-flash-lite-preview-06-17"
# }

INDEX_TO_DISPLAY = 0
MODEL_TO_DISPLAY = f"google_{MODEL_DICT['google']}" # Example: a gemini model

display_generated_manifest(INDEX_TO_DISPLAY, MODEL_TO_DISPLAY)

# Generated Manifest for Index: `0` | Model: `google_gemini-2.5-flash`

## Question

> Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?

## Function Code

```python
def solve():
    """Index: 0.
    Returns: the total number of clips Natalia sold in April and May.
    """
    # L1
    clips_sold_april = 48 # sold clips to 48 of her friends in April
    may_sales_divisor = 2 # sold half as many clips in May
    clips_sold_may = clips_sold_april / may_sales_divisor

    # L2
    total_clips_sold = clips_sold_april + clips_sold_may

    # FA
    answer = total_clips_sold
    return answer
```

## Logical Steps

Unnamed: 0,line_number,original_solution_line,solution_line_template,new_inputs,output_variable
0,L1,Natalia sold 48/2 = <<48/2=24>>24 clips in May.,Natalia sold {clips_sold_april}/{may_sales_divisor} = <<{clips_sold_april}/{may_sales_divisor}={clips_sold_may}>>{clips_sold_may} clips in May.,"[clips_sold_april, may_sales_divisor]",clips_sold_may
1,L2,Natalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.,Natalia sold {clips_sold_april}+{clips_sold_may} = <<{clips_sold_april}+{clips_sold_may}={total_clips_sold}>>{total_clips_sold} clips altogether in April and May.,[],total_clips_sold


In [15]:
def concat_json_texts(
        indices: List[int], 
        model_name: str, 
        parent_dir: Path):
    """
    Reads JSON files for the given indices and model_name from parent_dir,
    concatenates their text content, and returns as a single string.
    """
    parent_dir = Path(parent_dir)
    all_text = []
    for idx in indices:
        file_path = parent_dir / str(idx) / f"{model_name}.json"
        if file_path.exists():
            text = file_path.read_text(encoding="utf-8")
            all_text.append(text)
        else:
            print(f"Warning: File not found: {file_path}")
    return "\n".join(all_text)

In [None]:
outputs = {}
for model in MODELS:
    try:
        outputs[model] = concat_json_texts(
            indices=INDICES_TO_GENERATE, 
            model_name=model, 
            parent_dir=DEST_DIR
        )
        print(f"Concatenated output for model {model} with {len(outputs[model])} characters.")
    except Exception as e:
        print(f"Error processing model {model}: {e}")

Concatenated output for model openai_gpt-4.1 with 20069 characters.
Concatenated output for model google_gemini-2.5-flash with 17036 characters.


In [19]:
# save as text files to SAMPLE_MANIFESTS_DIR
for model, text in outputs.items():
    output_path = SAMPLE_MANIFESTS_DIR / f"{model}_output.txt"
    output_path.write_text(text, encoding='utf-8')
    print(f"Saved concatenated output for {model} to {output_path}")

Saved concatenated output for openai_gpt-4.1 to /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/sample_manifests/openai_gpt-4.1_output.txt
Saved concatenated output for google_gemini-2.5-flash to /Users/arvindsuresh/Documents/Github/Erdos-DL-June25-Math/data/sample_manifests/google_gemini-2.5-flash_output.txt


In [None]:
model = MODELS[0]
concat_json_texts(INDICES_TO_GENERATE, model, DEST_DIR))

{
  "function_code": "def solve():\n    \"\"\"Index: 0.\n    Returns: the total number of clips Natalia sold in April and May.\n    \"\"\"\n    # L1\n    april_clips = 48 # Natalia sold clips to 48 of her friends in April\n    may_divisor = 2 # she sold half as many clips in May\n    may_clips = april_clips / may_divisor\n\n    # L2\n    total_clips = april_clips + may_clips\n\n    # FA\n    answer = total_clips\n    return answer",
  "logical_steps": [
    {
      "line_number": "L1",
      "new_inputs": [
        "april_clips",
        "may_divisor"
      ],
      "output_variable": "may_clips",
      "solution_line_template": "Natalia sold {april_clips}/{may_divisor} = <<{april_clips}/{may_divisor}={may_clips}>>{may_clips} clips in May."
    },
    {
      "line_number": "L2",
      "new_inputs": [],
      "output_variable": "total_clips",
      "solution_line_template": "Natalia sold {april_clips}+{may_clips} = <<{april_clips}+{may_clips}={total_clips}>>{total_clips} clips altogeth

In [17]:
model = MODELS[1]
print(concat_json_texts(INDICES_TO_GENERATE, model, DEST_DIR))

{
  "function_code": "def solve():\n    \"\"\"Index: 0.\n    Returns: the total number of clips Natalia sold in April and May.\n    \"\"\"\n    # L1\n    clips_sold_april = 48 # sold clips to 48 of her friends in April\n    may_sales_divisor = 2 # sold half as many clips in May\n    clips_sold_may = clips_sold_april / may_sales_divisor\n\n    # L2\n    total_clips_sold = clips_sold_april + clips_sold_may\n\n    # FA\n    answer = total_clips_sold\n    return answer",
  "logical_steps": [
    {
      "line_number": "L1",
      "new_inputs": [
        "clips_sold_april",
        "may_sales_divisor"
      ],
      "output_variable": "clips_sold_may",
      "solution_line_template": "Natalia sold {clips_sold_april}/{may_sales_divisor} = <<{clips_sold_april}/{may_sales_divisor}={clips_sold_may}>>{clips_sold_may} clips in May."
    },
    {
      "line_number": "L2",
      "new_inputs": [],
      "output_variable": "total_clips_sold",
      "solution_line_template": "Natalia sold {clips_sold