In [43]:
import pandas as pd
import numpy as np

import importlib
import inspect
import os
import json
import random
import openai
import google.generativeai as genai
import anthropic
from openai import OpenAI
from datasets import load_dataset
from tqdm import tqdm
from dotenv import load_dotenv

from typing import List, Dict, Any

In [44]:
# Read variables from a .env file into the process environment, then set up
# one handle per provider. os.environ.get yields None for any key that is absent.
load_dotenv()
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
anthropic_client = anthropic.Client(api_key=os.environ.get("ANTHROPIC_API_KEY"))

In [45]:
# GSM8K, "main" configuration, train split only. Later cells index this object
# by integer position and read the "question" and "answer" fields of each row.
gsm8k_train = load_dataset(path="gsm8k", name="main", split="train")

In [46]:
# Role-setting instruction for the model. Its use is not shown in this part of
# the notebook; presumably it is sent as the system message -- TODO confirm.
SYSTEM_PROMPT = "You are an expert Python programmer specializing in data formalization. Your role is to meticulously convert natural language math problems and their step-by-step solutions into a single, well-structured Python function. You will be presented with examples of the required format followed by a final task to complete."

# Formatting rules for the generated `solve` function. craft_user_prompt()
# places this text at the very top of the user prompt, ahead of the few-shot
# examples. The string is sent as-is, so any edit to its wording or whitespace
# changes the model input.
PROMPT_GUIDELINES = """### Guidelines

1.  **Function Naming & Docstring:** The function must be named `solve`. It must begin with a docstring that has exactly two lines:
    *   The first line must be: "Code for Q [Index] from the GSM8K dataset (train).", using the index from the task header.
    *   The second line must be a succinct, one-sentence description of what the function returns (e.g., "Returns the total cost of wages and taxes.").

2.  **Function Arguments:** The function arguments must be derived from the 'Question' text. 
    *   Create a distinct argument for every numerical value that is directly stated in the text.
    *   **Note:** Some of these arguments may end up not being used in the function body. This is expected. Do not worry about this and leave the unused arguments in the function signature.

3.  **Argument Formatting:** Each argument must include a type-hint (e.g., `int`, `float`) and a default value equal to its value in the 'Question'. You must also add a comment (`#`) next to each argument that quotes or describes the phrase in the 'Question' it comes from.

4.  **Function Body:** The body of the function should follow the logic of the provided 'Solution'. Each relevant line from the 'Solution' that involves a computation must be included as a comment, immediately followed by the Python code that formalizes that step.

5.  **Calculator Annotations:** Pay close attention to the calculator annotations (e.g., `<<25*8=200>>`) in the 'Solution' as they reveal the precise mathematical operations to implement.

6.  **Final Answer Comment:** Before the final `return` statement, you must add a comment identifying the variable that holds the final answer (e.g., `# The final answer is the grand total`)."""

In [47]:
# Problem indices that have a reference solution in the `code_examples`
# package (one module per problem, named `_<index>`, exposing `solve`).
indices = [310, 3822, 2345, 1202, 7371]

# Map each problem index to the source text of its `solve` function.
code_strings = {
    idx: inspect.getsource(importlib.import_module(f"code_examples._{idx}").solve)
    for idx in indices
}

In [48]:
def _format_prompt_query(
    index: int,
    code_strings: dict = code_strings,
    with_code: bool = False,
):
    """
    Render one GSM8K training problem as a block of prompt text.

    The block lists the problem's Index, Question and Solution and ends with a
    "*Code*:" header. When `with_code` is true, `code_strings[index]` is
    appended inside a fenced python block; otherwise the text stops right
    after the header.
    """
    record = gsm8k_train[index]  # type: ignore
    question = record["question"]
    # Keep only the text that precedes the first '####' marker in the answer.
    worked_steps = record["answer"].partition('####')[0].strip()

    # Newlines are spelled out so the trailing space after each label's colon
    # is visible in the source.
    parts = [
        f"*Index*: \n{index}\n\n",
        f"*Question*: \n{question}\n\n",
        f"*Solution*: \n{worked_steps}\n\n",
        "*Code*:\n",
    ]
    if with_code:
        parts.append(f"\n```python\n{code_strings[index]}\n```\n")
    return "".join(parts)

In [49]:
def craft_user_prompt(
    index: int,
    example_indices: List[int],
    code_examples: Dict[int, str]
    ) -> str:
    """
    Build the complete user prompt asking the LLM to write code for one problem.

    The prompt consists of the module-level `PROMPT_GUIDELINES`, an
    "--- EXAMPLES ---" section holding one solved example per entry of
    `example_indices`, and a "--- TASK ---" section holding the target problem
    with its code left out.

    Args:
        index: The index of the target problem to generate code for.
        example_indices: A list of indices to use as few-shot examples.
        code_examples: A dictionary mapping example indices to their code strings.

    Returns:
        A single string containing the full user prompt.

    Raises:
        KeyError: If any entry of `example_indices` has no code in `code_examples`.
    """
    # Fail fast and name every example that lacks code, rather than letting a
    # bare `KeyError: <idx>` escape from inside the formatting helper.
    missing = [idx for idx in example_indices if idx not in code_examples]
    if missing:
        raise KeyError(
            f"No code example available for example indices {missing}; "
            f"available indices: {list(code_examples)}"
        )

    # Generate the formatted strings for the few-shot examples
    example_prompts = [
        _format_prompt_query(index=idx,
                             code_strings=code_examples,
                             with_code=True)
        for idx in example_indices
    ]

    # Generate the formatted string for the final task to be completed by the LLM
    # (with_code stays False, so the task ends at the "*Code*:" header).
    task_prompt = _format_prompt_query(index=index, code_strings=code_examples)

    # Parts are joined with a single newline; the section separators carry
    # their own newlines, so major sections end up set apart by blank lines.
    full_prompt = "\n".join([
        PROMPT_GUIDELINES,
        "\n--- EXAMPLES ---\n",
        "\n".join(example_prompts),
        "--- TASK ---\n",
        task_prompt
    ])

    return full_prompt

In [51]:
# Smoke test: build the prompt for problem 5 with three few-shot examples and
# print it so the layout can be inspected by eye.
check = craft_user_prompt(5, example_indices=[310, 3822, 7371], code_examples=code_strings)
print(check)

### Guidelines

1.  **Function Naming & Docstring:** The function must be named `solve`. It must begin with a docstring that has exactly two lines:
    *   The first line must be: "Code for Q [Index] from the GSM8K dataset (train).", using the index from the task header.
    *   The second line must be a succinct, one-sentence description of what the function returns (e.g., "Returns the total cost of wages and taxes.").

2.  **Function Arguments:** The function arguments must be derived from the 'Question' text. 
    *   Create a distinct argument for every numerical value that is directly stated in the text.
    *   **Note:** Some of these arguments may end up not being used in the function body. This is expected. Do not worry about this and leave the unused arguments in the function signature.

3.  **Argument Formatting:** Each argument must include a type-hint (e.g., `int`, `float`) and a default value equal to its value in the 'Question'. You must also add a comment (`#`) next to e

In [None]:
# Model identifiers grouped by provider name; only "openai" is listed so far.
model_dict = {
    "openai": ["davinci-002", "gpt-4.1-mini", "gpt-4.1", "o3-mini"],
}