In [1]:
import dspy
from datasets import load_dataset
import os

In [2]:
# Configure a local Ollama model as a DSPy language model.
# Prerequisites:
# 1. Install Ollama: curl -fsSL https://ollama.ai/install.sh | sh
# 2. Start the model named in OLLAMA_MODEL below: ollama run <model name>
#
# NOTE: a later cell calls dspy.configure(lm=open_router_lm), which replaces
# the default set here.

import dspy

# Single source of truth for the local model so the LM config and the
# "make sure Ollama is running" hint can never name different models.
OLLAMA_MODEL = 'gemma3:4b'

# Configure Ollama LM using DSPy's official format
ollama_llm = dspy.LM(
    model=f'ollama_chat/{OLLAMA_MODEL}',  # Format: ollama_chat/{model_name}
    api_base='http://localhost:11434',  # Ollama default endpoint
    api_key='',  # Empty string for local Ollama
    max_tokens=65536,
    temperature=1.0
)

# Set as default LM
dspy.configure(lm=ollama_llm)

print("✅ Ollama LM configured successfully!")
print(f"Model: {ollama_llm.model}")
print(f"🔄 Make sure Ollama is running: ollama run {OLLAMA_MODEL}")

✅ Ollama LM configured successfully!
Model: ollama_chat/gemma3:4b
🔄 Make sure Ollama is running: ollama run qwen3:8b


In [5]:
# Both OpenRouter-backed LMs share everything except the model id, so the
# common settings live in one place.
openrouter_lm_kwargs = dict(
    api_key=os.getenv('openrouter_api_key'),
    api_base='https://openrouter.ai/api/v1',
    max_tokens=65536,
    temperature=1.0,
)

# LM that runs the program; installed as the DSPy default.
open_router_lm = dspy.LM('openrouter/openai/gpt-4.1-nano', **openrouter_lm_kwargs)
dspy.configure(lm=open_router_lm)

# Separate LM, handed to GEPA as `reflection_lm` further down.
reflection_lm = dspy.LM('openrouter/meta-llama/llama-4-scout', **openrouter_lm_kwargs)

In [6]:
# Fetch NuminaMath-1.5 and keep a handle on its 'train' split.
numina_splits = load_dataset("AI-MO/NuminaMath-1.5")
train_split = numina_splits['train']

In [7]:
def is_numeric_answer(answer):
    """Return True when ``answer`` can be converted with ``int()``.

    Despite the name, this is an *integer* check: strings such as ``"3.5"``
    or LaTeX fragments are rejected. That matches the metrics in this
    notebook, which call ``int(example['answer'])`` on every kept example.

    Args:
        answer: Candidate answer, normally a string from the dataset.

    Returns:
        bool: True if ``int(answer)`` succeeds, False otherwise.
    """
    try:
        int(answer)
    except (ValueError, TypeError, OverflowError):
        # ValueError: text that is not an integer literal.
        # TypeError: None or other non-convertible objects.
        # OverflowError: infinite floats, e.g. int(float("inf")).
        return False
    return True

In [8]:
# Drop every row whose 'answer' does not pass is_numeric_answer
# (i.e. cannot be converted with int()).
train_split = train_split.filter(
    lambda row: is_numeric_answer(row['answer'])
)

Filter:   0%|          | 0/896215 [00:00<?, ? examples/s]

In [9]:
# Spot-check the answer of one row that survived the filter.
sample_row = train_split[12]
print(sample_row['answer'])

1898


In [10]:
def init_dataset(train_split_ratio=None, test_split_ratio=None, val_split_ratio=None, sample_fraction=1.0):
    """Load NuminaMath-1.5, keep integer-answer problems, and split them.

    The 'train' split is filtered with ``is_numeric_answer``, wrapped into
    ``dspy.Example`` objects (``problem`` is the only input field), shuffled
    with a fixed seed, optionally truncated, and cut into three consecutive
    slices in the order train, val, test.

    Args:
        train_split_ratio: Fraction used for training (default 0.5).
        test_split_ratio: Fraction used for testing (default 0.45).
        val_split_ratio: Fraction used for validation (default 0.05).
        sample_fraction: Fraction of the shuffled examples to keep before
            splitting. Values >= 1.0 keep everything.

    Returns:
        tuple[list, list, list]: ``(train_set, val_set, test_set)``.

    Raises:
        AssertionError: If the three ratios do not sum to 1.0.
        ValueError: If ``sample_fraction`` is negative.
    """
    import math
    import random

    if train_split_ratio is None:
        train_split_ratio = 0.5
    if test_split_ratio is None:
        test_split_ratio = 0.45
    if val_split_ratio is None:
        val_split_ratio = 0.05

    ratio_sum = train_split_ratio + test_split_ratio + val_split_ratio
    # Compare with a tolerance: a sum such as 0.7 + 0.2 + 0.1 evaluates to
    # 0.9999999999999999 in floating point and would fail an exact `== 1.0`.
    assert math.isclose(ratio_sum, 1.0, rel_tol=0.0, abs_tol=1e-9), "Ratios must sum to 1.0"
    if sample_fraction < 0:
        # A negative fraction would turn into a negative slice bound below and
        # silently produce inconsistent splits.
        raise ValueError(f"sample_fraction must be non-negative, got {sample_fraction}")

    raw_train = load_dataset("AI-MO/NuminaMath-1.5")['train']
    # Keep only the rows whose 'answer' passes is_numeric_answer.
    integer_rows = raw_train.filter(lambda x: is_numeric_answer(x['answer']))
    examples = [
        dspy.Example({
            "problem": x['problem'],
            'solution': x['solution'],
            'answer': x['answer'],
        }).with_inputs("problem")
        for x in integer_rows
    ]
    # Fixed seed so every call yields the same ordering, and hence the same splits.
    random.Random(0).shuffle(examples)
    tot_num = len(examples)
    print(f"Total number of examples after filtering: {tot_num}")

    if sample_fraction < 1.0:
        sample_num = int(tot_num * sample_fraction)
        examples = examples[:sample_num]
        tot_num = sample_num
        print(f"Sampled down to {sample_num} examples.")

    # Slice boundaries: [0, train_end) -> train, [train_end, val_end) -> val,
    # [val_end, end) -> test.
    train_end = int(train_split_ratio * tot_num)
    val_end = int((train_split_ratio + val_split_ratio) * tot_num)

    train_set = examples[:train_end]
    val_set = examples[train_end:val_end]
    test_set = examples[val_end:]

    return train_set, val_set, test_set

In [11]:
# Build the three splits from a 1% sample of the filtered dataset.
dataset_splits = init_dataset(sample_fraction=0.01)
train_set, val_set, test_set = dataset_splits

# Show the size of each split as (train, val, test).
tuple(len(split) for split in dataset_splits)

Filter:   0%|          | 0/896215 [00:00<?, ? examples/s]

Total number of examples after filtering: 298564
Sampled down to 2985 examples.


(1492, 149, 1344)

In [12]:
# Display the first training example: heading on one line, content on the next.
first_train_example = train_set[0]
print("Problem:", first_train_example['problem'], sep="\n")
print("\n\nSolution:", first_train_example['solution'], sep="\n")
print("\n\nAnswer:", first_train_example['answer'], sep="\n")

Problem:
The graph of the equation $y = \frac{x}{x^3 + Ax^2 + Bx + C}$, where $A,B,C$ are integers, has vertical asymptotes at $x = -1, 3, 4$. Find $A + B + C$.


Solution:
Given the vertical asymptotes at $x = -1, 3, 4$, the denominator of the function can be formulated as: 
\[ x^3 + Ax^2 + Bx + C = (x + 1)(x - 3)(x - 4) \]

First, expand the product:
\[ (x + 1)(x - 3)(x - 4) \]
\[ = (x + 1)(x^2 - 7x + 12) \]
\[ = x^3 - 7x^2 + 12x + x^2 - 7x + 12 \]
\[ = x^3 - 6x^2 + 5x + 12 \]

Thus, we have $A = -6$, $B = 5$, and $C = 12$, so:
\[ A + B + C = -6 + 5 + 12 = \boxed{11} \]


Answer:
11


In [13]:
# Display the first test example's problem statement and reference answer.
first_test_example = test_set[0]
print(first_test_example['problem'])
print("\n\nAnswer:", first_test_example['answer'], sep="\n")

A [i]Beaver-number[/i] is a positive 5 digit integer whose digit sum is divisible by 17. Call a pair of [i]Beaver-numbers[/i] differing by exactly $1$ a [i]Beaver-pair[/i]. The smaller number in a [i]Beaver-pair[/i] is called an [i]MIT Beaver[/i], while the larger number is called a [i]CIT Beaver[/i]. Find the positive difference between the largest and smallest [i]CIT Beavers[/i] (over all [i]Beaver-pairs[/i]).


Answer:
79200


In [19]:
class GenerateResponse(dspy.Signature):
    """Solve the problem and provide the answer in the correct format."""
    # NOTE: the docstring above is the signature's instruction text, i.e. it is
    # part of the prompt rather than plain documentation. Keep it verbatim
    # unless a prompt change is intended.

    # Fields are spelled out as `str`, the type DSPy assumes for unannotated fields.
    problem: str = dspy.InputField()
    answer: str = dspy.OutputField()


# Chain-of-thought module built from the signature. The recorded predictions
# show a `reasoning` field next to `answer`.
program = dspy.ChainOfThought(GenerateResponse)

In [20]:
def metric(example, prediction, trace=None, pred_name=None, pred_trace=None):
    """Exact-match metric on integer answers.

    Args:
        example: Gold example; ``example['answer']`` must be convertible with
            ``int()`` (the dataset is filtered to guarantee this).
        prediction: Program output exposing an ``answer`` attribute.
        trace, pred_name, pred_trace: Accepted for compatibility with callers
            that pass them; not used here.

    Returns:
        int: 1 if the predicted answer parses as an integer equal to the gold
        answer, otherwise 0. An answer that is missing (``None``) or contains
        anything beyond an integer literal (e.g. ``"60 km"``) scores 0.
    """
    correct_answer = int(example['answer'])
    try:
        llm_answer = int(prediction.answer)
    except (ValueError, TypeError):
        # ValueError: non-integer text. TypeError: answer is None or another
        # non-convertible object. Both count as a wrong answer, not a crash.
        return 0
    return int(correct_answer == llm_answer)

In [21]:
import dspy

# Baseline: score the unoptimized program on the held-out test split.
evaluate = dspy.Evaluate(
    devset=test_set, metric=metric,
    num_threads=32,
    display_progress=True, display_table=True,
)

baseline_result = evaluate(program)
baseline_result

Average Metric: 208.00 / 471 (44.2%):  35%|███▍      | 470/1344 [02:23<01:29,  9.73it/s]



Average Metric: 569.00 / 1344 (42.3%): 100%|██████████| 1344/1344 [08:19<00:00,  2.69it/s]

2025/09/22 14:49:21 INFO dspy.evaluate.evaluate: Average Metric: 569 / 1344 (42.3%)





Unnamed: 0,problem,solution,example_answer,reasoning,pred_answer,metric
0,A [i]Beaver-number[/i] is a positive 5 digit integer whose digit s...,1. **Understanding the Problem:** - A *Beaver-number* is a 5-digit...,79200,A Beaver-number is defined as a five-digit integer whose digit sum...,81080,
1,Two passenger trains start at the same hour in the day from two di...,Let's denote the distance travelled by the slower train (16 kmph) ...,60,Let the two trains start at the same time from two stations that a...,60 km,
2,Mcdonald is planning to open up a farm that provides eggs to the c...,Let's denote the number of eggs Ben needs per week as B. Since Ked...,14,Let's define the variables: - Saly needs 10 eggs per week. - Ben n...,Ben needs 14 eggs per week.,
3,Given that the arithmetic sequence ${a_n}$ has a sum of its first ...,"Since the sequence ${a_n}$ is an arithmetic sequence, it follows t...",15,We are given an arithmetic sequence \(\{a_n\}\) with sum of the fi...,15,✔️ [1]
4,The ratio of spinsters to cats is 2 to 9. If there are 42 more cat...,Let the number of spinsters be represented by S and the number of ...,12,Let's denote the number of spinsters as S and the number of cats a...,12,✔️ [1]
...,...,...,...,...,...,...
1339,"If $\begin{vmatrix} a & b \\ c & d \end{vmatrix} = 6,$ then find \...","Given that $\begin{vmatrix} a & b \\ c & d \end{vmatrix} = 6,$ it ...",12,Given the original determinant \(\begin{vmatrix} a & b \\ c & d \e...,12,✔️ [1]
1340,Valentina bought a foot long burger and shared half with his broth...,"If Valentina bought a foot long burger, that means the burger is 1...",6,"Valentina bought a foot-long burger, which is 12 inches long. She ...",Each person’s share is 6 inches.,
1341,"In a sequence, 1 = 6, 2 = 12, 3 = 18, 4 = 24, and 5 = some value. ...",The sequence given is: 1 = 6 2 = 12 3 = 18 4 = 24 5 = ? 6 = 1 From...,30,"The sequence given is: 1 = 6, 2 = 12, 3 = 18, 4 = 24. Observing th...",30,✔️ [1]
1342,The value of $x$ that satisfies $\binom{x+1}{x-4} = \frac{7}{15}P^...,**Analysis** This question examines the formulas for combinations ...,10,"Given the equation \(\binom{x+1}{x-4} = \frac{7}{15} P_{x+1}^3\), ...",10,✔️ [1]


EvaluationResult(score=42.34, results=<list of 1344 results>)

In [25]:
# Smoke test: run the program on one test example and show the raw prediction.
import traceback

print("=== STEP 1: Testing program on single example ===")
test_example = test_set[0]
print(f"Input problem: {test_example.problem[:100]}...")
print(f"Expected answer: {test_example.answer}")

try:
    # The keyword argument must match the signature's input field name (`problem`).
    prediction = program(problem=test_example.problem)
    print(f"Program prediction: {prediction}")
    print(f"Prediction answer: {prediction.answer}")
    print(f"Prediction type: {type(prediction.answer)}")
    print("✅ Program works!")
except Exception as error:
    # Diagnostic cell: report the failure with its traceback instead of stopping.
    print(f"❌ Program failed: {error}")
    traceback.print_exc()

=== STEP 1: Testing program on single example ===
Input problem: A [i]Beaver-number[/i] is a positive 5 digit integer whose digit sum is divisible by 17. Call a pair...
Expected answer: 79200
Program prediction: Prediction(
    reasoning='A Beaver-number is defined as a five-digit integer whose digit sum is divisible by 17. A Beaver-pair consists of two Beaver-numbers differing by exactly 1, with the smaller called an MIT Beaver and the larger a CIT Beaver. We need to find the range of CIT Beavers across all such pairs, specifically the difference between the largest and smallest CIT Beavers.\n\nTo analyze this, consider two consecutive integers n and n+1 differing by 1. For both to be Beaver-numbers, their digit sums must both be divisible by 17 (since the sum of digits of n and n+1 must both satisfy this property).\n\nLet s(n) be the digit sum of n, and s(n+1) be that of n+1. The difference:\n\ns(n+1) - s(n) = (sum of digits of n+1) - (sum of digits of n)\n\nAdding 1 to n affects the

In [26]:
def metric_with_feedback(example, prediction, trace=None, pred_name=None, pred_trace=None):
    """Exact-match integer metric that also returns textual feedback.

    Scoring is the same as ``metric``: 1 when the predicted answer parses as
    an integer equal to the gold answer, else 0. In addition, a feedback
    string is built that states the correct answer and, when the example
    carries one, the written reference solution.

    Args:
        example: Gold example with an int-convertible ``'answer'`` and an
            optional ``'solution'``.
        prediction: Program output exposing an ``answer`` attribute.
        trace, pred_name, pred_trace: Accepted for compatibility with callers
            that pass them; not used here.

    Returns:
        dspy.Prediction: with fields ``score`` (0 or 1) and ``feedback`` (str).
    """
    correct_answer = int(example['answer'])
    written_solution = example.get('solution', '')
    try:
        llm_answer = int(prediction.answer)
    except (ValueError, TypeError):
        # ValueError: the answer holds non-integer text. TypeError: the answer
        # is None or otherwise not convertible. Either way, report it as a
        # formatting failure instead of letting the metric raise.
        feedback_text = f"The final answer must be a valid integer and nothing else. You responded with '{prediction.answer}', which couldn't be parsed as a python integer. Please ensure your answer is a valid integer without any additional text or formatting."
        feedback_text += f" The correct answer is '{correct_answer}'."
        if written_solution:
            feedback_text += f" Here's the full step-by-step solution:\n{written_solution}\n\nThink about what takeaways you can learn from this solution to improve your future answers and approach to similar problems and ensure your final answer is a valid integer."
        return dspy.Prediction(score=0, feedback=feedback_text)

    score = int(correct_answer == llm_answer)

    if score == 1:
        feedback_text = f"Your answer is correct. The correct answer is '{correct_answer}'."
    else:
        feedback_text = f"Your answer is incorrect. The correct answer is '{correct_answer}'."

    # The reference solution is appended for correct answers too.
    if written_solution:
        feedback_text += f" Here's the full step-by-step solution:\n{written_solution}\n\nThink about what takeaways you can learn from this solution to improve your future answers and approach to similar problems."

    return dspy.Prediction(score=score, feedback=feedback_text)

In [29]:
from dspy import GEPA

# GEPA settings collected in one mapping so they can be read at a glance.
gepa_settings = dict(
    # Metric returning both a score and textual feedback.
    metric=metric_with_feedback,
    # LM kept separate from the default one that runs the program.
    reflection_lm=reflection_lm,
    reflection_minibatch_size=16,
    add_format_failure_as_feedback=True,
    # Budget preset and parallelism.
    auto="heavy",
    num_threads=32,
    # Bookkeeping switches.
    track_stats=True,
    track_best_outputs=True,
)

optimizer = GEPA(**gepa_settings)


In [30]:
# Optimize the baseline program with GEPA, using the train and validation splits.
optimized_program = optimizer.compile(program, trainset=train_set, valset=val_set)

2025/09/22 15:29:11 INFO dspy.teleprompt.gepa.gepa: Running GEPA for approx 2078 metric calls of the program. This amounts to 1.27 full evals on the train+val set.
2025/09/22 15:29:11 INFO dspy.teleprompt.gepa.gepa: Using 149 examples for tracking Pareto scores. You can consider using a smaller sample of the valset to allow GEPA to explore more diverse solutions within the same budget.
2025/09/22 15:29:11 INFO dspy.evaluate.evaluate: Average Metric: 65.0 / 149 (43.6%)
2025/09/22 15:29:11 INFO dspy.teleprompt.gepa.gepa: Iteration 0: Base program full valset score: 0.436241610738255
2025/09/22 15:29:11 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Selected program 0 score: 0.436241610738255


Average Metric: 5.00 / 16 (31.2%): 100%|██████████| 16/16 [00:00<00:00, 924.30it/s]

2025/09/22 15:29:11 INFO dspy.evaluate.evaluate: Average Metric: 5.0 / 16 (31.2%)
2025/09/22 15:29:11 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Proposed new text for predict: ### Instruction

Solve the given problem and provide the answer in the correct format. 

To ensure accuracy, consider the following guidelines:

1. **Read and Understand the Problem**: Carefully read the problem statement and identify the key components, including any specific constraints or requirements.

2. **Provide Detailed Reasoning**: Offer a step-by-step explanation of your thought process and calculations. This will help in ensuring that the approach is correct and easy to follow.

3. **Use Correct Mathematical Notation**: Ensure that all mathematical expressions and equations are clearly written and correctly formatted.

4. **Check for Common Mistakes**: Verify the calculations and reasoning to avoid common mistakes, such as incorrect unit conversions or miscalculations.

5. **Rationalize Denominators 




2025/09/22 15:29:12 INFO dspy.evaluate.evaluate: Average Metric: 70.0 / 149 (47.0%)
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New program is on the linear pareto front
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Full valset score for new program: 0.4697986577181208
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Full train_val score for new program: 0.4697986577181208
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Individual valset scores for new program: [1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0]
2025/09/22 15

Average Metric: 9.00 / 16 (56.2%): 100%|██████████| 16/16 [00:00<00:00, 689.43it/s]

2025/09/22 15:29:12 INFO dspy.evaluate.evaluate: Average Metric: 9.0 / 16 (56.2%)
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately. 

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not i




2025/09/22 15:29:12 INFO dspy.evaluate.evaluate: Average Metric: 94.0 / 149 (63.1%)
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 2: New program is on the linear pareto front
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Full valset score for new program: 0.6308724832214765
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Full train_val score for new program: 0.6308724832214765
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Individual valset scores for new program: [1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0]
2025/09/22 15

Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [00:00<00:00, 829.75it/s]

2025/09/22 15:29:12 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 16 (68.8%)
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Proposed new text for predict: ### Instruction

Solve the given problem and provide the answer in the correct format. 

To ensure accuracy, consider the following guidelines:

1. **Read and Understand the Problem**: Carefully read the problem statement and identify the key components, including any specific constraints or requirements.

2. **Provide Detailed Reasoning**: Offer a step-by-step explanation of your thought process and calculations. This will help in ensuring that the approach is correct and easy to follow.

3. **Use Correct Mathematical Notation**: Ensure that all mathematical expressions and equations are clearly written and correctly formatted.

4. **Check for Common Mistakes**: Verify the calculations and reasoning to avoid common mistakes, such as incorrect unit conversions or miscalculations.

5. **Rationalize Denominators


Average Metric: 12.00 / 16 (75.0%): 100%|██████████| 16/16 [00:00<00:00, 740.94it/s]

2025/09/22 15:29:12 INFO dspy.evaluate.evaluate: Average Metric: 12.0 / 16 (75.0%)
2025/09/22 15:29:12 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not i




2025/09/22 15:29:13 INFO dspy.evaluate.evaluate: Average Metric: 99.0 / 149 (66.4%)
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 4: New program is on the linear pareto front
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Full valset score for new program: 0.6644295302013423
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Full train_val score for new program: 0.6644295302013423
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Individual valset scores for new program: [1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0]
2025/09/22 15

Average Metric: 10.00 / 16 (62.5%): 100%|██████████| 16/16 [00:00<00:00, 719.71it/s]

2025/09/22 15:29:13 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 16 (62.5%)
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer or a specific number without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

In your response, include:
- A clear step-by-step solution
- Relevant formulas or theorems used
- Domain-specific information

In case of sequence or series problems, provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not include units or additional descriptions unless specifically required by the task.






2025/09/22 15:29:13 INFO dspy.evaluate.evaluate: Average Metric: 94.0 / 149 (63.1%)
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Full valset score for new program: 0.6308724832214765
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Full train_val score for new program: 0.6308724832214765
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Individual valset scores for new program: [1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0]
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 5: New valset pareto front scores: [1, 0, 0, 1, 1, 0, 1, 1

Average Metric: 7.00 / 16 (43.8%): 100%|██████████| 16/16 [00:00<00:00, 707.57it/s]

2025/09/22 15:29:13 INFO dspy.evaluate.evaluate: Average Metric: 7.0 / 16 (43.8%)
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Proposed new text for predict: Solve the given problem and provide the answer in the correct format.

### Problem Understanding

Read and understand the problem statement provided.

### Task Requirements

1. Analyze the problem statement.
2. Develop a step-by-step solution.
3. Provide the final answer in the required format.

### Key Constraints

- Ensure the final answer is a valid integer or follows the specified format.
- Include all necessary calculations and explanations.

### Niche and Domain-Specific Information

Incorporate relevant information from the feedback to improve future responses.

### Generalizable Strategies

Utilize generalizable strategies and mathematical techniques to solve similar problems.

### Final Answer Format

Provide the final answer in the format: $\boxed{answer}$ or answer, without additional text.
2025/09/2


Average Metric: 8.00 / 16 (50.0%): 100%|██████████| 16/16 [00:00<00:00, 765.02it/s]

2025/09/22 15:29:13 INFO dspy.evaluate.evaluate: Average Metric: 8.0 / 16 (50.0%)
2025/09/22 15:29:13 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not in




2025/09/22 15:29:13 INFO dspy.evaluate.evaluate: Average Metric: 9.0 / 16 (56.2%)
2025/09/22 15:29:14 INFO dspy.evaluate.evaluate: Average Metric: 96.0 / 149 (64.4%)
2025/09/22 15:29:14 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Full valset score for new program: 0.6442953020134228
2025/09/22 15:29:14 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Full train_val score for new program: 0.6442953020134228
2025/09/22 15:29:14 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Individual valset scores for new program: [1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0]
2025/09/22 15:29:14 INFO dspy.teleprom

Average Metric: 13.00 / 16 (81.2%): 100%|██████████| 16/16 [00:37<00:00,  2.36s/it]

2025/09/22 15:29:52 INFO dspy.evaluate.evaluate: Average Metric: 13.0 / 16 (81.2%)





2025/09/22 15:29:59 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not include units or additional descriptions unless specifically required by the task.



Average Metric: 5.00 / 16 (31.2%): 100%|██████████| 16/16 [00:21<00:00,  1.34s/it]

2025/09/22 15:30:38 INFO dspy.evaluate.evaluate: Average Metric: 5.0 / 16 (31.2%)





2025/09/22 15:30:43 INFO dspy.teleprompt.gepa.gepa: Iteration 9: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer or a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

### Task Requirements:
- Read and understand the problem statement carefully.
- Identify the key elements and constraints of the problem.
- Apply relevant mathematical formulas or theorems to solve the problem.
- Provide a clear and step-by-step solution for each part of the problem.
- Ensure the final answer is a valid integer without any additional text or formatting.
- Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

### Specific Guidelines:
- For sequence or series problems, provide a clear step-by-step solution.
- For proble

Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [00:22<00:00,  1.38s/it]

2025/09/22 15:33:36 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 16 (68.8%)





2025/09/22 15:33:42 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Proposed new text for predict: ### Instruction
Solve the given mathematical problem and provide the answer in the correct format. Ensure the final answer is a valid integer or a specific number without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

In your response, include:
- A clear step-by-step solution
- Relevant formulas or theorems used
- Domain-specific information

In case of sequence or series problems, provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not include units or additional descriptions unless specifically required by the task.

### Specific Requirements:
- The final answer should be a valid integ

Average Metric: 9.00 / 16 (56.2%): 100%|██████████| 16/16 [00:48<00:00,  3.02s/it]

2025/09/22 15:35:22 INFO dspy.evaluate.evaluate: Average Metric: 9.0 / 16 (56.2%)





2025/09/22 15:35:33 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Proposed new text for predict: ### Instruction

Solve the given problem and provide the answer in the correct format. 

To ensure accuracy, consider the following guidelines:

1. **Read and Understand the Problem**: Carefully read the problem statement and identify the key components, including any specific constraints or requirements.

2. **Provide Detailed Reasoning**: Offer a step-by-step explanation of your thought process and calculations. This will help in ensuring that the approach is correct and easy to follow.

3. **Use Correct Mathematical Notation**: Ensure that all mathematical expressions and equations are clearly written and correctly formatted.

4. **Check for Common Mistakes**: Verify the calculations and reasoning to avoid common mistakes, such as incorrect unit conversions or miscalculations.

5. **Rationalize Denominators When Required**: If a problem requires the rationalization of denominators, ensur

Average Metric: 7.00 / 16 (43.8%): 100%|██████████| 16/16 [00:40<00:00,  2.53s/it]

2025/09/22 15:39:01 INFO dspy.evaluate.evaluate: Average Metric: 7.0 / 16 (43.8%)





2025/09/22 15:39:10 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Proposed new text for predict: ### Instruction

Solve the given mathematical problem and provide the final numerical answer.

### Guidelines

1. Read and understand the problem statement.
2. Provide detailed step-by-step reasoning.
3. Use correct mathematical notation and formatting.
4. Verify calculations to ensure accuracy.
5. Ensure the final answer is a valid integer.

### Problem Statement

Given along with the task.

### Task

Solve the problem and provide the final numerical answer in the correct format.

### Example

Provided earlier.

### Additional Constraints

- The final answer must be a valid integer.
- No additional text or formatting is allowed.

### Strategy

- Understand the problem and identify key components.
- Break down the problem into manageable parts.
- Calculate step-by-step and verify accuracy.
- Provide the final numerical answer in the required format.

### Final Answer Format

A valid integer

Average Metric: 10.00 / 16 (62.5%): 100%|██████████| 16/16 [00:20<00:00,  1.30s/it]

2025/09/22 15:41:47 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 16 (62.5%)





2025/09/22 15:41:52 INFO dspy.teleprompt.gepa.gepa: Iteration 13: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not include units or additional descriptions unless specifically required by the task.


Average Metric: 5.00 / 12 (41.7%):  75%|███████▌  | 12/16 [00:27<00:14,  3.65s/it]



Average Metric: 6.00 / 16 (37.5%): 100%|██████████| 16/16 [01:12<00:00,  4.56s/it]

2025/09/22 15:43:54 INFO dspy.evaluate.evaluate: Average Metric: 6.0 / 16 (37.5%)





2025/09/22 15:44:05 INFO dspy.teleprompt.gepa.gepa: Iteration 14: Proposed new text for predict: ### Instruction

Solve the given problem and provide the answer in the correct format. 

To ensure accuracy, consider the following guidelines:

1. **Read and Understand the Problem**: Carefully read the problem statement and identify the key components, including any specific constraints or requirements.

2. **Provide Detailed Reasoning**: Offer a step-by-step explanation of your thought process and calculations. This will help in ensuring that the approach is correct and easy to follow.

3. **Use Correct Mathematical Notation**: Ensure that all mathematical expressions and equations are clearly written and correctly formatted.

4. **Check for Common Mistakes**: Verify the calculations and reasoning to avoid common mistakes, such as incorrect unit conversions or miscalculations.

5. **Rationalize Denominators When Required**: If a problem requires the rationalization of denominators, ensur

Average Metric: 13.00 / 16 (81.2%): 100%|██████████| 16/16 [00:14<00:00,  1.11it/s]

2025/09/22 15:45:14 INFO dspy.evaluate.evaluate: Average Metric: 13.0 / 16 (81.2%)





2025/09/22 15:45:22 INFO dspy.teleprompt.gepa.gepa: Iteration 15: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not include units or additional descriptions unless specifically required by the task.


Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [01:08<00:00,  4.27s/it]

2025/09/22 15:46:46 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 16 (68.8%)





2025/09/22 15:46:52 INFO dspy.teleprompt.gepa.gepa: Iteration 16: Proposed new text for predict: Solve the given mathematical problem and provide the final answer as a valid integer without any additional text or formatting. 

Read the problem carefully and identify the key elements. 
Use relevant formulas and theorems to solve the problem. 
Provide a clear and concise step-by-step solution. 
Ensure the final answer is accurate and in the correct format.

In case of sequence or series problems, provide a clear step-by-step solution. 
For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

Do not include units or additional descriptions in the final answer unless specifically required by the task.
2025/09/22 15:47:17 INFO dspy.evaluate.evaluate: Average Metric: 8.0 / 16 (50.0%)
2025/09/22 15:47:17 INFO dspy.teleprompt.gepa.gepa: Iteration 16: New subsample score is not better, skipping
2025/09/22 15:47:17 INFO dspy.teleprompt.gepa.gepa: I

Average Metric: 12.00 / 16 (75.0%): 100%|██████████| 16/16 [00:49<00:00,  3.07s/it]

2025/09/22 15:48:06 INFO dspy.evaluate.evaluate: Average Metric: 12.0 / 16 (75.0%)





2025/09/22 15:48:13 INFO dspy.teleprompt.gepa.gepa: Iteration 17: Proposed new text for predict: ### Instruction

Solve the given mathematical problem and provide the final numerical answer.

### Guidelines

1. Read and understand the problem statement.
2. Provide detailed step-by-step reasoning.
3. Use correct mathematical notation and formatting.
4. Verify calculations to ensure accuracy.
5. Ensure the final answer is a valid integer or a specific numerical value as required.

### Problem Statement

Along with the task.

### Task

Solve the problem and provide the final numerical answer in the correct format.

### Additional Constraints

- The final answer must be a valid integer or a specific numerical value as required.
- No additional text or formatting is allowed, except for mathematical notation.

### Strategy

- Understand the problem and identify key components.
- Break down the problem into manageable parts.
- Calculate step-by-step and verify accuracy.
- Provide the final nu

Average Metric: 8.00 / 16 (50.0%): 100%|██████████| 16/16 [00:36<00:00,  2.29s/it]

2025/09/22 15:49:17 INFO dspy.evaluate.evaluate: Average Metric: 8.0 / 16 (50.0%)





2025/09/22 15:49:23 INFO dspy.teleprompt.gepa.gepa: Iteration 18: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not include units or additional descriptions unless specifically required by the task.


Average Metric: 10.00 / 16 (62.5%): 100%|██████████| 16/16 [00:18<00:00,  1.13s/it]

2025/09/22 15:51:40 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 16 (62.5%)





2025/09/22 15:51:45 INFO dspy.teleprompt.gepa.gepa: Iteration 19: Proposed new text for predict: ### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid response without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not include units or additional descriptions unless specifically required by the task.

In [31]:
# Pull the instruction text off the optimized program's `predict` signature
# and print it, so the prompt that was finally kept can be read in full.
final_instructions = optimized_program.predict.signature.instructions
print(final_instructions)

### Instruction
Solve the given problem and provide the answer in the correct format. Ensure the final answer is a valid integer without any additional text or formatting. If there are multiple parts to the problem, provide a clear and concise solution for each part. Consider the context and domain-specific information that might be necessary to solve the task accurately.

The tasks may involve calculations, logical reasoning, or problem-solving strategies. Use the information provided in the examples and feedback to enhance your understanding of the tasks and provide accurate solutions.

In case of sequence or series problems, ensure you provide a clear step-by-step solution. For problems involving geometry or algebra, include relevant formulas or theorems used in the solution.

In your final answer, do not include units or additional descriptions unless specifically required by the task.

### Examples and Feedback for Reference:
- Provided for context and to improve future responses.

In [32]:
# Score the optimized program with the `evaluate` callable defined earlier in
# the notebook. The result is bound to a name and then echoed as the cell's
# last expression so Jupyter still displays it as the cell output.
optimized_eval_result = evaluate(optimized_program)
optimized_eval_result

Average Metric: 860.00 / 1344 (64.0%): 100%|██████████| 1344/1344 [07:05<00:00,  3.16it/s]

2025/09/22 15:59:11 INFO dspy.evaluate.evaluate: Average Metric: 860 / 1344 (64.0%)





Unnamed: 0,problem,solution,example_answer,reasoning,pred_answer,metric
0,A [i]Beaver-number[/i] is a positive 5 digit integer whose digit s...,1. **Understanding the Problem:** - A *Beaver-number* is a 5-digit...,79200,A Beaver-number is a five-digit integer whose digits sum to a mult...,80253,
1,Two passenger trains start at the same hour in the day from two di...,Let's denote the distance travelled by the slower train (16 kmph) ...,60,Let the two trains start simultaneously from two stations separate...,60,✔️ [1]
2,Mcdonald is planning to open up a farm that provides eggs to the c...,Let's denote the number of eggs Ben needs per week as B. Since Ked...,14,The total eggs needed in a month are 124. Saly needs 10 eggs per w...,14,✔️ [1]
3,Given that the arithmetic sequence ${a_n}$ has a sum of its first ...,"Since the sequence ${a_n}$ is an arithmetic sequence, it follows t...",15,Given the arithmetic sequence \( a_n \) with sum of first \( n \) ...,15,✔️ [1]
4,The ratio of spinsters to cats is 2 to 9. If there are 42 more cat...,Let the number of spinsters be represented by S and the number of ...,12,"Let the number of spinsters be 2k and the number of cats be 9k, as...",12,✔️ [1]
...,...,...,...,...,...,...
1339,"If $\begin{vmatrix} a & b \\ c & d \end{vmatrix} = 6,$ then find \...","Given that $\begin{vmatrix} a & b \\ c & d \end{vmatrix} = 6,$ it ...",12,Given the determinant \(\begin{vmatrix} a & b \\ c & d \end{vmatri...,12,✔️ [1]
1340,Valentina bought a foot long burger and shared half with his broth...,"If Valentina bought a foot long burger, that means the burger is 1...",6,"Valentina bought a foot-long burger, which is 12 inches. She share...",6,✔️ [1]
1341,"In a sequence, 1 = 6, 2 = 12, 3 = 18, 4 = 24, and 5 = some value. ...",The sequence given is: 1 = 6 2 = 12 3 = 18 4 = 24 5 = ? 6 = 1 From...,30,"The sequence provided is: 1 = 6, 2 = 12, 3 = 18, 4 = 24. We observ...",30,✔️ [1]
1342,The value of $x$ that satisfies $\binom{x+1}{x-4} = \frac{7}{15}P^...,**Analysis** This question examines the formulas for combinations ...,10,We are given the equation \(\binom{x+1}{x-4} = \frac{7}{15} P_{x+1...,3,


EvaluationResult(score=63.99, results=<list of 1344 results>)