In [6]:
import sys
import os
import dspy 
from common.my_settings import MySettings  
from common.utils import md
from common.llm_client_factory import LlmClientFactory
from dspy_utils.dspy_helpers import md_dspy

settings = MySettings().get()

Getting keys from environment variables


In [7]:
# Smaller LLM, this is the one that we are trying to optimize for, the prompts are going to be tweaked
# to get the best out of this model
lm_gpt35 = dspy.LM('gpt-3.5-turbo', temperature=0.8, model_type='chat', cache=False, api_key=settings.OPENAI_API_KEY)
#dspy.configure(lm=lm_gpt35)

lm_gpt41mini = dspy.LM('gpt-4.1-mini', temperature=0.8, model_type='chat', cache=False, api_key=settings.OPENAI_API_KEY)

# # Larger LLM, this is the one that we are going to use to optimize the prompts
# # It will be the helper/teach/AI Judge to assist in the optimization process
lm_gpt4 = dspy.LM('gpt-4.1', temperature=0.9, model_type='chat', cache=False, api_key=settings.OPENAI_API_KEY)
dspy.configure(lm=lm_gpt4)

In [None]:
# Create domain classes

from pydantic import BaseModel
from typing import Literal
    
class NumberPicker(dspy.Signature):
    """Guess a number"""
    number_guess: Literal["one", "two"] = dspy.InputField()
    answer: Literal["one", "two"] = dspy.OutputField()

numberPickerPredict = dspy.Predict(NumberPicker)
numberPickerPredict(number_guess="even")

Prediction(
    answer='two'
)

In [None]:
trainset = [
    dspy.Example(number_guess="one", answer="one").with_inputs("number_guess"),
    dspy.Example(number_guess="two", answer="two").with_inputs("number_guess"),
    dspy.Example(number_guess="one", answer="two").with_inputs("number_guess"),
    dspy.Example(number_guess="two", answer="one").with_inputs("number_guess")
]

In [10]:
def validate_match(expected, actual, trace=None) -> bool:
    print()
    md("**expected**: ", expected)
    md("**actual**: ", actual)
    #md("**Is Fish**: ", actual.dish_type == "Fish")
    print()
    return (actual.answer == "two")

from dspy.teleprompt import *

tp = dspy.MIPROv2(metric=validate_match, auto="heavy", prompt_model=lm_gpt35, task_model=lm_gpt4)
optimized_matcher = tp.compile(numberPickerPredict, trainset=trainset, requires_permission_to_run=False)

2025/09/03 17:01:23 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING HEAVY AUTO RUN SETTINGS:
num_trials: 27
minibatch: False
num_fewshot_candidates: 18
num_instruct_candidates: 9
valset size: 3

2025/09/03 17:01:23 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/09/03 17:01:23 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/09/03 17:01:23 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=18 sets of demonstrations...


Bootstrapping set 1/18
Bootstrapping set 2/18
Bootstrapping set 3/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:01<00:00,  1.48s/it]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 4/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:01<00:00,  1.29s/it]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 5/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:01<00:00,  1.78s/it]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 6/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:01<00:00,  1.52s/it]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:01<00:00,  1.33s/it]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 8/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.02it/s]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 9/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.29it/s]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 10/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.44it/s]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 11/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:01<00:00,  1.03s/it]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 12/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:01<00:00,  1.10s/it]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 13/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.13it/s]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 14/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.10it/s]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 15/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.14it/s]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 16/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.27it/s]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 17/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.34it/s]



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 18/18


  0%|          | 0/1 [00:00<?, ?it/s]




**expected**: Example({'number_guess': 'one', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)

100%|██████████| 1/1 [00:00<00:00,  1.34it/s]
2025/09/03 17:01:40 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/09/03 17:01:40 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.



Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.


2025/09/03 17:01:41 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=9 instructions...

2025/09/03 17:02:22 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/09/03 17:02:22 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Guess a number

2025/09/03 17:02:22 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Imagine you are in a high-stakes guessing game where your reputation is on the line. You need to predict the correct number based on the player's guess to ensure victory. Use your predictive abilities to determine the correct answer and secure your win!

2025/09/03 17:02:22 INFO dspy.teleprompt.mipro_optimizer_v2: 2: You are the last hope to save the world! A rogue AI has locked away crucial information behind a numeric code. Your mission: use your predictive powers to guess the correct number and unveil the secret code to prevent a global catastrophe. Make your prediction now!

2025/09/03 17:02:22 INFO dspy.teleprompt.mipro_optimizer_v2: 3: You are a pl

  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.32it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.69it/s] 


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.44it/s]

2025/09/03 17:02:23 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:23 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 66.67

2025/09/03 17:02:23 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 0.00 / 1 (0.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.43it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.43it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.27it/s]

2025/09/03 17:02:24 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:24 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 17'].
2025/09/03 17:02:24 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67]
2025/09/03 17:02:24 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:24 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.16it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.16it/s] 


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.24it/s]

2025/09/03 17:02:25 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:25 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 12'].
2025/09/03 17:02:25 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67]
2025/09/03 17:02:25 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:25 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.46it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})




**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.46it/s]

**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  4.09it/s] 

2025/09/03 17:02:26 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:26 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 1'].
2025/09/03 17:02:26 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:26 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:26 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.83it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  67%|██████▋   | 2/3 [00:00<00:00,  3.46it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.89it/s] 

2025/09/03 17:02:27 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 12'].
2025/09/03 17:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.49it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.92it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.76it/s] 

2025/09/03 17:02:27 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 12'].
2025/09/03 17:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:27 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.39it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.78it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  1.92it/s] 

2025/09/03 17:02:29 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:29 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 16'].
2025/09/03 17:02:29 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:29 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:29 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.56it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.56it/s] 


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.60it/s]

2025/09/03 17:02:30 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:30 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 13'].
2025/09/03 17:02:30 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:30 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:30 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.48it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.80it/s] 


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.09it/s]

2025/09/03 17:02:31 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:31 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 12'].
2025/09/03 17:02:31 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:31 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:31 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 0.00 / 1 (0.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.21it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.21it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.38it/s]

2025/09/03 17:02:32 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:32 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 10'].
2025/09/03 17:02:32 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:32 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:32 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 0.00 / 1 (0.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.03it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 2 (50.0%):  67%|██████▋   | 2/3 [00:01<00:00,  2.02it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.41it/s]

2025/09/03 17:02:33 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:33 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 8'].
2025/09/03 17:02:33 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:33 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:33 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 12 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.44it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.44it/s] 


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.26it/s]

2025/09/03 17:02:34 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:34 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 0'].
2025/09/03 17:02:34 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:34 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:34 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.38it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.27it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.02it/s] 

2025/09/03 17:02:35 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:35 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 15'].
2025/09/03 17:02:35 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:35 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:35 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 14 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.52it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.52it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.06it/s] 

2025/09/03 17:02:36 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:36 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0'].
2025/09/03 17:02:36 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:36 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:36 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 15 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.32it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.67it/s] 


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  1.74it/s]

2025/09/03 17:02:38 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:38 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 17'].
2025/09/03 17:02:38 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:38 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:38 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 16 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.27it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.27it/s] 


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.66it/s]

2025/09/03 17:02:39 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 6', 'Predictor 0: Few-Shot Set 17'].
2025/09/03 17:02:39 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:39 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 17 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.61it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.61it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  1.82it/s] 

2025/09/03 17:02:41 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 17'].
2025/09/03 17:02:41 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:41 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 18 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.39it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.79it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.31it/s] 

2025/09/03 17:02:42 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:42 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 7'].





2025/09/03 17:02:42 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:42 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:42 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 19 / 27 =====


  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})




**actual**: Prediction(  
    answer='one'  
)

**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})


Average Metric: 0.00 / 1 (0.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.55it/s]

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.55it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.71it/s]

2025/09/03 17:02:43 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:43 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/09/03 17:02:43 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:43 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:43 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 20 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.71it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.19it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.01it/s] 

2025/09/03 17:02:44 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:44 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 4'].
2025/09/03 17:02:44 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:44 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:44 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 21 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 0.00 / 1 (0.0%):   0%|          | 0/3 [00:00<?, ?it/s]
Average Metric: 0.00 / 1 (0.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.45it/s]

**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.45it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.17it/s]

2025/09/03 17:02:45 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:45 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 9'].
2025/09/03 17:02:45 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:45 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:45 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 22 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.36it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.57it/s] 


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.41it/s]

2025/09/03 17:02:46 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:46 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 3'].
2025/09/03 17:02:46 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:46 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:46 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 23 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.68it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.74it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.95it/s] 

2025/09/03 17:02:48 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:48 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 11'].
2025/09/03 17:02:48 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:48 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:48 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 24 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 0.00 / 1 (0.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.59it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 2 (50.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.48it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.05it/s]

2025/09/03 17:02:49 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:49 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 2'].
2025/09/03 17:02:49 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:49 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:49 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 25 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.27it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 1.00 / 2 (50.0%):  67%|██████▋   | 2/3 [00:01<00:00,  2.07it/s] 


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:04<00:00,  1.56s/it]

2025/09/03 17:02:53 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0'].
2025/09/03 17:02:53 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:53 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 26 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.26it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 2 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.26it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:02<00:00,  1.44it/s] 

2025/09/03 17:02:55 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:55 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 6'].
2025/09/03 17:02:55 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:55 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:55 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 27 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]



**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 1 (100.0%):   0%|          | 0/3 [00:00<?, ?it/s]

**actual**: Prediction(  
    answer='one'  
)

Average Metric: 1.00 / 1 (100.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.41it/s]
Average Metric: 1.00 / 2 (50.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.41it/s] 


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  2.99it/s]

2025/09/03 17:02:56 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 17'].
2025/09/03 17:02:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 28 / 27 =====



  0%|          | 0/3 [00:00<?, ?it/s]


**expected**: Example({'number_guess': '1', 'answer': 'one'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='one'  
)


Average Metric: 0.00 / 1 (0.0%):  33%|███▎      | 1/3 [00:00<00:01,  1.47it/s]


**expected**: Example({'number_guess': 'two', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 1.00 / 2 (50.0%):  67%|██████▋   | 2/3 [00:00<00:00,  2.84it/s]


**expected**: Example({'number_guess': '2', 'answer': 'two'}) (input_keys={'number_guess'})

**actual**: Prediction(  
    answer='two'  
)


Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:01<00:00,  1.59it/s]

2025/09/03 17:02:58 INFO dspy.evaluate.evaluate: Average Metric: 2 / 3 (66.7%)
2025/09/03 17:02:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 14'].
2025/09/03 17:02:58 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/03 17:02:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/03 17:02:58 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 66.67!





In [11]:
###########################################################################################

In [12]:
optimized_matcher.save("./saved_files/", save_program=True)

In [13]:

optimized_prompt = getattr(optimized_matcher, "prompt", None) \
    or getattr(optimized_matcher, "instructions", None) \
    or getattr(optimized_matcher, "template", None)

print("**Optimized Prompt:**", optimized_prompt)

**Optimized Prompt:** None


In [14]:
if hasattr(optimized_matcher, "examples"):
    print("Few-shot examples:", optimized_matcher.examples)
elif hasattr(optimized_matcher, "demos"):
    print("Few-shot demos:", optimized_matcher.demos)

Few-shot demos: []


In [15]:
print(getattr(optimized_matcher, "compiled_template", None))

None


In [16]:

# Quick peeks (if available in your version)
try:
    print("=== optimized_matcher.inspect() ===")
    print(optimized_matcher.inspect())
except Exception:
    pass

try:
    print("=== tp.inspect() ===")
    print(tp.inspect())
except Exception:
    pass


=== optimized_matcher.inspect() ===
=== tp.inspect() ===


In [17]:
from typing import Any

def _safe(obj: Any, attr: str, default=None):
    return getattr(obj, attr, default)

def _print_section(title: str, content):
    if content:
        print(f"\n===== {title} =====")
        print(content)

def print_dspy_program(program: Any):
    print(">>> Repr:", repr(program))

    # Signature + fields
    sig = _safe(program, "signature")
    _print_section("Signature", sig)
    if sig:
        # Some versions expose .instructions or a docstring
        _print_section("Signature Doc", getattr(sig, "__doc__", None))
        _print_section("Signature Instructions", _safe(sig, "instructions"))
        _print_section("Signature Inputs", _safe(sig, "inputs", _safe(sig, "input_fields", None)))
        _print_section("Signature Outputs", _safe(sig, "outputs", _safe(sig, "output_fields", None)))

    # Where different versions might store the optimized prompt/template
    for k in ("prompt", "instructions", "system_prompt", "template", "compiled_template"):
        _print_section(k, _safe(program, k))

    # Few-shot examples (these keys vary by version/teleprompter)
    examples = None
    for k in ("examples", "fewshot", "demos", "shots", "train_examples", "demo_store"):
        v = _safe(program, k)
        if v:
            _print_section(f"{k} (container)", f"type={type(v)}, len={getattr(v, '__len__', lambda: 'n/a')() if hasattr(v, '__len__') else 'n/a'}")
            examples = v
            break

    # Try to print examples if iterable
    if examples and hasattr(examples, "__iter__"):
        try:
            for i, ex in enumerate(list(examples)[:20]):  # cap to avoid huge logs
                print(f"\n--- Few-shot Example #{i+1} ---")
                if hasattr(ex, "inputs") and callable(ex.inputs):
                    print("inputs:", ex.inputs())
                elif hasattr(ex, "inputs"):
                    print("inputs:", ex.inputs)
                else:
                    print("inputs: <unknown>")

                if hasattr(ex, "outputs") and callable(ex.outputs):
                    print("outputs:", ex.outputs())
                elif hasattr(ex, "outputs"):
                    print("outputs:", ex.outputs)
                else:
                    print("outputs: <unknown>")
        except Exception as e:
            print(f"(could not iterate examples cleanly: {e})")

print("\n================= OPTIMIZED PROGRAM =================")
print_dspy_program(optimized_matcher)


>>> Repr: Predict(NumberPicker(number_guess -> answer
    instructions='Guess a number'
    number_guess = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Number Guess:', 'desc': '${number_guess}'})
    answer = Field(annotation=Literal['one', 'two'] required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}'})
))

===== Signature =====
NumberPicker(number_guess -> answer
    instructions='Guess a number'
    number_guess = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Number Guess:', 'desc': '${number_guess}'})
    answer = Field(annotation=Literal['one', 'two'] required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Answer:', 'desc': '${answer}'})
)

===== Signature Doc =====
Guess a number

===== Signature Instructions =====
Guess a number

===== Signature Inputs =====
{'number_guess': FieldInfo(annotation=str, required=True