In [9]:
import sys
import os
import dspy 
from common.my_settings import MySettings  
from common.utils import md
from common.llm_client_factory import LlmClientFactory
from dspy_utils.dspy_helpers import md_dspy

settings = MySettings().get()

lm_gpt35 = dspy.LM('gpt-3.5-turbo', temperature=0.8, model_type='chat', cache=False, api_key=settings.OPENAI_API_KEY)
lm_gpt4 = dspy.LM('gpt-4.1', temperature=0.9, model_type='chat', cache=False, api_key=settings.OPENAI_API_KEY)
dspy.configure(lm=lm_gpt4)

# Create domain classes
from typing import Literal
    
class NumberPicker(dspy.Signature):
    """Guess a number from 1 to 10"""
    number_guess: str = dspy.InputField()
    answer: Literal["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"] = dspy.OutputField()

numberPickerPredict = dspy.Predict(NumberPicker)
numberPickerPredict(number_guess="even")

trainset = [
    dspy.Example(number_guess="1", answer="one").with_inputs("number_guess"),
    dspy.Example(number_guess="2", answer="two").with_inputs("number_guess"),
    dspy.Example(number_guess="Four", answer="four").with_inputs("number_guess"),
    dspy.Example(number_guess="Five", answer="five").with_inputs("number_guess"),
    dspy.Example(number_guess="The number: 3", answer="three").with_inputs("number_guess"),
    dspy.Example(number_guess="6", answer="six").with_inputs("number_guess"),
    dspy.Example(number_guess="7", answer="seven").with_inputs("number_guess"),
    dspy.Example(number_guess="The number: 8", answer="eight").with_inputs("number_guess"),
    dspy.Example(number_guess="The number: 10", answer="ten").with_inputs("number_guess"),
]

def validate_match(expected, actual, trace=None) -> bool:
    # print()
    # md("**expected**: ", expected)
    # print("**actual**: ", actual)
    # md("**Is match**: ", actual.answer == "two")
    # print()
    return (actual.answer == "three")

from dspy.teleprompt import *

tp = dspy.MIPROv2(metric=validate_match, auto="medium", prompt_model=lm_gpt35, task_model=lm_gpt4, verbose=True)
optimized_matcher = tp.compile(numberPickerPredict, trainset=trainset, requires_permission_to_run=False)
optimized_matcher.save("./saved_files/test.json")

import json

with open("./saved_files/test.json", "r") as f:
    data = json.load(f)

print(json.dumps(data, indent=2))

print("Done.")


Getting keys from environment variables


2025/09/04 18:35:45 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING MEDIUM AUTO RUN SETTINGS:
num_trials: 18
minibatch: False
num_fewshot_candidates: 12
num_instruct_candidates: 6
valset size: 7

2025/09/04 18:35:45 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/09/04 18:35:45 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/09/04 18:35:45 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=12 sets of demonstrations...


Bootstrapping set 1/12
Bootstrapping set 2/12
Bootstrapping set 3/12


100%|██████████| 2/2 [00:01<00:00,  1.38it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 4/12


100%|██████████| 2/2 [00:01<00:00,  1.35it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 5/12


100%|██████████| 2/2 [00:01<00:00,  1.44it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 6/12


100%|██████████| 2/2 [00:01<00:00,  1.44it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 7/12


100%|██████████| 2/2 [00:01<00:00,  1.57it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 8/12


100%|██████████| 2/2 [00:01<00:00,  1.47it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 9/12


100%|██████████| 2/2 [00:01<00:00,  1.10it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 10/12


100%|██████████| 2/2 [00:01<00:00,  1.34it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 11/12


100%|██████████| 2/2 [00:01<00:00,  1.43it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 12/12


100%|██████████| 2/2 [00:01<00:00,  1.47it/s]
2025/09/04 18:35:59 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/09/04 18:35:59 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
SOURCE CODE: 




2025/09/04 18:36:01 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=6 instructions...



DATA SUMMARY: The dataset appears to involve converting numerical values to their textual representations and vice versa. The examples demonstrate a pattern of providing a numerical value as `number_guess` and its corresponding word representation as the `answer` field, indicating a focus on understanding and generating text representations of numeric values.
Using a randomly generated configuration for our grounded proposer.
Selected tip: simple
PROGRAM DESCRIPTION: The provided pseudocode appears to be for a language model program designed to solve a specific task. The program likely takes input data, processes it through a language model, and generates some output based on the model's predictions or classifications. It seems to involve a pipeline where data is fed into the model, and the model's output is then utilized for further processing or decision-making.
task_demos No task demos provided.




[34m[2025-09-04T18:36:05.859776][0m

[31mSystem message:[0m

Your input fields a

2025/09/04 18:36:24 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/09/04 18:36:24 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Guess a number from 1 to 10

2025/09/04 18:36:24 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Given a numerical value as `number_guess`, provide the corresponding textual representation as the output `answer`.

2025/09/04 18:36:24 INFO dspy.teleprompt.mipro_optimizer_v2: 2: Given a numerical value `number_guess`, provide the textual representation of that number as the output `answer`.

2025/09/04 18:36:24 INFO dspy.teleprompt.mipro_optimizer_v2: 3: Given a numerical value, provide the corresponding textual representation of the number.

2025/09/04 18:36:24 INFO dspy.teleprompt.mipro_optimizer_v2: 4: Given a numerical value as input, provide the textual representation of that number using the language model pipeline. The pipeline will process the input number through different language models to generate the corresponding wo





[34m[2025-09-04T18:36:24.930934][0m

[31mSystem message:[0m

Your input fields are:
1. `dataset_description` (str): A description of the dataset that we are using.
2. `program_code` (str): Language model program designed to solve a particular task.
3. `program_description` (str): Summary of the task the program is designed to solve, and how it goes about solving it.
4. `module` (str): The module to create an instruction for.
5. `module_description` (str): Description of the module to create an instruction for.
6. `task_demos` (str): Example inputs/outputs of our module.
7. `basic_instruction` (str): Basic instruction.
Your output fields are:
1. `proposed_instruction` (str): Propose an instruction that will be used to prompt a Language Model to perform this task.
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## dataset_description ## ]]
{dataset_description}

[[ ## program_code ## ]]
{program_code}

[[ ## program_description

2025/09/04 18:36:27 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:27 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 14.29

2025/09/04 18:36:27 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 18 =====
2025/09/04 18:36:27 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a numerical value as `number_guess`, provide the corresponding textual representation as the output `answer`.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:00<00:00,  7.88it/s]

2025/09/04 18:36:28 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:28 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 6'].





2025/09/04 18:36:28 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29]
2025/09/04 18:36:28 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:28 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 18 =====
2025/09/04 18:36:28 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Given a numerical value as input, provide the textual representation of that number using the language model pipeline. The pipeline will process the input number through different language models to generate the corresponding word representation. Please ensure the input number falls within the range specified for accurate results.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:02<00:00,  3.50it/s]

2025/09/04 18:36:30 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:30 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 2'].
2025/09/04 18:36:30 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29]
2025/09/04 18:36:30 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:30 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 18 =====
2025/09/04 18:36:30 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Guess a number from 1 to 10
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:00<00:00,  7.38it/s]

2025/09/04 18:36:31 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:31 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 6'].





2025/09/04 18:36:31 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:31 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:31 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 18 =====
2025/09/04 18:36:31 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Given a numerical value `number_guess`, provide the textual representation of that number as the output `answer`.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:00<00:00,  7.02it/s]

2025/09/04 18:36:32 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:32 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 4'].
2025/09/04 18:36:32 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:32 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:32 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 18 =====
2025/09/04 18:36:32 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a numerical value, provide the corresponding textual representation of the number.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  5.66it/s]

2025/09/04 18:36:33 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:33 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 5'].
2025/09/04 18:36:33 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:33 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:33 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 18 =====
2025/09/04 18:36:33 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a numerical value as input, provide the textual representation of that number using the language model pipeline. The pipeline will process the input number through different language models to generate the corresponding word representation. Please ensure the input number falls within the range specified for accurate results.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:00<00:00,  8.31it/s]

2025/09/04 18:36:34 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:34 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 6'].
2025/09/04 18:36:34 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:34 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:34 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 18 =====
2025/09/04 18:36:34 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Please provide a numerical value as input to the model for prediction. The model will then generate the corresponding textual representation of the input number.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  6.71it/s]

2025/09/04 18:36:35 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:35 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 1'].
2025/09/04 18:36:35 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:35 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:35 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 18 =====





2025/09/04 18:36:35 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Given a numerical value, provide the corresponding textual representation of the number.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:00<00:00,  8.00it/s]

2025/09/04 18:36:36 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:36 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 3'].
2025/09/04 18:36:36 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:36 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:36 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 18 =====
2025/09/04 18:36:36 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a numerical value, provide the corresponding textual representation of the number.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  5.36it/s]

2025/09/04 18:36:38 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:38 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 10'].
2025/09/04 18:36:38 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:38 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:38 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 18 =====
2025/09/04 18:36:38 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Guess a number from 1 to 10
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:00<00:00,  7.32it/s]

2025/09/04 18:36:39 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0'].





2025/09/04 18:36:39 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:39 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 12 / 18 =====
2025/09/04 18:36:39 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Given a numerical value as `number_guess`, provide the corresponding textual representation as the output `answer`.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  5.78it/s]

2025/09/04 18:36:40 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 6'].
2025/09/04 18:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 18 =====
2025/09/04 18:36:40 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a numerical value as `number_guess`, provide the corresponding textual representation as the output `answer`.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  6.40it/s]

2025/09/04 18:36:41 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 8'].
2025/09/04 18:36:41 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:41 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 14 / 18 =====
2025/09/04 18:36:41 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a numerical value as `number_guess`, provide the corresponding textual representation as the output `answer`.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:00<00:00,  8.32it/s]

2025/09/04 18:36:42 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)





2025/09/04 18:36:42 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0'].
2025/09/04 18:36:42 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:42 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:42 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 15 / 18 =====
2025/09/04 18:36:42 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Guess a number from 1 to 10
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  6.13it/s]

2025/09/04 18:36:43 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:43 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 7'].
2025/09/04 18:36:43 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:43 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:43 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 16 / 18 =====
2025/09/04 18:36:43 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Please provide a numerical value as input to the model for prediction. The model will then generate the corresponding textual representation of the input number.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  5.15it/s]

2025/09/04 18:36:45 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:45 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 9'].
2025/09/04 18:36:45 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:45 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:45 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 17 / 18 =====
2025/09/04 18:36:45 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Guess a number from 1 to 10
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  6.70it/s]

2025/09/04 18:36:46 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:46 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 11'].
2025/09/04 18:36:46 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:46 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:46 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 18 / 18 =====
2025/09/04 18:36:46 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Please provide a numerical value as input to the model for prediction. The model will then generate the corresponding textual representation of the input number.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  3.91it/s]

2025/09/04 18:36:48 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:48 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 0'].
2025/09/04 18:36:48 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:48 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:48 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 19 / 18 =====
2025/09/04 18:36:48 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a numerical value as `number_guess`, provide the corresponding textual representation as the output `answer`.
p: Answer:


Average Metric: 1.00 / 7 (14.3%): 100%|██████████| 7/7 [00:01<00:00,  3.77it/s]

2025/09/04 18:36:50 INFO dspy.evaluate.evaluate: Average Metric: 1 / 7 (14.3%)
2025/09/04 18:36:50 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 14.29 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/09/04 18:36:50 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29, 14.29]
2025/09/04 18:36:50 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 14.29


2025/09/04 18:36:50 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 14.29!



{
  "traces": [],
  "train": [],
  "demos": [],
  "signature": {
    "instructions": "Guess a number from 1 to 10",
    "fields": [
      {
        "prefix": "Number Guess:",
        "description": "${number_guess}"
      },
      {
        "prefix": "Answer:",
        "description": "${answer}"
      }
    ]
  },
  "lm": null,
  "metadata": {
    "dependency_versions": {
      "python": "3.12",
      "dspy": "3.0.1",
      "cloudpickle": "3.1"
    }
  }
}
Done.
