In [None]:
import sys
import os
import dspy 
from common.my_settings import MySettings  
from common.utils import md
from common.llm_client_factory import LlmClientFactory
from dspy_utils.dspy_helpers import md_dspy

# Project settings; the OpenAI API key used by both LMs is read from here.
settings = MySettings().get()

# Two chat LMs with caching switched off. Further down, gpt-3.5-turbo is passed
# to the optimizer as its prompt model and gpt-4.1 as its task model.
lm_gpt35 = dspy.LM(
    'gpt-3.5-turbo',
    temperature=0.8,
    model_type='chat',
    cache=False,
    api_key=settings.OPENAI_API_KEY,
)
lm_gpt4 = dspy.LM(
    'gpt-4.1',
    temperature=0.9,
    model_type='chat',
    cache=False,
    api_key=settings.OPENAI_API_KEY,
)

# gpt-4.1 is installed as the LM dspy is configured with.
dspy.configure(lm=lm_gpt4)

# Create domain classes
from typing import Literal
    
# Signature for the number-picking task: one string input and one output
# annotated as one of the English words "one" through "ten".
class NumberPicker(dspy.Signature):
    """Guess a number from 1 to 10"""
    # NOTE(review): presumably dspy uses the docstring above as the prompt
    # instruction, so rewording it would change model behavior -- confirm.

    # Free-form text naming the number; the trainset below uses forms such as
    # "1", "Four" and "The number: 3".
    number_guess: str = dspy.InputField()
    # Predicted number, spelled out; annotated as exactly one of these ten words.
    answer: Literal["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"] = dspy.OutputField()

# Plain Predict module over the signature, followed by one ad-hoc call whose
# result is not stored.
numberPickerPredict = dspy.Predict(NumberPicker)
numberPickerPredict(number_guess="even")

# Training examples built from (raw guess text, expected word) pairs, in the
# same order as before. Only `number_guess` is marked as an input; there is no
# example for "nine".
trainset = [
    dspy.Example(number_guess=guess, answer=word).with_inputs("number_guess")
    for guess, word in (
        ("1", "one"),
        ("2", "two"),
        ("Four", "four"),
        ("Five", "five"),
        ("The number: 3", "three"),
        ("6", "six"),
        ("7", "seven"),
        ("The number: 8", "eight"),
        ("The number: 10", "ten"),
    )
]

def validate_match(expected, actual, trace=None) -> bool:
    """Metric: report whether the predicted answer equals the labelled answer.

    Args:
        expected: The gold example; its ``answer`` attribute is the label.
        actual: The prediction; its ``answer`` attribute is the model output.
        trace: Accepted so the function can be passed as an optimizer metric;
            it is not used here.

    Returns:
        True when ``actual.answer`` is exactly equal to ``expected.answer``.
    """
    # Compare against the example's own label. The previous version compared
    # against the constant "three", so every example was scored on whether the
    # model said "three" instead of whether it was right.
    return actual.answer == expected.answer

from dspy.teleprompt import *

# Optimize the predictor with MIPROv2: gpt-3.5-turbo is the prompt model,
# gpt-4.1 the task model, and validate_match scores each candidate.
tp = dspy.MIPROv2(metric=validate_match, auto="medium", prompt_model=lm_gpt35, task_model=lm_gpt4, verbose=True)
optimized_matcher = tp.compile(numberPickerPredict, trainset=trainset, requires_permission_to_run=False)

# Make sure the output directory exists before saving; a fresh checkout may
# not have it yet.
os.makedirs("./saved_files", exist_ok=True)
optimized_matcher.save("./saved_files/test.json")

import json

# Read the saved program state back and pretty-print it for inspection.
# JSON text is UTF-8, so name the encoding explicitly instead of relying on
# the platform default.
with open("./saved_files/test.json", "r", encoding="utf-8") as f:
    data = json.load(f)

print(json.dumps(data, indent=2))

print("Done.")


Getting keys from environment variables


2025/09/04 18:32:57 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING MEDIUM AUTO RUN SETTINGS:
num_trials: 18
minibatch: False
num_fewshot_candidates: 12
num_instruct_candidates: 6
valset size: 6

2025/09/04 18:32:57 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/09/04 18:32:57 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/09/04 18:32:57 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=12 sets of demonstrations...


Bootstrapping set 1/12
Bootstrapping set 2/12
Bootstrapping set 3/12


100%|██████████| 2/2 [00:01<00:00,  1.20it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 4/12


100%|██████████| 2/2 [00:01<00:00,  1.13it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 5/12


100%|██████████| 2/2 [00:01<00:00,  1.33it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 6/12


100%|██████████| 2/2 [00:01<00:00,  1.17it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 7/12


100%|██████████| 2/2 [00:01<00:00,  1.25it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 8/12


100%|██████████| 2/2 [00:01<00:00,  1.38it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 9/12


100%|██████████| 2/2 [00:01<00:00,  1.07it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 10/12


100%|██████████| 2/2 [00:01<00:00,  1.30it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 11/12


100%|██████████| 2/2 [00:01<00:00,  1.40it/s]


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 12/12


100%|██████████| 2/2 [00:01<00:00,  1.01it/s]
2025/09/04 18:33:13 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/09/04 18:33:13 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 0 full traces after 1 examples for up to 1 rounds, amounting to 2 attempts.
SOURCE CODE: 




2025/09/04 18:33:15 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=6 instructions...



DATA SUMMARY: The dataset contains examples with a `number_guess` input represented as a string and its corresponding textual `answer`. There is a clear pattern where the `number_guess` is converted into its textual equivalent.
Using a randomly generated configuration for our grounded proposer.
Selected tip: persona
PROGRAM DESCRIPTION: The program is designed to be a pipeline for solving tasks using calls to language models. It likely takes in input data, processes it using language models, and provides an output based on the task at hand. This pipeline can be used for various natural language processing tasks such as text classification, sentiment analysis, language translation, and more. The program leverages the capabilities of language models to automate and improve the efficiency of various text-related tasks.
task_demos No task demos provided.




[2025-09-04T18:33:20.430023]

System message:

Your input fields are:
1. `dataset_description` (str): A description

2025/09/04 18:33:40 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/09/04 18:33:40 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Guess a number from 1 to 3

2025/09/04 18:33:40 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Based on the dataset examples where a `number_guess` is converted into its corresponding `answer`, please predict the textual answer for the given `number_guess`.

2025/09/04 18:33:40 INFO dspy.teleprompt.mipro_optimizer_v2: 2: Given a `number_guess` as a string input, use the language model to predict and generate the corresponding textual `answer`.

2025/09/04 18:33:40 INFO dspy.teleprompt.mipro_optimizer_v2: 3: Given a `number_guess` input represented as a string, provide the corresponding textual `answer` by utilizing the Predict module in the program. The Predict module is designed to make predictions or generate responses based on the input `number_guess`.

2025/09/04 18:33:40 INFO dspy.teleprompt.mipro_optimizer_v2: 4: Predict





[2025-09-04T18:33:40.194738]

System message:

Your input fields are:
1. `dataset_description` (str): A description of the dataset that we are using.
2. `program_code` (str): Language model program designed to solve a particular task.
3. `program_description` (str): Summary of the task the program is designed to solve, and how it goes about solving it.
4. `module` (str): The module to create an instruction for.
5. `module_description` (str): Description of the module to create an instruction for.
6. `task_demos` (str): Example inputs/outputs of our module.
7. `basic_instruction` (str): Basic instruction.
8. `tip` (str): A suggestion for how to go about generating the new instruction.
Your output fields are:
1. `proposed_instruction` (str): Propose an instruction that will be used to prompt a Language Model to perform this task.
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## dataset_description ## ]]
{dataset

2025/09/04 18:33:41 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:41 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 66.67

2025/09/04 18:33:41 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 18 =====
2025/09/04 18:33:41 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Based on the dataset examples where a `number_guess` is converted into its corresponding `answer`, please predict the textual answer for the given `number_guess`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  7.49it/s] 

2025/09/04 18:33:42 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:42 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 6'].
2025/09/04 18:33:42 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67]
2025/09/04 18:33:42 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:42 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 18 =====
2025/09/04 18:33:42 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Predict the textual answer corresponding to the given number_guess.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:01<00:00,  4.88it/s]

2025/09/04 18:33:43 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:43 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 2'].
2025/09/04 18:33:43 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67]
2025/09/04 18:33:43 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:43 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 18 =====
2025/09/04 18:33:43 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Guess a number from 1 to 3
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:01<00:00,  3.88it/s] 

2025/09/04 18:33:45 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:45 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 6'].
2025/09/04 18:33:45 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:45 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:45 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 18 =====
2025/09/04 18:33:45 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a `number_guess` as a string input, use the language model to predict and generate the corresponding textual `answer`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:04<00:00,  1.32it/s] 

2025/09/04 18:33:49 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:49 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 4'].
2025/09/04 18:33:49 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:49 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:49 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 18 =====
2025/09/04 18:33:49 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a `number_guess` input represented as a string, provide the corresponding textual `answer` by utilizing the Predict module in the program. The Predict module is designed to make predictions or generate responses based on the input `number_guess`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  7.05it/s] 

2025/09/04 18:33:50 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)





2025/09/04 18:33:50 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 5'].
2025/09/04 18:33:50 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:50 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:50 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 18 =====
2025/09/04 18:33:50 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Predict the textual answer corresponding to the given number_guess.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:03<00:00,  1.64it/s]

2025/09/04 18:33:54 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 6'].
2025/09/04 18:33:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 18 =====
2025/09/04 18:33:54 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a `number_guess` as a string input, use the language models in the pipeline to predict and generate the corresponding textual `answer` based on the pattern observed in the dataset where the `number_guess` is converted into its textual equivalent.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  7.31it/s] 

2025/09/04 18:33:55 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:55 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 1'].
2025/09/04 18:33:55 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:55 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:55 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 18 =====
2025/09/04 18:33:55 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a `number_guess` input represented as a string, provide the corresponding textual `answer` by utilizing the Predict module in the program. The Predict module is designed to make predictions or generate responses based on the input `number_guess`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  7.37it/s] 

2025/09/04 18:33:56 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 3'].
2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 18 =====
2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Given a `number_guess` input represented as a string, provide the corresponding textual `answer` by utilizing the Predict module in the program. The Predict module is designed to make predictions or generate responses based on the input `number_guess`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  7.40it/s] 

2025/09/04 18:33:56 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 3', 'Predictor 0: Few-Shot Set 10'].
2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 18 =====
2025/09/04 18:33:56 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Guess a number from 1 to 3
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  6.81it/s] 

2025/09/04 18:33:57 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0'].
2025/09/04 18:33:57 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:57 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 12 / 18 =====
2025/09/04 18:33:57 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Based on the dataset examples where a `number_guess` is converted into its corresponding `answer`, please predict the textual answer for the given `number_guess`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  6.50it/s] 

2025/09/04 18:33:58 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:33:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 6'].
2025/09/04 18:33:58 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:33:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:33:58 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 18 =====
2025/09/04 18:33:58 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Based on the dataset examples where a `number_guess` is converted into its corresponding `answer`, please predict the textual answer for the given `number_guess`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:01<00:00,  4.73it/s] 

2025/09/04 18:34:00 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 8'].
2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 14 / 18 =====
2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Based on the dataset examples where a `number_guess` is converted into its corresponding `answer`, please predict the textual answer for the given `number_guess`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  7.89it/s] 

2025/09/04 18:34:00 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0'].
2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 15 / 18 =====
2025/09/04 18:34:00 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Guess a number from 1 to 3
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  6.22it/s] 

2025/09/04 18:34:01 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:34:01 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 7'].
2025/09/04 18:34:01 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:34:01 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67







2025/09/04 18:34:01 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 16 / 18 =====
2025/09/04 18:34:01 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Given a `number_guess` as a string input, use the language models in the pipeline to predict and generate the corresponding textual `answer` based on the pattern observed in the dataset where the `number_guess` is converted into its textual equivalent.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  6.55it/s] 

2025/09/04 18:34:02 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:34:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 9'].
2025/09/04 18:34:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:34:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:34:02 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 17 / 18 =====
2025/09/04 18:34:02 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Guess a number from 1 to 3
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  6.04it/s]

2025/09/04 18:34:03 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)





2025/09/04 18:34:03 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 11'].
2025/09/04 18:34:03 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:34:03 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:34:03 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 18 / 18 =====
2025/09/04 18:34:03 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Given a `number_guess` as a string input, use the language models in the pipeline to predict and generate the corresponding textual `answer` based on the pattern observed in the dataset where the `number_guess` is converted into its textual equivalent.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:01<00:00,  5.88it/s] 

2025/09/04 18:34:05 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)
2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 0'].
2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 19 / 18 =====
2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Based on the dataset examples where a `number_guess` is converted into its corresponding `answer`, please predict the textual answer for the given `number_guess`.
p: Answer:


Average Metric: 4.00 / 6 (66.7%): 100%|██████████| 6/6 [00:00<00:00,  7.20it/s] 

2025/09/04 18:34:05 INFO dspy.evaluate.evaluate: Average Metric: 4 / 6 (66.7%)





2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 66.67 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67, 66.67]
2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 66.67


2025/09/04 18:34:05 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 66.67!


{
  "traces": [],
  "train": [],
  "demos": [],
  "signature": {
    "instructions": "Guess a number from 1 to 3",
    "fields": [
      {
        "prefix": "Number Guess:",
        "description": "${number_guess}"
      },
      {
        "prefix": "Answer:",
        "description": "${answer}"
      }
    ]
  },
  "lm": null,
  "metadata": {
    "dependency_versions": {
      "python": "3.12",
      "dspy": "3.0.1",
      "cloudpickle": "3.1"
    }
  }
}
Done.
