In [2]:
import sys
import os
import dspy 
from common.my_settings import MySettings  
from common.utils import md
from common.llm_client_factory import LlmClientFactory
from dspy_utils.dspy_helpers import md_dspy

settings = MySettings().get()

lm_gpt35 = dspy.LM('gpt-3.5-turbo', temperature=0.8, model_type='chat', cache=False, api_key=settings.OPENAI_API_KEY)
lm_gpt4 = dspy.LM('gpt-4.1', temperature=0.9, model_type='chat', cache=False, api_key=settings.OPENAI_API_KEY)
dspy.configure(lm=lm_gpt4)

# Create domain classes
from typing import Literal
    
class NumberPicker(dspy.Signature):
    """Guess a number from 1 to 5.."""
    number_guess: str = dspy.InputField()
    answer: Literal["one", "two", "three", "four", "five"] = dspy.OutputField()

numberPickerPredict = dspy.Predict(NumberPicker)
numberPickerPredict(number_guess="even")

trainset = [
    dspy.Example(number_guess="1", answer="one").with_inputs("number_guess"),
    dspy.Example(number_guess="2", answer="two").with_inputs("number_guess"),
    dspy.Example(number_guess="Four", answer="four").with_inputs("number_guess"),
    dspy.Example(number_guess="Five", answer="five").with_inputs("number_guess"),
    dspy.Example(number_guess="The number: 3", answer="trhe").with_inputs("number_guess"),
]

def validate_match(expected, actual, trace=None) -> bool:
    # print()
    # md("**expected**: ", expected)
    # print("**actual**: ", actual)
    # md("**Is match**: ", actual.answer == "two")
    # print()
    return (actual.answer == "three")

from dspy.teleprompt import *

tp = dspy.MIPROv2(metric=validate_match, auto="light", prompt_model=lm_gpt35, task_model=lm_gpt4)
optimized_matcher = tp.compile(numberPickerPredict, trainset=trainset, requires_permission_to_run=False)
optimized_matcher.save("./saved_files/test.json")
print("Done.")


Getting keys from environment variables


2025/09/04 18:13:34 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 10
minibatch: False
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 4

2025/09/04 18:13:34 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/09/04 18:13:34 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/09/04 18:13:34 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


100%|██████████| 1/1 [00:00<00:00,  1.39it/s]


Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 4/6


100%|██████████| 1/1 [00:01<00:00,  1.19s/it]


Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 5/6


100%|██████████| 1/1 [00:00<00:00,  1.29it/s]


Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 6/6


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]
2025/09/04 18:13:39 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/09/04 18:13:39 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 0 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.


2025/09/04 18:13:41 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...

2025/09/04 18:13:52 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/09/04 18:13:52 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Guess a number from 1 to 5..

2025/09/04 18:13:52 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Given a number guess from 1 to 5, predict the corresponding word representation as accurately as possible. Imagine that the correct prediction is crucial for saving a person's life in a high-stakes situation.

2025/09/04 18:13:52 INFO dspy.teleprompt.mipro_optimizer_v2: 2: Given a high stakes scenario where accurate predictions are crucial, predict the word representation for the numerical value provided in the input.

2025/09/04 18:13:52 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/09/04 18:13:52 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==
2025/09/04 18:13:52 INFO dspy.teleprompt.mipro_opti

Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:00<00:00,  4.66it/s]

2025/09/04 18:13:53 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:13:53 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 25.0

2025/09/04 18:13:53 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:01<00:00,  2.97it/s] 

2025/09/04 18:13:54 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:13:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/09/04 18:13:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0]
2025/09/04 18:13:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:13:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:00<00:00,  4.85it/s] 

2025/09/04 18:13:55 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:13:55 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/09/04 18:13:55 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0]
2025/09/04 18:13:55 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:13:55 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:00<00:00,  5.31it/s]

2025/09/04 18:13:56 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:13:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/09/04 18:13:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0, 25.0]
2025/09/04 18:13:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:13:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:01<00:00,  2.37it/s]

2025/09/04 18:13:57 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:13:57 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2'].
2025/09/04 18:13:57 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0, 25.0, 25.0]
2025/09/04 18:13:57 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:13:57 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:00<00:00,  5.02it/s]

2025/09/04 18:13:58 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:13:58 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].
2025/09/04 18:13:58 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0, 25.0, 25.0, 25.0]
2025/09/04 18:13:58 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:13:58 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:01<00:00,  2.23it/s] 

2025/09/04 18:14:00 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:14:00 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/09/04 18:14:00 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0]
2025/09/04 18:14:00 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:14:00 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:00<00:00,  4.33it/s]

2025/09/04 18:14:01 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:14:01 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/09/04 18:14:01 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0]
2025/09/04 18:14:01 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:14:01 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:00<00:00,  5.56it/s] 

2025/09/04 18:14:02 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:14:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4'].
2025/09/04 18:14:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0]
2025/09/04 18:14:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:14:02 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 10 =====



Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:00<00:00,  5.08it/s] 

2025/09/04 18:14:03 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)





2025/09/04 18:14:03 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/09/04 18:14:03 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0]
2025/09/04 18:14:03 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:14:03 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 10 =====


Average Metric: 1.00 / 4 (25.0%): 100%|██████████| 4/4 [00:00<00:00,  4.95it/s]

2025/09/04 18:14:04 INFO dspy.evaluate.evaluate: Average Metric: 1 / 4 (25.0%)
2025/09/04 18:14:04 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 25.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0'].
2025/09/04 18:14:04 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0]
2025/09/04 18:14:04 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 25.0


2025/09/04 18:14:04 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 25.0!



Done.


In [None]:
import json

with open("./saved_files/test.json", "r") as f:
    data = json.load(f)

print(json.dumps(data, indent=2))


{
  "traces": [],
  "train": [],
  "demos": [],
  "signature": {
    "instructions": "Guess a number from 1 to 5..",
    "fields": [
      {
        "prefix": "Number Guess:",
        "description": "${number_guess}"
      },
      {
        "prefix": "Answer:",
        "description": "${answer}"
      }
    ]
  },
  "lm": null,
  "metadata": {
    "dependency_versions": {
      "python": "3.12",
      "dspy": "3.0.1",
      "cloudpickle": "3.1"
    }
  }
}
